Version in base suite: 2.41-12+deb13u1

Base version: glibc_2.41-12+deb13u1
Target version: glibc_2.41-12+deb13u2
Base file: /srv/ftp-master.debian.org/ftp/pool/main/g/glibc/glibc_2.41-12+deb13u1.dsc
Target file: /srv/ftp-master.debian.org/policy/pool/main/g/glibc/glibc_2.41-12+deb13u2.dsc

 changelog                                                                  |   51 
 patches/amd64/local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff |   32 
 patches/amd64/local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff      |   32 
 patches/git-updates.diff                                                   | 5428 ++++++++--
 patches/i386/local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff         |   62 
 patches/i386/local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff          |   32 
 patches/series                                                             |    7 
 7 files changed, 5042 insertions(+), 602 deletions(-)

dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmp9zgelfuh/glibc_2.41-12+deb13u1.dsc: no acceptable signature found
dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmp9zgelfuh/glibc_2.41-12+deb13u2.dsc: no acceptable signature found
diff -Nru glibc-2.41/debian/changelog glibc-2.41/debian/changelog
--- glibc-2.41/debian/changelog	2025-12-28 16:32:33.000000000 +0000
+++ glibc-2.41/debian/changelog	2026-03-01 20:10:06.000000000 +0000
@@ -1,3 +1,54 @@
+glibc (2.41-12+deb13u2) trixie; urgency=medium
+
+  * debian/patches/git-updates.diff: update from upstream stable branch:
+    - Fix a null pointer dereference in <ctype.h> macros in multithreaded
+      programs with multiple libc.so.
+    - Fix _r_debug handling when interposed by the main executable, restoring
+      compatibility with Dyninst.
+    - Fix a null pointer dereference in symbol lookup when the symbol version
+      hash value is zero.
+    - Add a new test for dlopen (NULL, RTLD_LAZY) from an ELF constructor.
+    - Preserve vector registers in the i386 TLS slow path.
+    - Add GLIBC_ABI_GNU2_TLS and GLIBC_ABI_GNU_TLS symbol versions on i386.
+    - Add GLIBC_ABI_GNU2_TLS and GLIBC_ABI_DT_X86_64_PLT symbol versions on
+      amd64.
+    - Fix NSS group merge not reacting to ERANGE during merge.
+    - Detect Intel Nova and Wildcat processors and use the same ifunc
+      selection as for Intel Panther Lake.
+    - Fix typo in wmemset ifunc selector that caused AVX2/AVX512 paths to be
+      skipped.
+    - Fix incorrect return values and improve special case handling in arm64
+      SVE pow/powf and tanpi/tanpif implementations.
+    - Optimise SVE scalar callbacks on arm64.
+    - Correct SME handling on arm64 by disabling ZA state in setjmp and
+      sigsetjmp, clearing ZA state in clone/clone3.
+    - Fix conform tests on arm64 when the toolchain does not default to
+      -mbranch-protection=standard.
+    - Fix performance instability in AdvSIMD tan and sinh functions on arm64.
+    - Fix and restore POWER10 optimized strcmp/strncmp functions on ppc64el,
+      which were previously disabled in 2.41-8 due to a security issue.
+    - Fix POWER optimized rawmemchr function on ppc64el.
+    - Validate pread size and offset for overflow when reading ELF
+      headers in the sprof utility.
+    - Minor fixes to testsuite support code.
+    - Optimize trylock for high cache contention workloads.
+    - Fix an integer overflow in _int_memalign leading to heap corruption
+      (CVE-2026-0861).  Closes: #1125678.
+    - Fix stack contents leak in getnetbyaddr (CVE-2026-0915).  Closes:
+      #1125748.
+    - Fix bug in wordexp, which could return uninitialized memory when using
+      WRDE_REUSE together with WRDE_APPEND (CVE-2025-15281).  Closes: #1126266.
+    - Switch currency symbol for the bg_BG locale to euro.
+  * Revert addition of symbol versions used as ABI flags, as the
+    dpkg-shlibdeps version in trixie is not able to handle them (see
+    #1122107):
+    - local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff
+    - local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff
+    - local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff
+    - local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff
+
+ -- Aurelien Jarno <aurel32@debian.org>  Sun, 01 Mar 2026 21:10:06 +0100
+
 glibc (2.41-12+deb13u1) trixie; urgency=medium
 
   * debian/patches/git-updates.diff: update from upstream stable branch:
diff -Nru glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff
--- glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff	1970-01-01 00:00:00.000000000 +0000
+++ glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff	2026-03-01 20:10:06.000000000 +0000
@@ -0,0 +1,32 @@
+--- a/sysdeps/x86_64/Makefile
++++ b/sysdeps/x86_64/Makefile
+@@ -209,15 +209,6 @@ LDFLAGS-tst-plt-rewritemod2.so = -Wl,-z,now,-z,undefs
+ tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2
+ $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so
+ 
+-tests-special += $(objpfx)check-dt-x86-64-plt.out
+-
+-$(objpfx)check-dt-x86-64-plt.out: $(common-objpfx)libc.so
+-	LC_ALL=C $(READELF) -V -W $< \
+-		| sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \
+-		| grep GLIBC_ABI_DT_X86_64_PLT > $@; \
+-	$(evaluate-test)
+-generated += check-dt-x86-64-plt.out
+-
+ tests-special += $(objpfx)check-gnu2-tls.out
+ 
+ $(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so
+--- a/sysdeps/x86_64/Versions
++++ b/sysdeps/x86_64/Versions
+@@ -10,11 +10,6 @@ libc {
+     # by scripts/versions.awk.
+     __placeholder_only_for_empty_version_map;
+   }
+-  GLIBC_ABI_DT_X86_64_PLT {
+-    # This symbol is used only for empty version map and will be removed
+-    # by scripts/versions.awk.
+-    __placeholder_only_for_empty_version_map;
+-  }
+ }
+ libm {
+   GLIBC_2.1 {
diff -Nru glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff
--- glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff	1970-01-01 00:00:00.000000000 +0000
+++ glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff	2026-03-01 20:10:06.000000000 +0000
@@ -0,0 +1,32 @@
+--- a/sysdeps/x86_64/Makefile
++++ b/sysdeps/x86_64/Makefile
+@@ -208,15 +208,6 @@ LDFLAGS-tst-plt-rewrite2 = -Wl,-z,now
+ LDFLAGS-tst-plt-rewritemod2.so = -Wl,-z,now,-z,undefs
+ tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2
+ $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so
+-
+-tests-special += $(objpfx)check-gnu2-tls.out
+-
+-$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so
+-	LC_ALL=C $(READELF) -V -W $< \
+-		| sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \
+-		| grep GLIBC_ABI_GNU2_TLS > $@; \
+-	$(evaluate-test)
+-generated += check-gnu2-tls.out
+ endif
+ 
+ test-internal-extras += tst-gnu2-tls2mod1
+--- a/sysdeps/x86_64/Versions
++++ b/sysdeps/x86_64/Versions
+@@ -5,11 +5,6 @@ libc {
+   GLIBC_2.13 {
+     __fentry__;
+   }
+-  GLIBC_ABI_GNU2_TLS {
+-    # This symbol is used only for empty version map and will be removed
+-    # by scripts/versions.awk.
+-    __placeholder_only_for_empty_version_map;
+-  }
+ }
+ libm {
+   GLIBC_2.1 {
diff -Nru glibc-2.41/debian/patches/git-updates.diff glibc-2.41/debian/patches/git-updates.diff
--- glibc-2.41/debian/patches/git-updates.diff	2025-12-28 16:32:28.000000000 +0000
+++ glibc-2.41/debian/patches/git-updates.diff	2026-03-01 20:10:06.000000000 +0000
@@ -22,10 +22,10 @@
  $(common-objdir):$(subst $(empty) ,:,$(patsubst ../$(subdir),.,$(rpath-dirs:%=$(common-objpfx)%)))
  else  # build-static
 diff --git a/NEWS b/NEWS
-index b11422b060..f77d1471c3 100644
+index b11422b060..b6bab59ca6 100644
 --- a/NEWS
 +++ b/NEWS
-@@ -5,6 +5,39 @@ See the end for copying conditions.
+@@ -5,6 +5,47 @@ See the end for copying conditions.
  Please send GNU C library bug reports via <https://sourceware.org/bugzilla/>
  using `glibc' in the "product" field.
  
@@ -39,7 +39,10 @@
 +
 +The following bugs were resolved with this release:
 +
++  [19341] ctype: Fallback initialization of TLS using relocations
++  [29190] Fix handling of symbol versions which hash to zero
 +  [31943] _dl_find_object can fail if ld.so contains gaps between load segments
++  [32483] ctype macros segfault in multithreaded programs with multiple libc.so
 +  [32269] RISC-V IFUNC resolver cannot access gp pointer
 +  [32626] math: math: log10p1f is not correctly rounded
 +  [32627] math: math: sinhf is not correctly rounded
@@ -60,7 +63,12 @@
 +  [32994] stdlib: resolve a double lock init issue after fork
 +  [33164] iconv -o should not create executable files
 +  [33185] Fix double-free after allocation failure in regcomp
++  [33234] Use TLS initial-exec model for __libc_tsd_CTYPE_* thread variables
 +  [33245] nptl: nptl: error in internal cancellation syscall handling
++  [33361] nss: Group merge does not react to ERANGE during merge
++  [33601] aarch64: Do not link conform tests with -Wl,-z,force-bti
++  [33814] glob: wordexp with WRDE_REUSE and WRDE_APPEND may return
++    uninitialized memory
 +
  Version 2.41
  
@@ -661,7 +669,7 @@
  have-insert = @libc_cv_insert@
  have-glob-dat-reloc = @libc_cv_has_glob_dat@
 diff --git a/configure b/configure
-index eb8abd0054..674d1d7e4a 100755
+index eb8abd0054..f5bd5dc7ac 100755
 --- a/configure
 +++ b/configure
 @@ -659,6 +659,7 @@ libc_cv_has_glob_dat
@@ -672,7 +680,17 @@
  ASFLAGS_config
  libc_cv_cc_with_libunwind
  libc_cv_insert
-@@ -7114,6 +7115,40 @@ if test $libc_cv_as_noexecstack = yes; then
+@@ -4928,6 +4929,9 @@ with_fp_cond=1
+ # A preconfigure script may define another name to TLS descriptor variant
+ mtls_descriptor=gnu2
+ 
++# A preconfigure script may define another name to traditional TLS variant
++mtls_traditional=gnu
++
+ if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null`
+ then
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5
+@@ -7114,6 +7118,40 @@ if test $libc_cv_as_noexecstack = yes; then
  fi
  
  
@@ -713,7 +731,49 @@
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for linker that supports -z execstack" >&5
  printf %s "checking for linker that supports -z execstack... " >&6; }
  libc_linker_feature=no
-@@ -8643,6 +8678,35 @@ if test $libc_cv_builtin_trap = yes; then
+@@ -7452,6 +7490,41 @@ rm -f conftest*
+ config_vars="$config_vars
+ have-test-mtls-descriptor = $libc_cv_test_mtls_descriptor"
+ 
++
++cat > conftest.c <<EOF
++$conftest_code
++EOF
++
++saved_CC="$CC"
++CC="$TEST_CC"
++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for traditional tls support in testing" >&5
++printf %s "checking for traditional tls support in testing... " >&6; }
++if test ${libc_cv_test_mtls_traditional+y}
++then :
++  printf %s "(cached) " >&6
++else case e in #(
++  e)     if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_traditional -nostdlib -nostartfiles -shared conftest.c -o conftest 1>&5'
++  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
++  (eval $ac_try) 2>&5
++  ac_status=$?
++  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
++  test $ac_status = 0; }; }
++    then
++      libc_cv_test_mtls_traditional=$mtls_traditional
++    else
++      libc_cv_test_mtls_traditional=no
++    fi ;;
++esac
++fi
++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_test_mtls_traditional" >&5
++printf "%s\n" "$libc_cv_test_mtls_traditional" >&6; }
++
++CC="$saved_CC"
++
++rm -f conftest*
++config_vars="$config_vars
++have-test-mtls-traditional = $libc_cv_test_mtls_traditional"
++
+ conftest_code="
+ void __foo (void)
+ {
+@@ -8643,6 +8716,35 @@ if test $libc_cv_builtin_trap = yes; then
  
  fi
  
@@ -749,7 +809,7 @@
  ac_ext=cpp
  ac_cpp='$CXXCPP $CPPFLAGS'
  ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-@@ -8908,6 +8972,104 @@ printf "%s\n" "$libc_linker_feature" >&6; }
+@@ -8908,6 +9010,104 @@ printf "%s\n" "$libc_linker_feature" >&6; }
  config_vars="$config_vars
  load-address-ldflag = $libc_cv_load_address_ldflag"
  
@@ -855,10 +915,20 @@
  printf %s "checking if we can build programs as PIE... " >&6; }
  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 diff --git a/configure.ac b/configure.ac
-index 050bfa65e3..57cd24c87d 100644
+index 050bfa65e3..e55ee81959 100644
 --- a/configure.ac
 +++ b/configure.ac
-@@ -1318,6 +1318,10 @@ if test $libc_cv_as_noexecstack = yes; then
+@@ -483,6 +483,9 @@ with_fp_cond=1
+ # A preconfigure script may define another name to TLS descriptor variant
+ mtls_descriptor=gnu2
+ 
++# A preconfigure script may define another name to traditional TLS variant
++mtls_traditional=gnu
++
+ dnl Let sysdeps/*/preconfigure act here.
+ LIBC_PRECONFIGURE([$srcdir], [for sysdeps])
+ 
+@@ -1318,6 +1321,10 @@ if test $libc_cv_as_noexecstack = yes; then
  fi
  AC_SUBST(ASFLAGS_config)
  
@@ -869,7 +939,24 @@
  LIBC_LINKER_FEATURE([-z execstack], [-Wl,-z,execstack],
  		    [libc_cv_z_execstack=yes], [libc_cv_z_execstack=no])
  AC_SUBST(libc_cv_z_execstack)
-@@ -1820,6 +1824,17 @@ if test $libc_cv_builtin_trap = yes; then
+@@ -1396,6 +1403,16 @@ LIBC_TRY_TEST_CC_COMMAND([for tls descriptor support],
+ LIBC_CONFIG_VAR([have-test-mtls-descriptor],
+ 		[$libc_cv_test_mtls_descriptor])
+ 
++dnl Check if TEST_CC support traditional tls.
++LIBC_TRY_TEST_CC_COMMAND([for traditional tls support],
++  [$conftest_code],
++  [-fPIC -mtls-dialect=$mtls_traditional -nostdlib -nostartfiles -shared],
++  libc_cv_test_mtls_traditional,
++  [libc_cv_test_mtls_traditional=$mtls_traditional],
++  [libc_cv_test_mtls_traditional=no])
++LIBC_CONFIG_VAR([have-test-mtls-traditional],
++		[$libc_cv_test_mtls_traditional])
++
+ dnl clang emits an warning for a double alias redirection, to warn the
+ dnl original symbol is sed even when weak definition overrides it.
+ dnl It is a usual pattern for weak_alias, where multiple alias point to
+@@ -1820,6 +1837,17 @@ if test $libc_cv_builtin_trap = yes; then
    AC_DEFINE([HAVE_BUILTIN_TRAP])
  fi
  
@@ -887,7 +974,7 @@
  dnl C++ feature tests.
  AC_LANG_PUSH([C++])
  
-@@ -1992,6 +2007,23 @@ LIBC_LINKER_FEATURE([-Ttext-segment=$libc_cv_pde_load_address],
+@@ -1992,6 +2020,23 @@ LIBC_LINKER_FEATURE([-Ttext-segment=$libc_cv_pde_load_address],
  		    [libc_cv_load_address_ldflag=])
  LIBC_CONFIG_VAR([load-address-ldflag], [$libc_cv_load_address_ldflag])
  
@@ -911,8 +998,221 @@
  AC_MSG_CHECKING(if we can build programs as PIE)
  AC_COMPILE_IFELSE([AC_LANG_SOURCE([[#ifdef PIE_UNSUPPORTED
  # error PIE is not supported
+diff --git a/ctype/Makefile b/ctype/Makefile
+index 64848bd02e..783b689bbf 100644
+--- a/ctype/Makefile
++++ b/ctype/Makefile
+@@ -36,6 +36,23 @@ aux := ctype-info
+ 
+ tests := \
+   test_ctype \
++  tst-ctype-tls-dlmopen \
++  tst-ctype-tls-dlopen-static \
+   # tests
+ 
++tests-static := \
++  tst-ctype-tls-dlopen-static \
++  # tests-static
++
++modules-names := \
++  tst-ctype-tls-mod \
++  # modules-names
++
+ include ../Rules
++
++$(objpfx)tst-ctype-tls-dlmopen: $(shared-thread-library)
++$(objpfx)tst-ctype-tls-dlmopen.out: $(objpfx)tst-ctype-tls-mod.so
++$(objpfx)tst-ctype-tls-dlopen-static: $(static-thread-library)
++$(objpfx)tst-ctype-tls-dlopen-static.out: $(objpfx)tst-ctype-tls-mod.so
++tst-ctype-tls-dlopen-static-ENV = \
++  LD_LIBRARY_PATH=$(ld-library-path):$(common-objpfx):$(common-objpfx)elf
+diff --git a/ctype/ctype-info.c b/ctype/ctype-info.c
+index 5ee578e901..fb5acf9419 100644
+--- a/ctype/ctype-info.c
++++ b/ctype/ctype-info.c
+@@ -19,20 +19,28 @@
+ #include <ctype.h>
+ #include <locale/localeinfo.h>
+ 
+-__libc_tsd_define (, const uint16_t *, CTYPE_B)
+-__libc_tsd_define (, const int32_t *, CTYPE_TOLOWER)
+-__libc_tsd_define (, const int32_t *, CTYPE_TOUPPER)
++/* Fallback initialization using relocations.  See the _nl_C_locobj
++   initializers in locale/xlocale.c.  Usually, this is overwritten by
++   __ctype_init before user code runs, but this does not happen for
++   threads in secondary namespaces.  With the initializers, secondary
++   namespaces at least get locale data from the C locale.  */
++__thread const uint16_t * __libc_tsd_CTYPE_B attribute_tls_model_ie
++  = (const uint16_t *) _nl_C_LC_CTYPE_class + 128;
++__thread const int32_t * __libc_tsd_CTYPE_TOLOWER attribute_tls_model_ie
++  = (const int32_t *) _nl_C_LC_CTYPE_tolower + 128;
++__thread const int32_t * __libc_tsd_CTYPE_TOUPPER attribute_tls_model_ie
++  = (const int32_t *) _nl_C_LC_CTYPE_toupper + 128;
+ 
+ 
+ void
+ __ctype_init (void)
+ {
+-  const uint16_t **bp = __libc_tsd_address (const uint16_t *, CTYPE_B);
+-  *bp = (const uint16_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_CLASS) + 128;
+-  const int32_t **up = __libc_tsd_address (const int32_t *, CTYPE_TOUPPER);
+-  *up = ((int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOUPPER) + 128);
+-  const int32_t **lp = __libc_tsd_address (const int32_t *, CTYPE_TOLOWER);
+-  *lp = ((int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOLOWER) + 128);
++  __libc_tsd_CTYPE_B
++    = ((const uint16_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_CLASS)) + 128;
++  __libc_tsd_CTYPE_TOUPPER
++    = ((const int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOUPPER)) + 128;
++  __libc_tsd_CTYPE_TOLOWER =
++    ((const int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOLOWER)) + 128;
+ }
+ libc_hidden_def (__ctype_init)
+ 
+@@ -41,10 +49,7 @@ libc_hidden_def (__ctype_init)
+ #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
+ 
+ /* Defined in locale/C-ctype.c.  */
+-extern const char _nl_C_LC_CTYPE_class[] attribute_hidden;
+ extern const char _nl_C_LC_CTYPE_class32[] attribute_hidden;
+-extern const char _nl_C_LC_CTYPE_toupper[] attribute_hidden;
+-extern const char _nl_C_LC_CTYPE_tolower[] attribute_hidden;
+ extern const char _nl_C_LC_CTYPE_class_upper[] attribute_hidden;
+ extern const char _nl_C_LC_CTYPE_class_lower[] attribute_hidden;
+ extern const char _nl_C_LC_CTYPE_class_alpha[] attribute_hidden;
+diff --git a/ctype/tst-ctype-tls-dlmopen.c b/ctype/tst-ctype-tls-dlmopen.c
+new file mode 100644
+index 0000000000..f7eeb65551
+--- /dev/null
++++ b/ctype/tst-ctype-tls-dlmopen.c
+@@ -0,0 +1,2 @@
++#define DO_STATIC_TEST 0
++#include "tst-ctype-tls-skeleton.c"
+diff --git a/ctype/tst-ctype-tls-dlopen-static.c b/ctype/tst-ctype-tls-dlopen-static.c
+new file mode 100644
+index 0000000000..c2c09c362c
+--- /dev/null
++++ b/ctype/tst-ctype-tls-dlopen-static.c
+@@ -0,0 +1,2 @@
++#define DO_STATIC_TEST 1
++#include "tst-ctype-tls-skeleton.c"
+diff --git a/ctype/tst-ctype-tls-mod.c b/ctype/tst-ctype-tls-mod.c
+new file mode 100644
+index 0000000000..52cbb9dcb6
+--- /dev/null
++++ b/ctype/tst-ctype-tls-mod.c
+@@ -0,0 +1,37 @@
++/* Wrappers for <ctype.h> macros in a secondary namespace.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <ctype.h>
++
++int
++my_isalpha (int ch)
++{
++  return isalpha (ch);
++}
++
++int
++my_toupper (int ch)
++{
++  return toupper (ch);
++}
++
++int
++my_tolower (int ch)
++{
++  return tolower (ch);
++}
+diff --git a/ctype/tst-ctype-tls-skeleton.c b/ctype/tst-ctype-tls-skeleton.c
+new file mode 100644
+index 0000000000..8c53e35899
+--- /dev/null
++++ b/ctype/tst-ctype-tls-skeleton.c
+@@ -0,0 +1,67 @@
++/* Test that <ctype.h> in a secondary namespace works.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++/* Before this file is included, define DO_STATIC_TEST to 0 or 1.
++   With 0, dlmopen is used for the test.  With 1, dlopen is used.  */
++
++#include <stddef.h>
++#include <stdlib.h>
++#include <support/check.h>
++#include <support/support.h>
++#include <support/xdlfcn.h>
++#include <support/xthread.h>
++
++static int (*my_isalpha) (int);
++static int (*my_toupper) (int);
++static int (*my_tolower) (int);
++
++static void *
++checks (void *ignore)
++{
++  TEST_VERIFY (my_isalpha ('a'));
++  TEST_VERIFY (!my_isalpha ('0'));
++  TEST_COMPARE (my_toupper ('a'), 'A');
++  TEST_COMPARE (my_toupper ('A'), 'A');
++  TEST_COMPARE (my_tolower ('a'), 'a');
++  TEST_COMPARE (my_tolower ('A'), 'a');
++  return NULL;
++}
++
++static int
++do_test (void)
++{
++  char *dso = xasprintf ("%s/ctype/tst-ctype-tls-mod.so", support_objdir_root);
++#if DO_STATIC_TEST
++  void *handle = xdlopen (dso, RTLD_LAZY);
++#else
++  void *handle = xdlmopen (LM_ID_NEWLM, dso, RTLD_LAZY);
++#endif
++  my_isalpha = xdlsym (handle, "my_isalpha");
++  my_toupper = xdlsym (handle, "my_toupper");
++  my_tolower = xdlsym (handle, "my_tolower");
++
++  checks (NULL);
++  xpthread_join (xpthread_create (NULL, checks, NULL));
++
++  xdlclose (handle);
++  free (dso);
++
++  return 0;
++}
++
++#include <support/test-driver.c>
 diff --git a/elf/Makefile b/elf/Makefile
-index 4b1d0d8741..b8064ef14c 100644
+index 4b1d0d8741..d8a0c0c4ae 100644
 --- a/elf/Makefile
 +++ b/elf/Makefile
 @@ -34,7 +34,6 @@ routines = \
@@ -923,7 +1223,11 @@
    dl-iteratephdr \
    dl-libc \
    dl-origin \
-@@ -61,6 +60,8 @@ dl-routines = \
+@@ -58,9 +57,12 @@ dl-routines = \
+   dl-close \
+   dl-debug \
+   dl-debug-symbols \
++  dl-debug_state \
    dl-deps \
    dl-exception \
    dl-execstack \
@@ -932,7 +1236,7 @@
    dl-fini \
    dl-init \
    dl-load \
-@@ -266,6 +267,7 @@ tests-static-normal := \
+@@ -266,6 +268,7 @@ tests-static-normal := \
    tst-array1-static \
    tst-array5-static \
    tst-dl-iter-static \
@@ -940,7 +1244,7 @@
    tst-dst-static \
    tst-env-setuid-static \
    tst-getauxval-static \
-@@ -379,6 +381,7 @@ tests += \
+@@ -379,6 +382,7 @@ tests += \
    tst-align3 \
    tst-audit-tlsdesc \
    tst-audit-tlsdesc-dlopen \
@@ -948,7 +1252,34 @@
    tst-audit1 \
    tst-audit2 \
    tst-audit8 \
-@@ -532,6 +535,8 @@ tests-internal += \
+@@ -415,7 +419,10 @@ tests += \
+   tst-dlmopen1 \
+   tst-dlmopen3 \
+   tst-dlmopen4 \
++  tst-dlmopen4-nonpic \
++  tst-dlmopen4-pic \
+   tst-dlopen-auditdup \
++  tst-dlopen-constructor-null \
+   tst-dlopen-self \
+   tst-dlopen-tlsmodid \
+   tst-dlopen-tlsreinit1 \
+@@ -490,6 +497,7 @@ tests += \
+   tst-tls21 \
+   tst-tls22 \
+   tst-tls22-gnu2 \
++  tst-tls23 \
+   tst-tlsalign \
+   tst-tlsalign-extern \
+   tst-tlsgap \
+@@ -497,6 +505,7 @@ tests += \
+   tst-unique2 \
+   tst-unwind-ctor \
+   tst-unwind-main \
++  tst-version-hash-zero \
+   unload3 \
+   unload4 \
+   unload5 \
+@@ -532,6 +541,8 @@ tests-internal += \
    tst-dl_find_object-threads \
    tst-dlmopen2 \
    tst-hash-collision3 \
@@ -957,7 +1288,7 @@
    tst-ptrguard1 \
    tst-stackguard1 \
    tst-tls-surplus \
-@@ -543,6 +548,10 @@ tests-internal += \
+@@ -543,6 +554,10 @@ tests-internal += \
    unload2 \
    # tests-internal
  
@@ -968,7 +1299,7 @@
  tests-container += \
    tst-dlopen-self-container \
    tst-dlopen-tlsmodid-container \
-@@ -567,9 +576,11 @@ tests-execstack-yes = \
+@@ -567,9 +582,11 @@ tests-execstack-yes = \
    tst-execstack \
    tst-execstack-needed \
    tst-execstack-prog \
@@ -981,7 +1312,7 @@
    # tests-execstack-static-yes
  ifeq (yes,$(run-built-tests))
  tests-execstack-special-yes = \
-@@ -863,6 +874,7 @@ modules-names += \
+@@ -863,6 +880,7 @@ modules-names += \
    tst-auditmanymod8 \
    tst-auditmanymod9 \
    tst-auditmod-tlsdesc  \
@@ -989,15 +1320,35 @@
    tst-auditmod1 \
    tst-auditmod11 \
    tst-auditmod12 \
-@@ -905,6 +917,7 @@ modules-names += \
+@@ -905,6 +923,9 @@ modules-names += \
    tst-dlmopen1mod \
    tst-dlopen-auditdup-auditmod \
    tst-dlopen-auditdupmod \
++  tst-dlopen-constructor-null-mod1 \
++  tst-dlopen-constructor-null-mod2 \
 +  tst-dlopen-sgid-mod \
    tst-dlopen-tlsreinitmod1 \
    tst-dlopen-tlsreinitmod2 \
    tst-dlopen-tlsreinitmod3 \
-@@ -1144,6 +1157,10 @@ tests-pie += \
+@@ -1003,6 +1024,7 @@ modules-names += \
+   tst-tls22-mod1-gnu2 \
+   tst-tls22-mod2 \
+   tst-tls22-mod2-gnu2 \
++  tst-tls23-mod \
+   tst-tlsalign-lib \
+   tst-tlsgap-mod0 \
+   tst-tlsgap-mod1 \
+@@ -1033,6 +1055,9 @@ modules-names += \
+   tst-unique2mod1 \
+   tst-unique2mod2 \
+   tst-unwind-ctor-lib \
++  tst-version-hash-zero-linkmod \
++  tst-version-hash-zero-mod \
++  tst-version-hash-zero-refmod \
+   unload2dep \
+   unload2mod \
+   unload3mod1 \
+@@ -1144,6 +1169,10 @@ tests-pie += \
    tst-pie1 \
    tst-pie2 \
    # tests-pie
@@ -1008,7 +1359,7 @@
  ifneq (,$(load-address-ldflag))
  tests += \
    tst-pie-address \
-@@ -1159,6 +1176,10 @@ tests += \
+@@ -1159,6 +1188,10 @@ tests += \
  tests-static += \
    tst-pie-address-static \
    # tests-static
@@ -1019,7 +1370,7 @@
  LDFLAGS-tst-pie-address-static += \
    $(load-address-ldflag)=$(pde-load-address)
  endif
-@@ -1988,6 +2009,9 @@ $(objpfx)tst-execstack.out: $(objpfx)tst-execstack-mod.so
+@@ -1988,6 +2021,9 @@ $(objpfx)tst-execstack.out: $(objpfx)tst-execstack-mod.so
  CPPFLAGS-tst-execstack.c += -DUSE_PTHREADS=0
  LDFLAGS-tst-execstack = -Wl,-z,noexecstack
  LDFLAGS-tst-execstack-mod.so = -Wl,-z,execstack
@@ -1029,7 +1380,7 @@
  
  $(objpfx)tst-execstack-needed: $(objpfx)tst-execstack-mod.so
  LDFLAGS-tst-execstack-needed = -Wl,-z,noexecstack
-@@ -1996,7 +2020,18 @@ LDFLAGS-tst-execstack-prog = -Wl,-z,execstack
+@@ -1996,7 +2032,18 @@ LDFLAGS-tst-execstack-prog = -Wl,-z,execstack
  CFLAGS-tst-execstack-prog.c += -Wno-trampolines
  CFLAGS-tst-execstack-mod.c += -Wno-trampolines
  
@@ -1048,7 +1399,7 @@
  CFLAGS-tst-execstack-prog-static.c += -Wno-trampolines
  
  ifeq (yes,$(build-hardcoded-path-in-tests))
-@@ -2074,6 +2109,7 @@ $(objpfx)tst-array5-static-cmp.out: tst-array5-static.exp \
+@@ -2074,6 +2121,7 @@ $(objpfx)tst-array5-static-cmp.out: tst-array5-static.exp \
  
  CFLAGS-tst-pie1.c += $(pie-ccflag)
  CFLAGS-tst-pie2.c += $(pie-ccflag)
@@ -1056,7 +1407,21 @@
  CFLAGS-tst-pie-address.c += $(pie-ccflag)
  
  $(objpfx)tst-piemod1.so: $(libsupport)
-@@ -3189,6 +3225,9 @@ $(objpfx)tst-audit-tlsdesc.out: $(objpfx)tst-auditmod-tlsdesc.so
+@@ -2206,6 +2254,13 @@ $(objpfx)tst-dlmopen3.out: $(objpfx)tst-dlmopen1mod.so
+ 
+ $(objpfx)tst-dlmopen4.out: $(objpfx)tst-dlmopen1mod.so
+ 
++CFLAGS-tst-dlmopen4-pic.c += -fPIC
++$(objpfx)tst-dlmopen4-pic.out: $(objpfx)tst-dlmopen1mod.so
++
++CFLAGS-tst-dlmopen4-nonpic.c += -fno-pie
++tst-dlmopen4-nonpic-no-pie = yes
++$(objpfx)tst-dlmopen4-nonpic.out: $(objpfx)tst-dlmopen1mod.so
++
+ $(objpfx)tst-audit1.out: $(objpfx)tst-auditmod1.so
+ tst-audit1-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so
+ 
+@@ -3189,6 +3244,9 @@ $(objpfx)tst-audit-tlsdesc.out: $(objpfx)tst-auditmod-tlsdesc.so
  tst-audit-tlsdesc-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so
  $(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-auditmod-tlsdesc.so
  tst-audit-tlsdesc-dlopen-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so
@@ -1066,12 +1431,322 @@
  
  $(objpfx)tst-dlmopen-twice.out: \
    $(objpfx)tst-dlmopen-twice-mod1.so \
-@@ -3392,3 +3431,5 @@ $(objpfx)tst-nolink-libc-2: $(objpfx)tst-nolink-libc.o
+@@ -3354,6 +3412,13 @@ tst-tls22-mod1-gnu2.so-no-z-defs = yes
+ tst-tls22-mod2.so-no-z-defs = yes
+ tst-tls22-mod2-gnu2.so-no-z-defs = yes
+ 
++$(objpfx)tst-tls23: $(shared-thread-library)
++$(objpfx)tst-tls23.out: $(objpfx)tst-tls23-mod.so
++
++ifneq (no,$(have-test-mtls-traditional))
++CFLAGS-tst-tls23-mod.c += -mtls-dialect=$(have-test-mtls-traditional)
++endif
++
+ ifeq ($(have-test-cc-cflags-fsemantic-interposition),yes)
+ # Compiler may default to -fno-semantic-interposition.  These modules
+ # must be compiled with -fsemantic-interposition.
+@@ -3392,3 +3457,28 @@ $(objpfx)tst-nolink-libc-2: $(objpfx)tst-nolink-libc.o
  	  -Wl,--dynamic-linker=$(objpfx)ld.so
  $(objpfx)tst-nolink-libc-2.out: $(objpfx)tst-nolink-libc-2 $(objpfx)ld.so
  	$< > $@ 2>&1; $(evaluate-test)
 +
 +$(objpfx)tst-dlopen-sgid.out: $(objpfx)tst-dlopen-sgid-mod.so
++
++$(objpfx)tst-version-hash-zero.out: \
++  $(objpfx)tst-version-hash-zero-mod.so \
++  $(objpfx)tst-version-hash-zero-refmod.so
++LDFLAGS-tst-version-hash-zero-mod.so = \
++  -Wl,--version-script=tst-version-hash-zero-mod.map
++# The run-time test module tst-version-hash-zero-refmod.so is linked
++# to a stub module, tst-version-hash-zero-linkmod.so, to produce an
++# expected relocation error.
++$(objpfx)tst-version-hash-zero-refmod.so: \
++  $(objpfx)tst-version-hash-zero-linkmod.so
++LDFLAGS-tst-version-hash-zero-linkmod.so = \
++  -Wl,--version-script=tst-version-hash-zero-linkmod.map \
++  -Wl,--soname=tst-version-hash-zero-mod.so
++$(objpfx)tst-version-hash-zero-refmod.so: \
++  $(objpfx)tst-version-hash-zero-linkmod.so
++tst-version-hash-zero-refmod.so-no-z-defs = yes
++
++$(objpfx)tst-dlopen-constructor-null: \
++  $(objpfx)tst-dlopen-constructor-null-mod1.so \
++  $(objpfx)tst-dlopen-constructor-null-mod2.so
++$(objpfx)tst-dlopen-constructor-null-mod2.so: \
++  $(objpfx)tst-dlopen-constructor-null-mod1.so
+diff --git a/elf/dl-close.c b/elf/dl-close.c
+index 47bd3dab81..83e4f012b2 100644
+--- a/elf/dl-close.c
++++ b/elf/dl-close.c
+@@ -433,8 +433,7 @@ _dl_close_worker (struct link_map *map, bool force)
+   /* Notify the debugger we are about to remove some loaded objects.
+      LA_ACT_DELETE has already been signalled above for !unload_any.  */
+   struct r_debug *r = _dl_debug_update (nsid);
+-  r->r_state = RT_DELETE;
+-  _dl_debug_state ();
++  _dl_debug_change_state (r, RT_DELETE);
+   LIBC_PROBE (unmap_start, 2, nsid, r);
+ 
+   if (unload_global)
+@@ -726,8 +725,7 @@ _dl_close_worker (struct link_map *map, bool force)
+   __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
+ 
+   /* Notify the debugger those objects are finalized and gone.  */
+-  r->r_state = RT_CONSISTENT;
+-  _dl_debug_state ();
++  _dl_debug_change_state (r, RT_CONSISTENT);
+   LIBC_PROBE (unmap_complete, 2, nsid, r);
+ 
+ #ifdef SHARED
+diff --git a/elf/dl-debug-symbols.S b/elf/dl-debug-symbols.S
+index 7bcb035826..d789f4e35a 100644
+--- a/elf/dl-debug-symbols.S
++++ b/elf/dl-debug-symbols.S
+@@ -38,3 +38,4 @@
+ _r_debug:
+ _r_debug_extended:
+ 	.zero	R_DEBUG_EXTENDED_SIZE
++rtld_hidden_def (_r_debug)
+diff --git a/elf/dl-debug.c b/elf/dl-debug.c
+index 5ff1460ab7..7052f4a3c1 100644
+--- a/elf/dl-debug.c
++++ b/elf/dl-debug.c
+@@ -16,6 +16,7 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
++#include <assert.h>
+ #include <ldsodefs.h>
+ 
+ 
+@@ -30,23 +31,86 @@ extern const int verify_link_map_members[(VERIFY_MEMBER (l_addr)
+ 					  && VERIFY_MEMBER (l_prev))
+ 					 ? 1 : -1];
+ 
++#ifdef SHARED
++/* r_debug structs for secondary namespaces.  The first namespace is
++   handled separately because its r_debug structure must overlap with
++   the public _r_debug symbol, so the first array element corresponds
++   to LM_ID_BASE + 1.  See elf/dl-debug-symbols.S.  */
++struct r_debug_extended _r_debug_array[DL_NNS - 1];
++
++/* If not null, pointer to the _r_debug in the main executable.  */
++static struct r_debug *_r_debug_main;
++
++void
++_dl_debug_post_relocate (struct link_map *main_map)
++{
++  /* Perform a full symbol search in all objects, to maintain
++     compatibility with interposed _r_debug definitions.  The lookup
++     cannot fail because there is a definition in ld.so, and this
++     function is only called if the ld.so search scope is not empty.  */
++  const ElfW(Sym) *sym = NULL;
++  lookup_t result = _dl_lookup_symbol_x ("_r_debug", main_map, &sym,
++					 main_map->l_scope, NULL, 0, 0, NULL);
++  if (sym->st_size >= sizeof (struct r_debug))
++    {
++      struct r_debug *main_r_debug = DL_SYMBOL_ADDRESS (result, sym);
++      if (main_r_debug != &_r_debug_extended.base)
++	{
++	  /* The extended version of the struct is not available in
++	     the main executable because a copy relocation has been
++	     used.  r_map etc. have already been copied as part of the
++	     copy relocation processing.  */
++	  main_r_debug->r_version = 1;
++
++	  /* Record that dual updates of the initial link map are
++	     required.  */
++	  _r_debug_main = main_r_debug;
++	}
++    }
++}
++
++/* Look up the r_debug_extended object that belongs to namespace NS.  */
++static inline struct r_debug_extended *
++get_rdebug (Lmid_t ns)
++{
++  /* Secondary namespaces are stored in _r_debug_array at index NS - 1.  */
++  if (ns != LM_ID_BASE)
++    return &_r_debug_array[ns - 1];
++  return &_r_debug_extended;
++}
++#else /* !SHARED */
++static inline struct r_debug_extended *
++get_rdebug (Lmid_t ns)
++{
++  return &_r_debug_extended; /* There is just one namespace.  */
++}
++#endif  /* !SHARED */
++
+ /* Update the `r_map' member and return the address of `struct r_debug'
+    of the namespace NS. */
+ 
+ struct r_debug *
+ _dl_debug_update (Lmid_t ns)
+ {
+-  struct r_debug_extended *r;
+-  if (ns == LM_ID_BASE)
+-    r = &_r_debug_extended;
+-  else
+-    r = &GL(dl_ns)[ns]._ns_debug;
++  struct r_debug_extended *r = get_rdebug (ns);
+   if (r->base.r_map == NULL)
+     atomic_store_release (&r->base.r_map,
+ 			  (void *) GL(dl_ns)[ns]._ns_loaded);
+   return &r->base;
+ }
+ 
++void
++_dl_debug_change_state (struct r_debug *r, int state)
++{
++  atomic_store_release (&r->r_state, state);
++#ifdef SHARED
++  /* Mirror the base namespace state into the copy-relocated _r_debug.  */
++  if (_r_debug_main != NULL && r == &_r_debug_extended.base)
++    atomic_store_release (&_r_debug_main->r_state, state);
++#endif
++  _dl_debug_state ();
++}
++
+ /* Initialize _r_debug_extended for the namespace NS.  LDBASE is the
+    run-time load address of the dynamic linker, to be put in
+    _r_debug_extended.r_ldbase.  Return the address of _r_debug.  */
+@@ -54,34 +118,7 @@ _dl_debug_update (Lmid_t ns)
+ struct r_debug *
+ _dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns)
+ {
+-  struct r_debug_extended *r, **pp = NULL;
+-
+-  if (ns == LM_ID_BASE)
+-    {
+-      r = &_r_debug_extended;
+-      /* Initialize r_version to 1.  */
+-      if (_r_debug_extended.base.r_version == 0)
+-	_r_debug_extended.base.r_version = 1;
+-    }
+-  else if (DL_NNS > 1)
+-    {
+-      r = &GL(dl_ns)[ns]._ns_debug;
+-      if (r->base.r_brk == 0)
+-	{
+-	  /* Add the new namespace to the linked list.  After a namespace
+-	     is initialized, r_brk becomes non-zero.  A namespace becomes
+-	     empty (r_map == NULL) when it is unused.  But it is never
+-	     removed from the linked list.  */
+-	  struct r_debug_extended *p;
+-	  for (pp = &_r_debug_extended.r_next;
+-	       (p = *pp) != NULL;
+-	       pp = &p->r_next)
+-	    ;
+-
+-	  r->base.r_version = 2;
+-	}
+-    }
+-
++  struct r_debug_extended *r = get_rdebug (ns);
+   if (r->base.r_brk == 0)
+     {
+       /* Tell the debugger where to find the map of loaded objects.
+@@ -89,30 +126,44 @@ _dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns)
+ 	 only once.  */
+       r->base.r_ldbase = ldbase ?: _r_debug_extended.base.r_ldbase;
+       r->base.r_brk = (ElfW(Addr)) &_dl_debug_state;
+-      r->r_next = NULL;
++
++#ifdef SHARED
++      /* Add the new namespace to the linked list.  This assumes that
++	 namespaces are allocated in increasing order.  After a
++	 namespace is initialized, r_brk becomes non-zero.  A
++	 namespace becomes empty (r_map == NULL) when it is unused.
++	 But it is never removed from the linked list.  */
++
++      if (ns != LM_ID_BASE)
++	{
++	  r->base.r_version = 2;
++	  if (ns - 1 == LM_ID_BASE)
++	    {
++	      atomic_store_release (&_r_debug_extended.r_next, r);
++	      /* Now there are multiple namespaces.  Note that this
++		 deliberately does not update the copy in the main
++		 executable (if it exists).  */
++	      atomic_store_release (&_r_debug_extended.base.r_version, 2);
++	    }
++	  else
++	    /* Update r_debug_extended of the previous namespace.  */
++	    atomic_store_release (&_r_debug_array[ns - 2].r_next, r);
++	}
++      else
++#endif /* SHARED */
++	r->base.r_version = 1;
+     }
+ 
+   if (r->base.r_map == NULL)
+-    atomic_store_release (&r->base.r_map,
+-			  (void *) GL(dl_ns)[ns]._ns_loaded);
+-
+-  if (pp != NULL)
+     {
+-      atomic_store_release (pp, r);
+-      /* Bump r_version to 2 for the new namespace.  */
+-      atomic_store_release (&_r_debug_extended.base.r_version, 2);
++      struct link_map_public *l = (void *) GL(dl_ns)[ns]._ns_loaded;
++      atomic_store_release (&r->base.r_map, l);
++#ifdef SHARED
++      if (ns == LM_ID_BASE && _r_debug_main != NULL)
++	/* Update the copy-relocation of _r_debug.  */
++	atomic_store_release (&_r_debug_main->r_map, l);
++#endif
+     }
+ 
+   return &r->base;
+ }
+-
+-
+-/* This function exists solely to have a breakpoint set on it by the
+-   debugger.  The debugger is supposed to find this function's address by
+-   examining the r_brk member of struct r_debug, but GDB 4.15 in fact looks
+-   for this particular symbol name in the PT_INTERP file.  */
+-void
+-_dl_debug_state (void)
+-{
+-}
+-rtld_hidden_def (_dl_debug_state)
+diff --git a/elf/dl-debug_state.c b/elf/dl-debug_state.c
+new file mode 100644
+index 0000000000..40c134a49e
+--- /dev/null
++++ b/elf/dl-debug_state.c
+@@ -0,0 +1,30 @@
++/* Debugger hook called after dynamic linker updates.
++   Copyright (C) 1996-2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <ldsodefs.h>
++
++/* This function exists solely to have a breakpoint set on it by the
++   debugger.  The debugger is supposed to find this function's address by
++   examining the r_brk member of struct r_debug, but GDB 4.15 in fact looks
++   for this particular symbol name in the PT_INTERP file.  Therefore,
++   this function must not be inlined.  */
++void
++_dl_debug_state (void)
++{
++}
++rtld_hidden_def (_dl_debug_state)
 diff --git a/elf/dl-execstack-tunable.c b/elf/dl-execstack-tunable.c
 new file mode 100644
 index 0000000000..e3b638aeaa
@@ -1271,10 +1946,20 @@
         ph < ph_end; ++ph)
      if (ph->p_type == DLFO_EH_SEGMENT_TYPE)
 diff --git a/elf/dl-load.c b/elf/dl-load.c
-index f905578a65..945dd8a231 100644
+index f905578a65..7b21f1dffa 100644
 --- a/elf/dl-load.c
 +++ b/elf/dl-load.c
-@@ -945,7 +945,7 @@ struct link_map *
+@@ -921,8 +921,7 @@ _dl_notify_new_object (int mode, Lmid_t nsid, struct link_map *l)
+       /* Notify the debugger we have added some objects.  We need to
+ 	 call _dl_debug_initialize in a static program in case dynamic
+ 	 linking has not been used before.  */
+-      r->r_state = RT_ADD;
+-      _dl_debug_state ();
++      _dl_debug_change_state (r, RT_ADD);
+       LIBC_PROBE (map_start, 2, nsid, r);
+     }
+   else
+@@ -945,7 +944,7 @@ struct link_map *
  _dl_map_object_from_fd (const char *name, const char *origname, int fd,
  			struct filebuf *fbp, char *realname,
  			struct link_map *loader, int l_type, int mode,
@@ -1283,7 +1968,7 @@
  {
    struct link_map *l = NULL;
    const ElfW(Ehdr) *header;
-@@ -2180,7 +2180,7 @@ _dl_map_object (struct link_map *loader, const char *name,
+@@ -2180,7 +2179,7 @@ _dl_map_object (struct link_map *loader, const char *name,
  
    void *stack_end = __libc_stack_end;
    return _dl_map_object_from_fd (name, origname, fd, &fb, realname, loader,
@@ -1292,6 +1977,79 @@
  }
  
  struct add_path_state
+diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
+index ece647f009..2f5cd674f5 100644
+--- a/elf/dl-lookup.c
++++ b/elf/dl-lookup.c
+@@ -100,12 +100,22 @@ check_match (const char *const undef_name,
+ 	  /* We can match the version information or use the
+ 	     default one if it is not hidden.  */
+ 	  ElfW(Half) ndx = verstab[symidx] & 0x7fff;
+-	  if ((map->l_versions[ndx].hash != version->hash
+-	       || strcmp (map->l_versions[ndx].name, version->name))
+-	      && (version->hidden || map->l_versions[ndx].hash
+-		  || (verstab[symidx] & 0x8000)))
+-	    /* It's not the version we want.  */
+-	    return NULL;
++	  if (map->l_versions[ndx].hash == version->hash
++	      && strcmp (map->l_versions[ndx].name, version->name) == 0)
++	    /* This is an exact version match.  Return the symbol below.  */
++	    ;
++	  else
++	    {
++	      if (!version->hidden
++		  && map->l_versions[ndx].name[0] == '\0'
++		  && (verstab[symidx] & 0x8000) == 0
++		  && (*num_versions)++ == 0)
++		/* This is the global default version.  Store it as a
++		   fallback match.  */
++		*versioned_sym = sym;
++
++	      return NULL;
++	    }
+ 	}
+     }
+   else
+diff --git a/elf/dl-open.c b/elf/dl-open.c
+index 4c12ddec59..894a9a7bb6 100644
+--- a/elf/dl-open.c
++++ b/elf/dl-open.c
+@@ -594,6 +594,16 @@ dl_open_worker_begin (void *a)
+       if ((mode & RTLD_GLOBAL) && new->l_global == 0)
+ 	add_to_global_update (new);
+ 
++      /* It is not possible to run the ELF constructor for the new
++	 link map if it has not executed yet: If this dlopen call came
++	 from an ELF constructor that has not put that object into a
++	 consistent state, completing initialization for the entire
++	 scope will expose objects that have this partially
++	 constructed object among its dependencies to this
++	 inconsistent state.  This could happen even with a benign
++	 dlopen (NULL, RTLD_LAZY) call from a constructor of an
++	 initially loaded shared object.  */
++
+       return;
+     }
+ 
+@@ -771,8 +781,7 @@ dl_open_worker (void *a)
+ #ifdef SHARED
+ 	bool was_not_consistent  = r->r_state != RT_CONSISTENT;
+ #endif
+-	r->r_state = RT_CONSISTENT;
+-	_dl_debug_state ();
++	_dl_debug_change_state (r, RT_CONSISTENT);
+ 	LIBC_PROBE (map_complete, 3, nsid, r, args->map);
+ 
+ #ifdef SHARED
+@@ -841,7 +850,7 @@ no more namespaces available for dlmopen()"));
+ 	}
+ 
+       GL(dl_ns)[nsid].libc_map = NULL;
+-      _dl_debug_update (nsid)->r_state = RT_CONSISTENT;
++      _dl_debug_change_state (_dl_debug_update (nsid), RT_CONSISTENT);
+     }
+   /* Never allow loading a DSO in a namespace which is empty.  Such
+      direct placements is only causing problems.  Also don't allow
 diff --git a/elf/dl-reloc-static-pie.c b/elf/dl-reloc-static-pie.c
 index e34bf5f7ce..758bf9893e 100644
 --- a/elf/dl-reloc-static-pie.c
@@ -1352,8 +2110,26 @@
        default: 1
      }
    }
+diff --git a/elf/dl-version.c b/elf/dl-version.c
+index d414bd1e18..2fbf4942b9 100644
+--- a/elf/dl-version.c
++++ b/elf/dl-version.c
+@@ -357,6 +357,13 @@ _dl_check_map_versions (struct link_map *map, int verbose, int trace_mode)
+ 	      ent = (ElfW(Verdef) *) ((char *) ent + ent->vd_next);
+ 	    }
+ 	}
++
++      /* The empty string has ELF hash zero.  This avoids a NULL check
++	 before the version string comparison in check_match in
++	 dl-lookup.c.  */
++      for (unsigned int i = 0; i < map->l_nversions; ++i)
++	if (map->l_versions[i].name == NULL)
++	  map->l_versions[i].name = "";
+     }
+ 
+   /* When there is a DT_VERNEED entry with libc.so on DT_NEEDED, issue
 diff --git a/elf/rtld.c b/elf/rtld.c
-index 00bec15316..c1e9721def 100644
+index 00bec15316..18c96bd5c1 100644
 --- a/elf/rtld.c
 +++ b/elf/rtld.c
 @@ -1242,6 +1242,60 @@ rtld_setup_main_map (struct link_map *main_map)
@@ -1465,6 +2241,138 @@
  
    /* Add the dynamic linker to the TLS list if it also uses TLS.  */
    if (_dl_rtld_map.l_tls_blocksize != 0)
+@@ -1783,8 +1811,7 @@ dl_main (const ElfW(Phdr) *phdr,
+   elf_setup_debug_entry (main_map, r);
+ 
+   /* We start adding objects.  */
+-  r->r_state = RT_ADD;
+-  _dl_debug_state ();
++  _dl_debug_change_state (r, RT_ADD);
+   LIBC_PROBE (init_start, 2, LM_ID_BASE, r);
+ 
+   /* Auditing checkpoint: we are ready to signal that the initial map
+@@ -2319,6 +2346,9 @@ dl_main (const ElfW(Phdr) *phdr,
+ 
+       __rtld_mutex_init ();
+       __rtld_malloc_init_real (main_map);
++
++      /* Update copy-relocated _r_debug if necessary.  */
++      _dl_debug_post_relocate (main_map);
+     }
+ 
+   /* All ld.so initialization is complete.  Apply RELRO.  */
+@@ -2339,8 +2369,7 @@ dl_main (const ElfW(Phdr) *phdr,
+   /* Notify the debugger all new objects are now ready to go.  We must re-get
+      the address since by now the variable might be in another object.  */
+   r = _dl_debug_update (LM_ID_BASE);
+-  r->r_state = RT_CONSISTENT;
+-  _dl_debug_state ();
++  _dl_debug_change_state (r, RT_CONSISTENT);
+   LIBC_PROBE (init_complete, 2, LM_ID_BASE, r);
+ 
+   /* Auditing checkpoint: we have added all objects.  */
+diff --git a/elf/sprof.c b/elf/sprof.c
+index 4baff86d2a..1c9807dd6d 100644
+--- a/elf/sprof.c
++++ b/elf/sprof.c
+@@ -38,6 +38,7 @@
+ #include <sys/mman.h>
+ #include <sys/param.h>
+ #include <sys/stat.h>
++#include <intprops.h>
+ 
+ /* Get libc version number.  */
+ #include "../version.h"
+@@ -410,6 +411,7 @@ load_shobj (const char *name)
+   int fd;
+   ElfW(Shdr) *shdr;
+   size_t pagesize = getpagesize ();
++  struct stat st;
+ 
+   /* Since we use dlopen() we must be prepared to work around the sometimes
+      strange lookup rules for the shared objects.  If we have a file foo.so
+@@ -553,14 +555,39 @@ load_shobj (const char *name)
+     error (EXIT_FAILURE, errno, _("Reopening shared object `%s' failed"),
+ 	   map->l_name);
+ 
++  if (fstat (fd, &st) < 0)
++    error (EXIT_FAILURE, errno, _("stat(%s) failure"), map->l_name);
++
++  /* We're depending on data that's being read from the file, so be a
++     bit paranoid here and make sure the requests are reasonable -
++     i.e. both size and offset are nonnegative and smaller than the
++     file size, as well as the offset of the end of the data.  PREAD
++     would have failed anyway, but this is more robust and explains
++     what happened better.  Note that SZ must be unsigned and OFF may
++     be signed or unsigned.  */
++#define PCHECK(sz1,off1) {						\
++    size_t sz = sz1, end_off;						\
++    off_t off = off1;							\
++    if (sz > st.st_size							\
++	|| off < 0 || off > st.st_size					\
++	|| INT_ADD_WRAPV (sz, off, &end_off)				\
++	|| end_off > st.st_size)					\
++      error (EXIT_FAILURE, ERANGE,					\
++	     _("read outside of file extents %zu + %jd > %jd"),		\
++	     sz, (intmax_t) off, (intmax_t) st.st_size);		\
++	}
++
+   /* Map the section header.  */
+   size_t size = ehdr->e_shnum * sizeof (ElfW(Shdr));
+   shdr = (ElfW(Shdr) *) alloca (size);
++  PCHECK (size, ehdr->e_shoff);
+   if (pread (fd, shdr, size, ehdr->e_shoff) != size)
+     error (EXIT_FAILURE, errno, _("reading of section headers failed"));
+ 
+   /* Get the section header string table.  */
+   char *shstrtab = (char *) alloca (shdr[ehdr->e_shstrndx].sh_size);
++  PCHECK (shdr[ehdr->e_shstrndx].sh_size,
++	  shdr[ehdr->e_shstrndx].sh_offset);
+   if (pread (fd, shstrtab, shdr[ehdr->e_shstrndx].sh_size,
+ 	     shdr[ehdr->e_shstrndx].sh_offset)
+       != shdr[ehdr->e_shstrndx].sh_size)
+@@ -588,6 +615,7 @@ load_shobj (const char *name)
+       size_t size = debuglink_entry->sh_size;
+       char *debuginfo_fname = (char *) alloca (size + 1);
+       debuginfo_fname[size] = '\0';
++      PCHECK (size, debuglink_entry->sh_offset);
+       if (pread (fd, debuginfo_fname, size, debuglink_entry->sh_offset)
+ 	  != size)
+ 	{
+@@ -641,21 +669,32 @@ load_shobj (const char *name)
+       if (fd2 != -1)
+ 	{
+ 	  ElfW(Ehdr) ehdr2;
++	  struct stat st;
++
++	  if (fstat (fd2, &st) < 0)
++	    error (EXIT_FAILURE, errno, _("stat(%s) failure"), workbuf);
+ 
+ 	  /* Read the ELF header.  */
++	  PCHECK (sizeof (ehdr2), 0);
+ 	  if (pread (fd2, &ehdr2, sizeof (ehdr2), 0) != sizeof (ehdr2))
+ 	    error (EXIT_FAILURE, errno,
+ 		   _("reading of ELF header failed"));
+ 
+ 	  /* Map the section header.  */
+-	  size_t size = ehdr2.e_shnum * sizeof (ElfW(Shdr));
++	  size_t size;
++	  if (INT_MULTIPLY_WRAPV (ehdr2.e_shnum, sizeof (ElfW(Shdr)), &size))
++	    error (EXIT_FAILURE, ERANGE, _("too many section headers"));
++
+ 	  ElfW(Shdr) *shdr2 = (ElfW(Shdr) *) alloca (size);
++	  PCHECK (size, ehdr2.e_shoff);
+ 	  if (pread (fd2, shdr2, size, ehdr2.e_shoff) != size)
+ 	    error (EXIT_FAILURE, errno,
+ 		   _("reading of section headers failed"));
+ 
+ 	  /* Get the section header string table.  */
+ 	  shstrtab = (char *) alloca (shdr2[ehdr2.e_shstrndx].sh_size);
++	  PCHECK (shdr2[ehdr2.e_shstrndx].sh_size,
++		  shdr2[ehdr2.e_shstrndx].sh_offset);
+ 	  if (pread (fd2, shstrtab, shdr2[ehdr2.e_shstrndx].sh_size,
+ 		     shdr2[ehdr2.e_shstrndx].sh_offset)
+ 	      != shdr2[ehdr2.e_shstrndx].sh_size)
 diff --git a/elf/tst-audit-tlsdesc-dlopen2.c b/elf/tst-audit-tlsdesc-dlopen2.c
 new file mode 100644
 index 0000000000..7ba2c4129a
@@ -1582,6 +2490,210 @@
 +
 +  return LAV_CURRENT;
 +}
+diff --git a/elf/tst-dlmopen4-nonpic.c b/elf/tst-dlmopen4-nonpic.c
+new file mode 100644
+index 0000000000..ad4e409953
+--- /dev/null
++++ b/elf/tst-dlmopen4-nonpic.c
+@@ -0,0 +1,2 @@
++#define BUILD_FOR_NONPIC
++#include "tst-dlmopen4.c"
+diff --git a/elf/tst-dlmopen4-pic.c b/elf/tst-dlmopen4-pic.c
+new file mode 100644
+index 0000000000..919fa85c25
+--- /dev/null
++++ b/elf/tst-dlmopen4-pic.c
+@@ -0,0 +1,2 @@
++#define BUILD_FOR_PIC
++#include "tst-dlmopen4.c"
+diff --git a/elf/tst-dlmopen4.c b/elf/tst-dlmopen4.c
+index 64e007e3dc..5cda024fdf 100644
+--- a/elf/tst-dlmopen4.c
++++ b/elf/tst-dlmopen4.c
+@@ -46,6 +46,15 @@ do_test (void)
+   TEST_COMPARE (debug->base.r_version, 1);
+   TEST_VERIFY_EXIT (debug->r_next == NULL);
+ 
++#ifdef BUILD_FOR_PIC
++  /* In a PIC build, using _r_debug directly should give us the same
++     object.  */
++  TEST_VERIFY (&_r_debug == &debug->base);
++#endif
++#ifdef BUILD_FOR_NONPIC
++  TEST_COMPARE (_r_debug.r_version, 1);
++#endif
++
+   void *h = xdlmopen (LM_ID_NEWLM, "$ORIGIN/tst-dlmopen1mod.so",
+ 		      RTLD_LAZY);
+ 
+@@ -57,6 +66,19 @@ do_test (void)
+   const char *name = basename (debug->r_next->base.r_map->l_name);
+   TEST_COMPARE_STRING (name, "tst-dlmopen1mod.so");
+ 
++#ifdef BUILD_FOR_NONPIC
++  /* If a copy relocation is used, it must be at version 1.  */
++  if (&_r_debug != &debug->base)
++    {
++      TEST_COMPARE (_r_debug.r_version, 1);
++      TEST_COMPARE ((uintptr_t) _r_debug.r_map,
++		    (uintptr_t) debug->base.r_map);
++      TEST_COMPARE (_r_debug.r_brk, debug->base.r_brk);
++      TEST_COMPARE (_r_debug.r_state, debug->base.r_state);
++      TEST_COMPARE (_r_debug.r_ldbase, debug->base.r_ldbase);
++    }
++#endif
++
+   xdlclose (h);
+ 
+   return 0;
+diff --git a/elf/tst-dlopen-constructor-null-mod1.c b/elf/tst-dlopen-constructor-null-mod1.c
+new file mode 100644
+index 0000000000..70a7a0ad46
+--- /dev/null
++++ b/elf/tst-dlopen-constructor-null-mod1.c
+@@ -0,0 +1,55 @@
++/* Module calling dlopen (NULL, RTLD_LAZY) to obtain the global scope.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <dlfcn.h>
++#include <stddef.h>
++#include <stdio.h>
++#include <stdlib.h>
++
++int mod1_status;
++
++static void __attribute__ ((constructor))
++init (void)
++{
++  puts ("info: tst-dlopen-constructor-null-mod1.so constructor");
++
++  void *handle = dlopen (NULL, RTLD_LAZY);
++  if (handle == NULL)
++    {
++      printf ("error: %s\n", dlerror ());
++      exit (1);
++    }
++  puts ("info: dlopen returned");
++  if (dlsym (handle, "malloc") != malloc)
++    {
++      puts ("error: dlsym did not produce expected result");
++      exit (1);
++    }
++  dlclose (handle);
++
++  /* Check that the second module's constructor has not executed.   */
++  if (getenv ("mod2_status") != NULL)
++    {
++      printf ("error: mod2_status environment variable set: %s\n",
++              getenv ("mod2_status"));
++      exit (1);
++    }
++
++  /* Communicate to the second module that the constructor executed.   */
++  mod1_status = 1;
++}
+diff --git a/elf/tst-dlopen-constructor-null-mod2.c b/elf/tst-dlopen-constructor-null-mod2.c
+new file mode 100644
+index 0000000000..d6e945beae
+--- /dev/null
++++ b/elf/tst-dlopen-constructor-null-mod2.c
+@@ -0,0 +1,37 @@
++/* Module whose constructor should not be invoked by dlopen (NULL, RTLD_LAZY).
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++extern int mod1_status;
++int mod2_status;
++
++static void __attribute__ ((constructor))
++init (void)
++{
++  printf ("info: tst-dlopen-constructor-null-mod2.so constructor"
++          " (mod1_status=%d)\n", mod1_status);
++  if (!(mod1_status == 1 && mod2_status == 0))
++    {
++      puts ("error: mod1_status == 1 && mod2_status == 0 expected");
++      exit (1);
++    }
++  setenv ("mod2_status", "constructed", 1);
++  mod2_status = 1;
++}
+diff --git a/elf/tst-dlopen-constructor-null.c b/elf/tst-dlopen-constructor-null.c
+new file mode 100644
+index 0000000000..db90643325
+--- /dev/null
++++ b/elf/tst-dlopen-constructor-null.c
+@@ -0,0 +1,38 @@
++/* Verify that dlopen (NULL, RTLD_LAZY) does not complete initialization.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++/* This test mimics what the glvndSetupPthreads function in libglvnd
++   does. */
++
++#include <stdlib.h>
++#include <support/check.h>
++
++/* Defined and initialized in the shared objects.  */
++extern int mod1_status;
++extern int mod2_status;
++
++static int
++do_test (void)
++{
++  TEST_COMPARE (mod1_status, 1);
++  TEST_COMPARE (mod2_status, 1);
++  TEST_COMPARE_STRING (getenv ("mod2_status"), "constructed");
++  return 0;
++}
++
++#include <support/test-driver.c>
 diff --git a/elf/tst-dlopen-sgid-mod.c b/elf/tst-dlopen-sgid-mod.c
 new file mode 100644
 index 0000000000..5eb79eef48
@@ -2077,6 +3189,379 @@
 +glibc.rtld.execstack: 1 (min: 0, max: 2)
  glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10)
  glibc.rtld.optional_static_tls: 0x200 (min: 0x0, max: 0x[f]+)
+diff --git a/elf/tst-tls23-mod.c b/elf/tst-tls23-mod.c
+new file mode 100644
+index 0000000000..3ee4c70e40
+--- /dev/null
++++ b/elf/tst-tls23-mod.c
+@@ -0,0 +1,32 @@
++/* DSO used by tst-tls23.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <tst-tls23.h>
++
++__thread struct tls tls_var0 __attribute__ ((visibility ("hidden")));
++
++struct tls *
++apply_tls (struct tls *p)
++{
++  INIT_TLS_CALL ();
++  BEFORE_TLS_CALL ();
++  tls_var0 = *p;
++  struct tls *ret = &tls_var0;
++  AFTER_TLS_CALL ();
++  return ret;
++}
+diff --git a/elf/tst-tls23.c b/elf/tst-tls23.c
+new file mode 100644
+index 0000000000..afe594c067
+--- /dev/null
++++ b/elf/tst-tls23.c
+@@ -0,0 +1,106 @@
++/* Test that __tls_get_addr preserves caller-saved registers.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <dlfcn.h>
++#include <pthread.h>
++#include <support/xdlfcn.h>
++#include <support/xthread.h>
++#include <support/check.h>
++#include <support/test-driver.h>
++#include <tst-tls23.h>
++
++#ifndef IS_SUPPORTED
++# define IS_SUPPORTED() true
++#endif
++
++/* An architecture can define it to clobber caller-saved registers in
++   malloc below to verify that __tls_get_addr won't change caller-saved
++   registers.  */
++#ifndef PREPARE_MALLOC
++# define PREPARE_MALLOC()
++#endif
++
++extern void * __libc_malloc (size_t);
++
++size_t malloc_counter = 0;
++
++void *
++malloc (size_t n)
++{
++  PREPARE_MALLOC ();
++  malloc_counter++;
++  return __libc_malloc (n);
++}
++
++static void *mod;
++static const char *modname = "tst-tls23-mod.so";
++
++static void
++open_mod (void)
++{
++  mod = xdlopen (modname, RTLD_LAZY);
++  printf ("open %s\n", modname);
++}
++
++static void
++close_mod (void)
++{
++  xdlclose (mod);
++  mod = NULL;
++  printf ("close %s\n", modname);
++}
++
++static void
++access_mod (const char *sym)
++{
++  struct tls var = { -4, -4, -4, -4 };
++  struct tls *(*f) (struct tls *) = xdlsym (mod, sym);
++  /* Check that our malloc is called.  */
++  malloc_counter = 0;
++  struct tls *p = f (&var);
++  TEST_VERIFY (malloc_counter != 0);
++  printf ("access %s: %s() = %p\n", modname, sym, p);
++  TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0);
++  ++(p->a);
++}
++
++static void *
++start (void *arg)
++{
++  access_mod ("apply_tls");
++  return arg;
++}
++
++static int
++do_test (void)
++{
++  if (!IS_SUPPORTED ())
++    return EXIT_UNSUPPORTED;
++
++  open_mod ();
++  pthread_t t = xpthread_create (NULL, start, NULL);
++  xpthread_join (t);
++  close_mod ();
++
++  return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/elf/tst-tls23.h b/elf/tst-tls23.h
+new file mode 100644
+index 0000000000..d0e734569c
+--- /dev/null
++++ b/elf/tst-tls23.h
+@@ -0,0 +1,40 @@
++/* Test that __tls_get_addr preserves caller-saved registers.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <stdint.h>
++
++struct tls
++{
++  int64_t a, b, c, d;
++};
++
++extern struct tls *apply_tls (struct tls *);
++
++/* An architecture can define them to verify that caller-saved registers
++   aren't changed by __tls_get_addr.  */
++#ifndef INIT_TLS_CALL
++# define INIT_TLS_CALL()
++#endif
++
++#ifndef BEFORE_TLS_CALL
++# define BEFORE_TLS_CALL()
++#endif
++
++#ifndef AFTER_TLS_CALL
++# define AFTER_TLS_CALL()
++#endif
+diff --git a/elf/tst-version-hash-zero-linkmod.c b/elf/tst-version-hash-zero-linkmod.c
+new file mode 100644
+index 0000000000..15e2506d01
+--- /dev/null
++++ b/elf/tst-version-hash-zero-linkmod.c
+@@ -0,0 +1,22 @@
++/* Stub module for linking tst-version-hash-zero-refmod.so.
++   Copyright (C) 2025  Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public License as
++   published by the Free Software Foundation; either version 2.1 of the
++   License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; see the file COPYING.LIB.  If
++   not, see <https://www.gnu.org/licenses/>.  */
++
++/* The version script assigns a different symbol version for the stub
++   module.  Loading the module with the incorrect version is expected
++   to fail.  */
++#include "tst-version-hash-zero-mod.c"
+diff --git a/elf/tst-version-hash-zero-linkmod.map b/elf/tst-version-hash-zero-linkmod.map
+new file mode 100644
+index 0000000000..2dba7c22d7
+--- /dev/null
++++ b/elf/tst-version-hash-zero-linkmod.map
+@@ -0,0 +1,7 @@
++Base {
++  local: *;
++};
++
++OTHER_VERSION {
++  global: global_variable;
++} Base;
+diff --git a/elf/tst-version-hash-zero-mod.c b/elf/tst-version-hash-zero-mod.c
+new file mode 100644
+index 0000000000..ac6b0dc4a5
+--- /dev/null
++++ b/elf/tst-version-hash-zero-mod.c
+@@ -0,0 +1,20 @@
++/* Test module with a zero version symbol hash.
++   Copyright (C) 2025  Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public License as
++   published by the Free Software Foundation; either version 2.1 of the
++   License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; see the file COPYING.LIB.  If
++   not, see <https://www.gnu.org/licenses/>.  */
++
++/* The symbol version is assigned by version script.  */
++int global_variable;
+diff --git a/elf/tst-version-hash-zero-mod.map b/elf/tst-version-hash-zero-mod.map
+new file mode 100644
+index 0000000000..41eaff7914
+--- /dev/null
++++ b/elf/tst-version-hash-zero-mod.map
+@@ -0,0 +1,13 @@
++Base {
++  local: *;
++};
++
++/* Define the version so that tst-version-hash-zero-refmod.so passes
++   the initial symbol version check.  */
++OTHER_VERSION {
++} Base;
++
++/* This version string hashes to zero.  */
++PPPPPPPPPPPP {
++  global: global_variable;
++} Base;
+diff --git a/elf/tst-version-hash-zero-refmod.c b/elf/tst-version-hash-zero-refmod.c
+new file mode 100644
+index 0000000000..cd8b3dcef5
+--- /dev/null
++++ b/elf/tst-version-hash-zero-refmod.c
+@@ -0,0 +1,23 @@
++/* Test module that triggers a relocation failure in tst-version-hash-zero.
++   Copyright (C) 2025  Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public License as
++   published by the Free Software Foundation; either version 2.1 of the
++   License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; see the file COPYING.LIB.  If
++   not, see <https://www.gnu.org/licenses/>.  */
++
++/* This is bound to global_variable@@OTHER_VERSION via
++   tst-version-hash-zero-linkmod.so, but at run time, only
++   global_variable@PPPPPPPPPPPP exists.  */
++extern int global_variable;
++int *pointer_variable = &global_variable;
+diff --git a/elf/tst-version-hash-zero.c b/elf/tst-version-hash-zero.c
+new file mode 100644
+index 0000000000..66a0db4f51
+--- /dev/null
++++ b/elf/tst-version-hash-zero.c
+@@ -0,0 +1,56 @@
++/* Symbols with version hash zero should not match any version (bug 29190).
++   Copyright (C) 2025  Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public License as
++   published by the Free Software Foundation; either version 2.1 of the
++   License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; see the file COPYING.LIB.  If
++   not, see <https://www.gnu.org/licenses/>.  */
++
++#include <support/check.h>
++#include <support/xdlfcn.h>
++#include <stddef.h>
++#include <string.h>
++
++static int
++do_test (void)
++{
++  void *handle = xdlopen ("tst-version-hash-zero-mod.so", RTLD_NOW);
++
++  /* This used to crash because some struct r_found_version entries
++     with hash zero did not have valid version strings.  */
++  TEST_VERIFY (xdlvsym (handle, "global_variable", "PPPPPPPPPPPP") != NULL);
++
++  /* Consistency check.  */
++  TEST_VERIFY (xdlsym (handle, "global_variable")
++               == xdlvsym (handle, "global_variable", "PPPPPPPPPPPP"));
++
++  /* This symbol version is supposed to be missing.  */
++  TEST_VERIFY (dlvsym (handle, "global_variable", "OTHER_VERSION") == NULL);
++
++  /* tst-version-hash-zero-refmod.so references
++     global_variable@@OTHER_VERSION and is expected to fail to load.
++     dlvsym sets the hidden flag during lookup.  Relocation does not,
++     so this exercises a different failure case.  */
++  TEST_VERIFY_EXIT (dlopen ("tst-version-hash-zero-refmod.so", RTLD_NOW)
++                    == NULL);
++  const char *message = dlerror ();
++  if (strstr (message,
++              ": undefined symbol: global_variable, version OTHER_VERSION")
++      == NULL)
++    FAIL_EXIT1 ("unexpected dlopen failure: %s", message);
++
++  xdlclose (handle);
++  return 0;
++}
++
++#include <support/test-driver.c>
 diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
 index 7dba5d8dff..558cfb11a3 100644
 --- a/iconv/iconv_prog.c
@@ -2114,6 +3599,65 @@
      if ! cmp -s "$tmp/out" "$tmp/expected" ; then
          echo "error: iconv output difference" >&$logfd
          echo "*** expected ***" >&$logfd
+diff --git a/include/ctype.h b/include/ctype.h
+index 493a6f80ce..a15e5b6678 100644
+--- a/include/ctype.h
++++ b/include/ctype.h
+@@ -24,33 +24,35 @@ libc_hidden_proto (toupper)
+    NL_CURRENT_INDIRECT.  */
+ 
+ #  include "../locale/localeinfo.h"
+-#  include <libc-tsd.h>
+ 
+ #  ifndef CTYPE_EXTERN_INLINE	/* Used by ctype/ctype-info.c, which see.  */
+ #   define CTYPE_EXTERN_INLINE extern inline
+ #  endif
+ 
+-__libc_tsd_define (extern, const uint16_t *, CTYPE_B)
+-__libc_tsd_define (extern, const int32_t *, CTYPE_TOUPPER)
+-__libc_tsd_define (extern, const int32_t *, CTYPE_TOLOWER)
++extern __thread const uint16_t * __libc_tsd_CTYPE_B
++  attribute_hidden attribute_tls_model_ie;
++extern __thread const int32_t * __libc_tsd_CTYPE_TOUPPER
++  attribute_hidden attribute_tls_model_ie;
++extern __thread const int32_t * __libc_tsd_CTYPE_TOLOWER
++  attribute_hidden attribute_tls_model_ie;
+ 
+ 
+ CTYPE_EXTERN_INLINE const uint16_t ** __attribute__ ((const))
+ __ctype_b_loc (void)
+ {
+-  return __libc_tsd_address (const uint16_t *, CTYPE_B);
++  return &__libc_tsd_CTYPE_B;
+ }
+ 
+ CTYPE_EXTERN_INLINE const int32_t ** __attribute__ ((const))
+ __ctype_toupper_loc (void)
+ {
+-  return __libc_tsd_address (const int32_t *, CTYPE_TOUPPER);
++  return &__libc_tsd_CTYPE_TOUPPER;
+ }
+ 
+ CTYPE_EXTERN_INLINE const int32_t ** __attribute__ ((const))
+ __ctype_tolower_loc (void)
+ {
+-  return __libc_tsd_address (const int32_t *, CTYPE_TOLOWER);
++  return &__libc_tsd_CTYPE_TOLOWER;
+ }
+ 
+ #  ifndef __NO_CTYPE
+@@ -64,6 +66,11 @@ __ctype_tolower_loc (void)
+ #   define __isdigit_l(c, l) ({ int __c = (c); __c >= '0' && __c <= '9'; })
+ #  endif  /* Not __NO_CTYPE.  */
+ 
++/* For use in initializers.  */
++extern const char _nl_C_LC_CTYPE_class[] attribute_hidden;
++extern const uint32_t _nl_C_LC_CTYPE_toupper[] attribute_hidden;
++extern const uint32_t _nl_C_LC_CTYPE_tolower[] attribute_hidden;
++
+ # endif	/* IS_IN (libc).  */
+ #endif  /* Not _ISOMAC.  */
+ 
 diff --git a/include/dlfcn.h b/include/dlfcn.h
 index f49ee1b0c9..a44420fa37 100644
 --- a/include/dlfcn.h
@@ -2128,6 +3672,207 @@
  
  /* Internally used flag.  */
  #define __RTLD_DLOPEN	0x80000000
+diff --git a/include/link.h b/include/link.h
+index 518bfd1670..41e5e54a7b 100644
+--- a/include/link.h
++++ b/include/link.h
+@@ -365,6 +365,8 @@ struct auditstate
+    dynamic linker.  */
+ extern struct r_debug_extended _r_debug_extended attribute_hidden;
+ 
++rtld_hidden_proto (_r_debug)
++
+ #if __ELF_NATIVE_CLASS == 32
+ # define symbind symbind32
+ # define LA_SYMBIND "la_symbind32"
+diff --git a/include/rpc/rpc.h b/include/rpc/rpc.h
+index f5cee6caef..ba967833ad 100644
+--- a/include/rpc/rpc.h
++++ b/include/rpc/rpc.h
+@@ -3,8 +3,6 @@
+ 
+ # ifndef _ISOMAC
+ 
+-#include <libc-tsd.h>
+-
+ /* Now define the internal interfaces.  */
+ extern unsigned long _create_xid (void);
+ 
+@@ -47,7 +45,8 @@ extern void __rpc_thread_key_cleanup (void) attribute_hidden;
+ 
+ extern void __rpc_thread_destroy (void) attribute_hidden;
+ 
+-__libc_tsd_define (extern, struct rpc_thread_variables *, RPC_VARS)
++extern __thread struct rpc_thread_variables *__libc_tsd_RPC_VARS
++  attribute_hidden attribute_tls_model_ie;
+ 
+ #define RPC_THREAD_VARIABLE(x) (__rpc_thread_variables()->x)
+ 
+diff --git a/locale/lc-ctype.c b/locale/lc-ctype.c
+index 867f829be5..47be1c9a39 100644
+--- a/locale/lc-ctype.c
++++ b/locale/lc-ctype.c
+@@ -64,12 +64,9 @@ _nl_postload_ctype (void)
+      in fact using the global locale.  */
+   if (_NL_CURRENT_LOCALE == &_nl_global_locale)
+     {
+-      __libc_tsd_set (const uint16_t *, CTYPE_B,
+-		      (void *) _nl_global_locale.__ctype_b);
+-      __libc_tsd_set (const int32_t *, CTYPE_TOUPPER,
+-		      (void *) _nl_global_locale.__ctype_toupper);
+-      __libc_tsd_set (const int32_t *, CTYPE_TOLOWER,
+-		      (void *) _nl_global_locale.__ctype_tolower);
++      __libc_tsd_CTYPE_B = _nl_global_locale.__ctype_b;
++      __libc_tsd_CTYPE_TOUPPER = _nl_global_locale.__ctype_toupper;
++      __libc_tsd_CTYPE_TOLOWER = _nl_global_locale.__ctype_tolower;
+     }
+ 
+ #include <shlib-compat.h>
+diff --git a/locale/localeinfo.h b/locale/localeinfo.h
+index ab1b5e5659..f503792d04 100644
+--- a/locale/localeinfo.h
++++ b/locale/localeinfo.h
+@@ -236,10 +236,9 @@ extern struct __locale_struct _nl_global_locale attribute_hidden;
+ 
+ /* This fetches the thread-local locale_t pointer, either one set with
+    uselocale or &_nl_global_locale.  */
+-#define _NL_CURRENT_LOCALE	(__libc_tsd_get (locale_t, LOCALE))
+-#include <libc-tsd.h>
+-__libc_tsd_define (extern, locale_t, LOCALE)
+-
++#define _NL_CURRENT_LOCALE	__libc_tsd_LOCALE
++extern __thread locale_t __libc_tsd_LOCALE
++  attribute_hidden attribute_tls_model_ie;
+ 
+ /* For static linking it is desireable to avoid always linking in the code
+    and data for every category when we can tell at link time that they are
+diff --git a/locale/uselocale.c b/locale/uselocale.c
+index 606d043d57..5df87a237f 100644
+--- a/locale/uselocale.c
++++ b/locale/uselocale.c
+@@ -34,7 +34,7 @@ __uselocale (locale_t newloc)
+     {
+       const locale_t locobj
+ 	= newloc == LC_GLOBAL_LOCALE ? &_nl_global_locale : newloc;
+-      __libc_tsd_set (locale_t, LOCALE, locobj);
++      __libc_tsd_LOCALE = locobj;
+ 
+ #ifdef NL_CURRENT_INDIRECT
+       /* Now we must update all the per-category thread-local variables to
+@@ -62,11 +62,9 @@ __uselocale (locale_t newloc)
+ #endif
+ 
+       /* Update the special tsd cache of some locale data.  */
+-      __libc_tsd_set (const uint16_t *, CTYPE_B, (void *) locobj->__ctype_b);
+-      __libc_tsd_set (const int32_t *, CTYPE_TOLOWER,
+-		      (void *) locobj->__ctype_tolower);
+-      __libc_tsd_set (const int32_t *, CTYPE_TOUPPER,
+-		      (void *) locobj->__ctype_toupper);
++      __libc_tsd_CTYPE_B = locobj->__ctype_b;
++      __libc_tsd_CTYPE_TOLOWER = locobj->__ctype_tolower;
++      __libc_tsd_CTYPE_TOUPPER = locobj->__ctype_toupper;
+     }
+ 
+   return oldloc == &_nl_global_locale ? LC_GLOBAL_LOCALE : oldloc;
+diff --git a/locale/xlocale.c b/locale/xlocale.c
+index 30c094d43a..5e25c0e4b8 100644
+--- a/locale/xlocale.c
++++ b/locale/xlocale.c
+@@ -18,18 +18,13 @@
+ 
+ #include <locale.h>
+ #include "localeinfo.h"
++#include <ctype.h>
+ 
+ #define DEFINE_CATEGORY(category, category_name, items, a) \
+ extern struct __locale_data _nl_C_##category;
+ #include "categories.def"
+ #undef	DEFINE_CATEGORY
+ 
+-/* Defined in locale/C-ctype.c.  */
+-extern const char _nl_C_LC_CTYPE_class[] attribute_hidden;
+-extern const char _nl_C_LC_CTYPE_toupper[] attribute_hidden;
+-extern const char _nl_C_LC_CTYPE_tolower[] attribute_hidden;
+-
+-
+ const struct __locale_struct _nl_C_locobj attribute_hidden =
+   {
+     .__locales =
+diff --git a/localedata/locales/bg_BG b/localedata/locales/bg_BG
+index 159a6c3334..eda2a8d01b 100644
+--- a/localedata/locales/bg_BG
++++ b/localedata/locales/bg_BG
+@@ -248,8 +248,8 @@ reorder-end
+ END LC_COLLATE
+ 
+ LC_MONETARY
+-int_curr_symbol           "BGN "
+-currency_symbol           "лв."
++int_curr_symbol           "EUR "
++currency_symbol           "€"
+ mon_decimal_point         ","
+ mon_thousands_sep         " "
+ mon_grouping              3
+diff --git a/malloc/malloc.c b/malloc/malloc.c
+index 27dfd1eb90..9423aba987 100644
+--- a/malloc/malloc.c
++++ b/malloc/malloc.c
+@@ -5106,7 +5106,7 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
+   INTERNAL_SIZE_T size;
+ 
+   nb = checked_request2size (bytes);
+-  if (nb == 0)
++  if (nb == 0 || alignment > PTRDIFF_MAX)
+     {
+       __set_errno (ENOMEM);
+       return NULL;
+@@ -5122,7 +5122,10 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
+      we don't find anything in those bins, the common malloc code will
+      scan starting at 2x.  */
+ 
+-  /* Call malloc with worst case padding to hit alignment. */
++  /* Call malloc with worst case padding to hit alignment.  ALIGNMENT is a
++     power of 2, so it tops out at (PTRDIFF_MAX >> 1) + 1, leaving plenty of
++     space to add MINSIZE and whatever checked_request2size adds to BYTES to
++     get NB.  Consequently, total below also does not overflow.  */
+   m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+ 
+   if (m == NULL)
+diff --git a/malloc/tst-malloc-too-large.c b/malloc/tst-malloc-too-large.c
+index a548a37b46..a1bda673a3 100644
+--- a/malloc/tst-malloc-too-large.c
++++ b/malloc/tst-malloc-too-large.c
+@@ -152,7 +152,6 @@ test_large_allocations (size_t size)
+ }
+ 
+ 
+-static long pagesize;
+ 
+ /* This function tests the following aligned memory allocation functions
+    using several valid alignments and precedes each allocation test with a
+@@ -171,8 +170,8 @@ test_large_aligned_allocations (size_t size)
+ 
+   /* All aligned memory allocation functions expect an alignment that is a
+      power of 2.  Given this, we test each of them with every valid
+-     alignment from 1 thru PAGESIZE.  */
+-  for (align = 1; align <= pagesize; align *= 2)
++     alignment for the type of ALIGN, i.e. until it wraps to 0.  */
++  for (align = 1; align > 0; align <<= 1)
+     {
+       test_setup ();
+ #if __GNUC_PREREQ (7, 0)
+@@ -265,11 +264,6 @@ do_test (void)
+   DIAG_IGNORE_NEEDS_COMMENT (7, "-Walloc-size-larger-than=");
+ #endif
+ 
+-  /* Aligned memory allocation functions need to be tested up to alignment
+-     size equivalent to page size, which should be a power of 2.  */
+-  pagesize = sysconf (_SC_PAGESIZE);
+-  TEST_VERIFY_EXIT (powerof2 (pagesize));
+-
+   /* Loop 1: Ensure that all allocations with SIZE close to SIZE_MAX, i.e.
+      in the range (SIZE_MAX - 2^14, SIZE_MAX], fail.
+ 
 diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in
 index 01ba689aa8..4f194da19d 100644
 --- a/math/auto-libm-test-in
@@ -2351,8 +4096,65 @@
  			iattr->stacksize = to - (size_t) iattr->stackaddr;
  #endif
  		      /* We succeed and no need to look further.  */
+diff --git a/nptl/pthread_mutex_trylock.c b/nptl/pthread_mutex_trylock.c
+index dbb8fcc754..392619021b 100644
+--- a/nptl/pthread_mutex_trylock.c
++++ b/nptl/pthread_mutex_trylock.c
+@@ -48,7 +48,8 @@ ___pthread_mutex_trylock (pthread_mutex_t *mutex)
+ 	  return 0;
+ 	}
+ 
+-      if (lll_trylock (mutex->__data.__lock) == 0)
++      if (atomic_load_relaxed (&(mutex->__data.__lock)) == 0
++	  && lll_trylock (mutex->__data.__lock) == 0)
+ 	{
+ 	  /* Record the ownership.  */
+ 	  mutex->__data.__owner = id;
+@@ -71,7 +72,10 @@ ___pthread_mutex_trylock (pthread_mutex_t *mutex)
+       /*FALL THROUGH*/
+     case PTHREAD_MUTEX_ADAPTIVE_NP:
+     case PTHREAD_MUTEX_ERRORCHECK_NP:
+-      if (lll_trylock (mutex->__data.__lock) != 0)
++      /* Mutex type is already loaded, lock check overhead should
++         be minimal.  */
++      if (atomic_load_relaxed (&(mutex->__data.__lock)) != 0
++	  || lll_trylock (mutex->__data.__lock) != 0)
+ 	break;
+ 
+       /* Record the ownership.  */
+diff --git a/nss/getXXbyYY_r.c b/nss/getXXbyYY_r.c
+index eae6c3480e..2b0735fb6a 100644
+--- a/nss/getXXbyYY_r.c
++++ b/nss/getXXbyYY_r.c
+@@ -157,19 +157,15 @@ __merge_einval (LOOKUP_TYPE *a,
+ 
+ #define CHECK_MERGE(err, status)		\
+   ({						\
+-    do						\
++    if (err)					\
+       {						\
+-	if (err)				\
+-	  {					\
+-	    __set_errno (err);			\
+-	    if (err == ERANGE)			\
+-	      status = NSS_STATUS_TRYAGAIN;	\
+-	    else				\
+-	      status = NSS_STATUS_UNAVAIL;	\
+-	    break;				\
+-	  }					\
++	__set_errno (err);			\
++	if (err == ERANGE)			\
++	  status = NSS_STATUS_TRYAGAIN;		\
++	else					\
++	  status = NSS_STATUS_UNAVAIL;		\
++	break;					\
+       }						\
+-    while (0);					\
+   })
+ 
+ /* Type of the lookup function we need here.  */
 diff --git a/posix/Makefile b/posix/Makefile
-index a650abf598..0e209a7ed0 100644
+index a650abf598..a9f3e814e5 100644
 --- a/posix/Makefile
 +++ b/posix/Makefile
 @@ -303,6 +303,7 @@ tests := \
@@ -2363,6 +4165,42 @@
    tst-regcomp-truncated \
    tst-regex \
    tst-regex2 \
+@@ -325,6 +326,7 @@ tests := \
+   tst-wait4 \
+   tst-waitid \
+   tst-wordexp-nocmd \
++  tst-wordexp-reuse \
+   tstgetopt \
+   # tests
+ 
+@@ -453,6 +455,8 @@ generated += \
+   tst-rxspencer-no-utf8.mtrace \
+   tst-vfork3-mem.out \
+   tst-vfork3.mtrace \
++  tst-wordexp-reuse-mem.out \
++  tst-wordexp-reuse.mtrace \
+   # generated
+ endif
+ endif
+@@ -488,6 +492,7 @@ tests-special += \
+   $(objpfx)tst-pcre-mem.out \
+   $(objpfx)tst-rxspencer-no-utf8-mem.out \
+   $(objpfx)tst-vfork3-mem.out \
++  $(objpfx)tst-wordexp-reuse-mem.out \
+   # tests-special
+ endif
+ endif
+@@ -771,3 +776,10 @@ $(objpfx)posix-conf-vars-def.h: $(..)scripts/gen-posix-conf-vars.awk \
+ 	$(make-target-directory)
+ 	$(AWK) -f $(filter-out Makefile, $^) > $@.tmp
+ 	mv -f $@.tmp $@
++
++tst-wordexp-reuse-ENV += MALLOC_TRACE=$(objpfx)tst-wordexp-reuse.mtrace \
++			 LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so
++
++$(objpfx)tst-wordexp-reuse-mem.out: $(objpfx)tst-wordexp-reuse.out
++	$(common-objpfx)malloc/mtrace $(objpfx)tst-wordexp-reuse.mtrace > $@; \
++	$(evaluate-test)
 diff --git a/posix/environ.c b/posix/environ.c
 index a0ed0d80ea..924effe3cd 100644
 --- a/posix/environ.c
@@ -2586,6 +4424,166 @@
 +}
 +
 +#include <support/test-driver.c>
+diff --git a/posix/tst-wordexp-reuse.c b/posix/tst-wordexp-reuse.c
+new file mode 100644
+index 0000000000..3926b9f557
+--- /dev/null
++++ b/posix/tst-wordexp-reuse.c
+@@ -0,0 +1,89 @@
++/* Test for wordexp with WRDE_REUSE flag.
++   Copyright (C) 2026 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <wordexp.h>
++#include <mcheck.h>
++
++#include <support/check.h>
++
++static int
++do_test (void)
++{
++  mtrace ();
++
++  {
++    wordexp_t p = { 0 };
++    TEST_COMPARE (wordexp ("one", &p, 0), 0);
++    TEST_COMPARE (p.we_wordc, 1);
++    TEST_COMPARE_STRING (p.we_wordv[0], "one");
++    TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE), 0);
++    TEST_COMPARE (p.we_wordc, 1);
++    TEST_COMPARE_STRING (p.we_wordv[0], "two");
++    wordfree (&p);
++  }
++
++  {
++    wordexp_t p = { .we_offs = 2 };
++    TEST_COMPARE (wordexp ("one", &p, 0), 0);
++    TEST_COMPARE (p.we_wordc, 1);
++    TEST_COMPARE_STRING (p.we_wordv[0], "one");
++    TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE | WRDE_DOOFFS), 0);
++    TEST_COMPARE (p.we_wordc, 1);
++    TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "two");
++    wordfree (&p);
++  }
++
++  {
++    wordexp_t p = { 0 };
++    TEST_COMPARE (wordexp ("one", &p, 0), 0);
++    TEST_COMPARE (p.we_wordc, 1);
++    TEST_COMPARE_STRING (p.we_wordv[0], "one");
++    TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE | WRDE_APPEND), 0);
++    TEST_COMPARE (p.we_wordc, 1);
++    TEST_COMPARE_STRING (p.we_wordv[0], "two");
++    wordfree (&p);
++  }
++
++  {
++    wordexp_t p = { .we_offs = 2 };
++    TEST_COMPARE (wordexp ("one", &p, WRDE_DOOFFS), 0);
++    TEST_COMPARE (p.we_wordc, 1);
++    TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "one");
++    TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE
++				      | WRDE_DOOFFS), 0);
++    TEST_COMPARE (p.we_wordc, 1);
++    TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "two");
++    wordfree (&p);
++  }
++
++  {
++    wordexp_t p = { .we_offs = 2 };
++    TEST_COMPARE (wordexp ("one", &p, WRDE_DOOFFS), 0);
++    TEST_COMPARE (p.we_wordc, 1);
++    TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "one");
++    TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE
++				      | WRDE_DOOFFS | WRDE_APPEND), 0);
++    TEST_COMPARE (p.we_wordc, 1);
++    TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "two");
++    wordfree (&p);
++  }
++
++  return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/posix/wordexp.c b/posix/wordexp.c
+index a69b732801..9df4bb7424 100644
+--- a/posix/wordexp.c
++++ b/posix/wordexp.c
+@@ -2216,7 +2216,9 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
+     {
+       /* Minimal implementation of WRDE_REUSE for now */
+       wordfree (pwordexp);
++      old_word.we_wordc = 0;
+       old_word.we_wordv = NULL;
++      pwordexp->we_wordc = 0;
+     }
+ 
+   if ((flags & WRDE_APPEND) == 0)
+diff --git a/resolv/nss_dns/dns-network.c b/resolv/nss_dns/dns-network.c
+index 519f8422ca..e14e959d7c 100644
+--- a/resolv/nss_dns/dns-network.c
++++ b/resolv/nss_dns/dns-network.c
+@@ -207,6 +207,10 @@ _nss_dns_getnetbyaddr_r (uint32_t net, int type, struct netent *result,
+       sprintf (qbuf, "%u.%u.%u.%u.in-addr.arpa", net_bytes[3], net_bytes[2],
+ 	       net_bytes[1], net_bytes[0]);
+       break;
++    default:
++      /* Default network (net is originally zero).  */
++      strcpy (qbuf, "0.0.0.0.in-addr.arpa");
++      break;
+     }
+ 
+   net_buffer.buf = orig_net_buffer = (querybuf *) alloca (1024);
+diff --git a/resolv/tst-resolv-network.c b/resolv/tst-resolv-network.c
+index d9f69649d0..181be80835 100644
+--- a/resolv/tst-resolv-network.c
++++ b/resolv/tst-resolv-network.c
+@@ -46,6 +46,9 @@ handle_code (const struct resolv_response_context *ctx,
+ {
+   switch (code)
+     {
++    case 0:
++      send_ptr (b, qname, qclass, qtype, "0.in-addr.arpa");
++      break;
+     case 1:
+       send_ptr (b, qname, qclass, qtype, "1.in-addr.arpa");
+       break;
+@@ -265,6 +268,9 @@ do_test (void)
+                 "error: TRY_AGAIN\n");
+ 
+   /* Lookup by address, success cases.  */
++  check_reverse (0,
++                 "name: 0.in-addr.arpa\n"
++                 "net: 0x00000000\n");
+   check_reverse (1,
+                  "name: 1.in-addr.arpa\n"
+                  "net: 0x00000001\n");
+diff --git a/stdio-common/printf-parsemb.c b/stdio-common/printf-parsemb.c
+index aad697abbc..a7ba52a9fa 100644
+--- a/stdio-common/printf-parsemb.c
++++ b/stdio-common/printf-parsemb.c
+@@ -17,6 +17,7 @@
+    <https://www.gnu.org/licenses/>.  */
+ 
+ #include <ctype.h>
++#include <errno.h>
+ #include <limits.h>
+ #include <stdlib.h>
+ #include <string.h>
 diff --git a/stdlib/Makefile b/stdlib/Makefile
 index 1c4fa2382f..c9c8f702a2 100644
 --- a/stdlib/Makefile
@@ -2715,6 +4713,29 @@
        exit (EXIT_SUCCESS);
      }
  }
+diff --git a/string/strerror.c b/string/strerror.c
+index 3c02640816..daabec82d3 100644
+--- a/string/strerror.c
++++ b/string/strerror.c
+@@ -21,5 +21,5 @@
+ char *
+ strerror (int errnum)
+ {
+-  return __strerror_l (errnum, __libc_tsd_get (locale_t, LOCALE));
++  return __strerror_l (errnum, __libc_tsd_LOCALE);
+ }
+diff --git a/sunrpc/rpc_thread.c b/sunrpc/rpc_thread.c
+index a04b7ec47f..e20f0a6230 100644
+--- a/sunrpc/rpc_thread.c
++++ b/sunrpc/rpc_thread.c
+@@ -3,7 +3,6 @@
+ #include <assert.h>
+ 
+ #include <libc-lock.h>
+-#include <libc-tsd.h>
+ #include <shlib-compat.h>
+ #include <libc-symbols.h>
+ 
 diff --git a/support/capture_subprocess.h b/support/capture_subprocess.h
 index 91d75e5d6b..b37462d0d1 100644
 --- a/support/capture_subprocess.h
@@ -2737,6 +4758,19 @@
  
  /* Deallocate the subprocess data captured by
     support_capture_subprocess.  */
+diff --git a/support/resolv_test.c b/support/resolv_test.c
+index ab37d3d58c..29e59da958 100644
+--- a/support/resolv_test.c
++++ b/support/resolv_test.c
+@@ -326,7 +326,7 @@ resolv_response_add_name (struct resolv_response_builder *b,
+               crname_target = *ptr;
+             else
+               crname_target = NULL;
+-            TEST_VERIFY (crname_target != crname);
++            TEST_VERIFY_EXIT (crname_target != crname);
+             /* Not added to the tree.  */
+             free (crname);
+           }
 diff --git a/support/support_capture_subprocess.c b/support/support_capture_subprocess.c
 index c3ef478d17..b4e4bf9502 100644
 --- a/support/support_capture_subprocess.c
@@ -2963,6 +4997,90 @@
  }
  
  void
+diff --git a/support/test-container.c b/support/test-container.c
+index 79d3189e2f..e35cd7316f 100644
+--- a/support/test-container.c
++++ b/support/test-container.c
+@@ -705,6 +705,7 @@ check_for_unshare_hints (int require_pidns)
+ 
+       val = -1; /* Sentinel.  */
+       int cnt = fscanf (f, "%d", &val);
++      fclose (f);
+       if (cnt == 1 && val != files[i].bad_value)
+ 	continue;
+ 
+diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile
+index 4b7f8a5c07..53f5bd4ce0 100644
+--- a/sysdeps/aarch64/Makefile
++++ b/sysdeps/aarch64/Makefile
+@@ -2,11 +2,15 @@ long-double-fcts = yes
+ 
+ ifeq (yes,$(aarch64-bti))
+ # Mark linker output BTI compatible, it warns on non-BTI inputs.
++# Do not do this for conform tests because they may not be compiled
++# with the appropriate compiler flags.
++ifneq ($(subdir),conform)
+ sysdep-LDFLAGS += -Wl,-z,force-bti
+ # Make warnings fatal outside the test system.
+ LDFLAGS-lib.so += -Wl,--fatal-warnings
+ LDFLAGS-rtld += -Wl,-z,force-bti,--fatal-warnings
+-endif
++endif # $(subdir) != conform
++endif # $(aarch64-bti)
+ 
+ ifeq ($(subdir),elf)
+ sysdep-dl-routines += \
+@@ -75,7 +79,19 @@ sysdep_routines += \
+   __alloc_gcs
+ 
+ tests += \
+-  tst-sme-jmp
++  tst-sme-jmp \
++  tst-sme-signal \
++  tst-sme-za-state \
++  # tests
++tests-internal += \
++  tst-sme-clone \
++  tst-sme-clone3 \
++  tst-sme-fork \
++  tst-sme-vfork \
++  # tests-internal
++
++$(objpfx)tst-sme-clone3: $(objpfx)clone3.o $(objpfx)__arm_za_disable.o
++
+ endif
+ 
+ ifeq ($(subdir),malloc)
+diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S
+index 38efddbbae..40f98329d0 100644
+--- a/sysdeps/aarch64/__longjmp.S
++++ b/sysdeps/aarch64/__longjmp.S
+@@ -51,24 +51,7 @@ ENTRY (__longjmp)
+ 
+ #if IS_IN(libc)
+ 	/* Disable ZA state of SME in libc.a and libc.so, but not in ld.so.  */
+-# if HAVE_AARCH64_PAC_RET
+-	PACIASP
+-	cfi_window_save
+-# endif
+-	stp	x29, x30, [sp, -16]!
+-	cfi_adjust_cfa_offset (16)
+-	cfi_rel_offset (x29, 0)
+-	cfi_rel_offset (x30, 8)
+-	mov	x29, sp
+-	bl	__libc_arm_za_disable
+-	ldp	x29, x30, [sp], 16
+-	cfi_adjust_cfa_offset (-16)
+-	cfi_restore (x29)
+-	cfi_restore (x30)
+-# if HAVE_AARCH64_PAC_RET
+-	AUTIASP
+-	cfi_window_save
+-# endif
++	CALL_LIBC_ARM_ZA_DISABLE
+ #endif
+ 
+ 	ldp	x19, x20, [x0, #JB_X19<<3]
 diff --git a/sysdeps/aarch64/fpu/acos_advsimd.c b/sysdeps/aarch64/fpu/acos_advsimd.c
 index 7709b5454f..453f780314 100644
 --- a/sysdeps/aarch64/fpu/acos_advsimd.c
@@ -5807,9 +7925,20 @@
  
  strong_alias (SV_NAME_D1 (log1p), SV_NAME_D1 (logp1))
 diff --git a/sysdeps/aarch64/fpu/pow_sve.c b/sysdeps/aarch64/fpu/pow_sve.c
-index 42d551ca92..b8c1b39dca 100644
+index 42d551ca92..becf1a8410 100644
 --- a/sysdeps/aarch64/fpu/pow_sve.c
 +++ b/sysdeps/aarch64/fpu/pow_sve.c
+@@ -31,8 +31,8 @@
+    The SVE algorithm drops the tail in the exp computation at the price of
+    a lower accuracy, slightly above 1ULP.
+    The SVE algorithm also drops the special treatement of small (< 2^-65) and
+-   large (> 2^63) finite values of |y|, as they only affect non-round to nearest
+-   modes.
++   large (> 2^63) finite values of |y|, as they only affect non-round to
++   nearest modes.
+ 
+    Maximum measured error is 1.04 ULPs:
+    SV_NAME_D2 (pow) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12)
 @@ -44,19 +44,18 @@
  
  /* Data is defined in v_pow_log_data.c.  */
@@ -5881,7 +8010,63 @@
  		  2 * asuint64 (INFINITY) - 1);
  }
  
-@@ -174,16 +198,17 @@ sv_call_specialcase (svfloat64_t x1, svuint64_t u1, svuint64_t u2,
+@@ -132,65 +156,46 @@ sv_zeroinfnan (svbool_t pg, svuint64_t i)
+    a double.  (int32_t)KI is the k used in the argument reduction and exponent
+    adjustment of scale, positive k here means the result may overflow and
+    negative k means the result may underflow.  */
+-static inline double
+-specialcase (double tmp, uint64_t sbits, uint64_t ki)
+-{
+-  double scale;
+-  if ((ki & 0x80000000) == 0)
+-    {
+-      /* k > 0, the exponent of scale might have overflowed by <= 460.  */
+-      sbits -= 1009ull << 52;
+-      scale = asdouble (sbits);
+-      return 0x1p1009 * (scale + scale * tmp);
+-    }
+-  /* k < 0, need special care in the subnormal range.  */
+-  sbits += 1022ull << 52;
+-  /* Note: sbits is signed scale.  */
+-  scale = asdouble (sbits);
+-  double y = scale + scale * tmp;
+-  return 0x1p-1022 * y;
+-}
+-
+-/* Scalar fallback for special cases of SVE pow's exp.  */
+ static inline svfloat64_t
+-sv_call_specialcase (svfloat64_t x1, svuint64_t u1, svuint64_t u2,
+-		     svfloat64_t y, svbool_t cmp)
++specialcase (svfloat64_t tmp, svuint64_t sbits, svuint64_t ki, svbool_t cmp)
+ {
+-  svbool_t p = svpfirst (cmp, svpfalse ());
+-  while (svptest_any (cmp, p))
+-    {
+-      double sx1 = svclastb (p, 0, x1);
+-      uint64_t su1 = svclastb (p, 0, u1);
+-      uint64_t su2 = svclastb (p, 0, u2);
+-      double elem = specialcase (sx1, su1, su2);
+-      svfloat64_t y2 = sv_f64 (elem);
+-      y = svsel (p, y2, y);
+-      p = svpnext_b64 (cmp, p);
+-    }
+-  return y;
++  svbool_t p_pos = svcmpge_n_f64 (cmp, svreinterpret_f64_u64 (ki), 0.0);
++
++  /* Scale up or down depending on sign of k.  */
++  svint64_t offset
++      = svsel_s64 (p_pos, sv_s64 (1009ull << 52), sv_s64 (-1022ull << 52));
++  svfloat64_t factor
++      = svsel_f64 (p_pos, sv_f64 (0x1p1009), sv_f64 (0x1p-1022));
++
++  svuint64_t offset_sbits
++      = svsub_u64_x (cmp, sbits, svreinterpret_u64_s64 (offset));
++  svfloat64_t scale = svreinterpret_f64_u64 (offset_sbits);
++  svfloat64_t res = svmad_f64_x (cmp, scale, tmp, scale);
++  return svmul_f64_x (cmp, res, factor);
+ }
+ 
+ /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
     additional 15 bits precision.  IX is the bit representation of x, but
     normalized in the subnormal range using the sign bit for the exponent.  */
  static inline svfloat64_t
@@ -5902,7 +8087,16 @@
    svfloat64_t z = svreinterpret_f64 (iz);
    svfloat64_t kd = svcvt_f64_x (pg, k);
  
-@@ -199,40 +224,85 @@ sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail)
+   /* log(x) = k*Ln2 + log(c) + log1p(z/c-1).  */
+   /* SVE lookup requires 3 separate lookup tables, as opposed to scalar version
+-     that uses array of structures. We also do the lookup earlier in the code to
+-     make sure it finishes as early as possible.  */
++     that uses array of structures. We also do the lookup earlier in the code
++     to make sure it finishes as early as possible.  */
+   svfloat64_t invc = svld1_gather_index (pg, __v_pow_log_data.invc, i);
+   svfloat64_t logc = svld1_gather_index (pg, __v_pow_log_data.logc, i);
+   svfloat64_t logctail = svld1_gather_index (pg, __v_pow_log_data.logctail, i);
+@@ -199,40 +204,85 @@ sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail)
       |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible.  */
    svfloat64_t r = svmad_x (pg, z, invc, -1.0);
    /* k*Ln2 + log(c) + r.  */
@@ -6000,7 +8194,7 @@
  {
    /* 3 types of special cases: tiny (uflow and spurious uflow), huge (oflow)
       and other cases of large values of x (scale * (1 + TMP) oflow).  */
-@@ -240,73 +310,46 @@ sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
+@@ -240,77 +290,50 @@ sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
    /* |x| is large (|x| >= 512) or tiny (|x| <= 0x1p-54).  */
    svbool_t uoflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), ThresExp);
  
@@ -6026,32 +8220,10 @@
 -      oflow = svcmpge (pg, abstop, HugeExp);
 +      svbool_t oflow = svcmpge (pg, abstop, HugeExp);
        oflow = svand_z (pg, uoflow, svbic_z (pg, oflow, uflow));
-+
-       /* For large |x| values (512 < |x| < 1024) scale * (1 + TMP) can overflow
+-      /* For large |x| values (512 < |x| < 1024) scale * (1 + TMP) can overflow
 -	 or underflow.  */
 -      special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow));
-+    or underflow.  */
-+      svbool_t special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow));
-+
-+      /* Update result with special and large cases.  */
-+      z = sv_call_specialcase (tmp, sbits, ki, z, special);
-+
-+      /* Handle underflow and overflow.  */
-+      svbool_t x_is_neg = svcmplt (pg, x, 0);
-+      svuint64_t sign_mask
-+	  = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS);
-+      svfloat64_t res_uoflow
-+	  = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY));
-+      res_uoflow = svreinterpret_f64 (
-+	  svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask));
-+      /* Avoid spurious underflow for tiny x.  */
-+      svfloat64_t res_spurious_uflow
-+	  = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000));
-+
-+      z = svsel (oflow, res_uoflow, z);
-+      z = svsel (uflow, res_spurious_uflow, z);
-+      return z;
-     }
+-    }
  
 -  /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].  */
 -  /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].  */
@@ -6099,14 +8271,52 @@
 -  svfloat64_t res_spurious_uflow
 -      = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000));
 -  z = svsel (uflow, res_spurious_uflow, z);
--
++      /* Handle underflow and overflow in scale.
++	 For large |x| values (512 < |x| < 1024), scale * (1 + TMP) can
++	 overflow or underflow.  */
++      svbool_t special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow));
++      if (__glibc_unlikely (svptest_any (pg, special)))
++	z = svsel (special, specialcase (tmp, sbits, ki, special), z);
++
++      /* Handle underflow and overflow in exp.  */
++      svbool_t x_is_neg = svcmplt (pg, x, 0);
++      svuint64_t sign_mask
++	  = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS);
++      svfloat64_t res_uoflow
++	  = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY));
++      res_uoflow = svreinterpret_f64 (
++	  svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask));
++      /* Avoid spurious underflow for tiny x.  */
++      svfloat64_t res_spurious_uflow
++	  = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000));
++
++      z = svsel (oflow, res_uoflow, z);
++      z = svsel (uflow, res_spurious_uflow, z);
++      return z;
++    }
+ 
 -  return z;
 +  return sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d);
  }
  
  static inline double
-@@ -341,47 +384,39 @@ pow_sc (double x, double y)
+-pow_sc (double x, double y)
++pow_specialcase (double x, double y)
+ {
+   uint64_t ix = asuint64 (x);
+   uint64_t iy = asuint64 (y);
+@@ -339,49 +362,49 @@ pow_sc (double x, double y)
+   return x;
+ }
  
++/* Scalar fallback for special case routines with custom signature.  */
++static svfloat64_t NOINLINE
++sv_pow_specialcase (svfloat64_t x1, svfloat64_t x2, svfloat64_t y,
++		    svbool_t cmp)
++{
++  return sv_call2_f64 (pow_specialcase, x1, x2, y, cmp);
++}
++
  svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
  {
 +  const struct data *d = ptr_barrier (&data);
@@ -6163,7 +8373,7 @@
  
        svuint64_t vix_norm = svreinterpret_u64 (svmul_m (xsmall, x, 0x1p52));
        vix_norm = svand_m (xsmall, vix_norm, 0x7fffffffffffffff);
-@@ -391,20 +426,24 @@ svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
+@@ -391,21 +414,25 @@ svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
  
    /* y_hi = log(ix, &y_lo).  */
    svfloat64_t vlo;
@@ -6192,12 +8402,14 @@
  
    /* Cases of zero/inf/nan x or y.  */
 -  if (__glibc_unlikely (svptest_any (pg, special)))
+-    vz = sv_call2_f64 (pow_sc, x, y, vz, special);
 +  if (__glibc_unlikely (svptest_any (svptrue_b64 (), special)))
-     vz = sv_call2_f64 (pow_sc, x, y, vz, special);
++    vz = sv_pow_specialcase (x, y, vz, special);
  
    return vz;
+ }
 diff --git a/sysdeps/aarch64/fpu/powf_sve.c b/sysdeps/aarch64/fpu/powf_sve.c
-index 29e9acb6fb..7046990aa1 100644
+index 29e9acb6fb..76f54b3522 100644
 --- a/sysdeps/aarch64/fpu/powf_sve.c
 +++ b/sysdeps/aarch64/fpu/powf_sve.c
 @@ -26,7 +26,6 @@
@@ -6234,24 +8446,52 @@
  		  2u * 0x7f800000 - 1);
  }
  
-@@ -150,9 +148,14 @@ powf_specialcase (float x, float y, float z)
+@@ -118,11 +116,10 @@ zeroinfnan (uint32_t ix)
+    preamble of scalar powf except that we do not update ix and sign_bias. This
+    is done in the preamble of the SVE powf.  */
+ static inline float
+-powf_specialcase (float x, float y, float z)
++powf_specialcase (float x, float y)
+ {
+   uint32_t ix = asuint (x);
+   uint32_t iy = asuint (y);
+-  /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan).  */
+   if (__glibc_unlikely (zeroinfnan (iy)))
+     {
+       if (2 * iy == 0)
+@@ -144,27 +141,15 @@ powf_specialcase (float x, float y, float z)
+ 	x2 = -x2;
+       return iy & 0x80000000 ? 1 / x2 : x2;
+     }
+-  /* We need a return here in case x<0 and y is integer, but all other tests
+-   need to be run.  */
+-  return z;
++  /* Return x for convenience, but make sure result is never used.  */
++  return x;
  }
  
  /* Scalar fallback for special case routines with custom signature.  */
 -static inline svfloat32_t
--sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp)
 +static svfloat32_t NOINLINE
-+sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y)
+ sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp)
  {
-+  /* Special cases of x or y: zero, inf and nan.  */
-+  svbool_t xspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x1));
-+  svbool_t yspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x2));
-+  svbool_t cmp = svorr_z (svptrue_b32 (), xspecial, yspecial);
-+
-   svbool_t p = svpfirst (cmp, svpfalse ());
-   while (svptest_any (cmp, p))
-     {
-@@ -182,30 +185,30 @@ sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k,
+-  svbool_t p = svpfirst (cmp, svpfalse ());
+-  while (svptest_any (cmp, p))
+-    {
+-      float sx1 = svclastb (p, 0, x1);
+-      float sx2 = svclastb (p, 0, x2);
+-      float elem = svclastb (p, 0, y);
+-      elem = powf_specialcase (sx1, sx2, elem);
+-      svfloat32_t y2 = sv_f32 (elem);
+-      y = svsel (p, y2, y);
+-      p = svpnext_b32 (cmp, p);
+-    }
+-  return y;
++  return sv_call2_f32 (powf_specialcase, x1, x2, y, cmp);
+ }
+ 
+ /* Compute core for half of the lanes in double precision.  */
+@@ -182,30 +167,30 @@ sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k,
  
    /* Polynomial to approximate log1p(r)/ln2.  */
    svfloat64_t logx = A (0);
@@ -6294,35 +8534,31 @@
  
    return p;
  }
-@@ -219,19 +222,16 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
+@@ -219,19 +204,16 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
  {
    const svbool_t ptrue = svptrue_b64 ();
  
 -  /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two in
 -     order to perform core computation in double precision.  */
 +  /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two
-+   * in order to perform core computation in double precision.  */
++     in order to perform core computation in double precision.  */
    const svbool_t pg_lo = svunpklo (pg);
    const svbool_t pg_hi = svunpkhi (pg);
--  svfloat64_t y_lo = svcvt_f64_x (
--      ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
--  svfloat64_t y_hi = svcvt_f64_x (
--      ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
+   svfloat64_t y_lo = svcvt_f64_x (
+       ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
+   svfloat64_t y_hi = svcvt_f64_x (
+       ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
 -  svfloat32_t z = svreinterpret_f32 (iz);
 -  svfloat64_t z_lo = svcvt_f64_x (
 -      ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (z))));
 -  svfloat64_t z_hi = svcvt_f64_x (
 -      ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (z))));
-+  svfloat64_t y_lo
-+      = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
-+  svfloat64_t y_hi
-+      = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
-+  svfloat64_t z_lo = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (iz)));
-+  svfloat64_t z_hi = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (iz)));
++  svfloat64_t z_lo = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpklo (iz)));
++  svfloat64_t z_hi = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpkhi (iz)));
    svuint64_t i_lo = svunpklo (i);
    svuint64_t i_hi = svunpkhi (i);
    svint64_t k_lo = svunpklo (k);
-@@ -258,9 +258,9 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
+@@ -258,9 +240,9 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
  /* Implementation of SVE powf.
     Provides the same accuracy as AdvSIMD powf, since it relies on the same
     algorithm. The theoretical maximum error is under 2.60 ULPs.
@@ -6335,7 +8571,7 @@
  svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
  {
    const struct data *d = ptr_barrier (&data);
-@@ -269,21 +269,19 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
+@@ -269,21 +251,19 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
    svuint32_t viy0 = svreinterpret_u32 (y);
  
    /* Negative x cases.  */
@@ -6361,7 +8597,7 @@
        /* Set to SignBias if x is negative and y is odd.  */
        sign_bias = svsel (yisodd_xisneg, sv_u32 (d->sign_bias), sv_u32 (0));
      }
-@@ -294,8 +292,8 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
+@@ -294,8 +274,8 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
    svbool_t cmp = svorr_z (pg, xspecial, yspecial);
  
    /* Small cases of x: |x| < 0x1p-126.  */
@@ -6372,7 +8608,7 @@
      {
        /* Normalize subnormal x so exponent becomes negative.  */
        svuint32_t vix_norm = svreinterpret_u32 (svmul_x (xsmall, x, Norm));
-@@ -304,32 +302,35 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
+@@ -304,31 +284,34 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
        vix = svsel (xsmall, vix_norm, vix);
      }
    /* Part of core computation carried in working precision.  */
@@ -6396,7 +8632,7 @@
 +			 (23 - V_POWF_EXP2_TABLE_BITS));
 +
 +  /* Compute core in extended precision and return intermediate ylogx results
-+   * to handle cases of underflow and underflow in exp.  */
++     to handle cases of underflow and overflow in exp.  */
    svfloat32_t ylogx;
 -  svfloat32_t ret = sv_powf_core (pg, i, iz, k, y, sign_bias, &ylogx, d);
 +  svfloat32_t ret
@@ -6420,11 +8656,81 @@
 +  ret = svsel (yint_or_xpos, ret, sv_f32 (__builtin_nanf ("")));
  
 -  if (__glibc_unlikely (svptest_any (pg, cmp)))
--    return sv_call_powf_sc (x, y, ret, cmp);
 +  if (__glibc_unlikely (svptest_any (cmp, cmp)))
-+    return sv_call_powf_sc (x, y, ret);
+     return sv_call_powf_sc (x, y, ret, cmp);
  
    return ret;
+diff --git a/sysdeps/aarch64/fpu/sinh_advsimd.c b/sysdeps/aarch64/fpu/sinh_advsimd.c
+index 0d6a4856f8..b6b60262c6 100644
+--- a/sysdeps/aarch64/fpu/sinh_advsimd.c
++++ b/sysdeps/aarch64/fpu/sinh_advsimd.c
+@@ -24,36 +24,26 @@ static const struct data
+ {
+   struct v_expm1_data d;
+   uint64x2_t halff;
+-#if WANT_SIMD_EXCEPT
+-  uint64x2_t tiny_bound, thresh;
+-#else
+   float64x2_t large_bound;
+-#endif
+ } data = {
+   .d = V_EXPM1_DATA,
+   .halff = V2 (0x3fe0000000000000),
+-#if WANT_SIMD_EXCEPT
+-  /* 2^-26, below which sinh(x) rounds to x.  */
+-  .tiny_bound = V2 (0x3e50000000000000),
+-  /* asuint(large_bound) - asuint(tiny_bound).  */
+-  .thresh = V2 (0x0230000000000000),
+-#else
+   /* 2^9. expm1 helper overflows for large input.  */
+   .large_bound = V2 (0x1p+9),
+-#endif
+ };
+ 
+ static float64x2_t NOINLINE VPCS_ATTR
+-special_case (float64x2_t x)
++special_case (float64x2_t x, float64x2_t t, float64x2_t halfsign,
++	      uint64x2_t special)
+ {
+-  return v_call_f64 (sinh, x, x, v_u64 (-1));
++  return v_call_f64 (sinh, x, vmulq_f64 (t, halfsign), special);
+ }
+ 
+ /* Approximation for vector double-precision sinh(x) using expm1.
+    sinh(x) = (exp(x) - exp(-x)) / 2.
+    The greatest observed error is 2.52 ULP:
+-   _ZGVnN2v_sinh(-0x1.a098a2177a2b9p-2) got -0x1.ac2f05bb66fccp-2
+-				       want -0x1.ac2f05bb66fc9p-2.  */
++   _ZGVnN2v_sinh(0x1.9f6ff2ab6fb19p-2) got 0x1.aaed83a3153ccp-2
++				      want 0x1.aaed83a3153c9p-2.  */
+ float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
+ {
+   const struct data *d = ptr_barrier (&data);
+@@ -63,21 +53,16 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
+   float64x2_t halfsign = vreinterpretq_f64_u64 (
+       vbslq_u64 (v_u64 (0x8000000000000000), ix, d->halff));
+ 
+-#if WANT_SIMD_EXCEPT
+-  uint64x2_t special = vcgeq_u64 (
+-      vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh);
+-#else
+   uint64x2_t special = vcageq_f64 (x, d->large_bound);
+-#endif
+-
+-  /* Fall back to scalar variant for all lanes if any of them are special.  */
+-  if (__glibc_unlikely (v_any_u64 (special)))
+-    return special_case (x);
+ 
+   /* Up to the point that expm1 overflows, we can use it to calculate sinh
+      using a slight rearrangement of the definition of sinh. This allows us to
+      retain acceptable accuracy for very small inputs.  */
+   float64x2_t t = expm1_inline (ax, &d->d);
+   t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0))));
++
++  if (__glibc_unlikely (v_any_u64 (special)))
++    return special_case (x, t, halfsign, special);
++
+   return vmulq_f64 (t, halfsign);
  }
 diff --git a/sysdeps/aarch64/fpu/sinh_sve.c b/sysdeps/aarch64/fpu/sinh_sve.c
 index 963453f812..072ba8fca9 100644
@@ -6839,6 +9145,234 @@
  }
 -
  #endif
+diff --git a/sysdeps/aarch64/fpu/sv_math.h b/sysdeps/aarch64/fpu/sv_math.h
+index 3d576df4cc..65d7f0ff20 100644
+--- a/sysdeps/aarch64/fpu/sv_math.h
++++ b/sysdeps/aarch64/fpu/sv_math.h
+@@ -24,11 +24,29 @@
+ 
+ #include "vecmath_config.h"
+ 
++#if !defined(__ARM_FEATURE_SVE_BITS) || __ARM_FEATURE_SVE_BITS == 0
++/* If not specified by -msve-vector-bits, assume maximum vector length.  */
++# define SVE_VECTOR_BYTES 256
++#else
++# define SVE_VECTOR_BYTES (__ARM_FEATURE_SVE_BITS / 8)
++#endif
++#define SVE_NUM_FLTS (SVE_VECTOR_BYTES / sizeof (float))
++#define SVE_NUM_DBLS (SVE_VECTOR_BYTES / sizeof (double))
++/* Predicate is stored as one bit per byte of VL so requires VL / 64 bytes.  */
++#define SVE_NUM_PG_BYTES (SVE_VECTOR_BYTES / sizeof (uint64_t))
++
+ #define SV_NAME_F1(fun) _ZGVsMxv_##fun##f
+ #define SV_NAME_D1(fun) _ZGVsMxv_##fun
+ #define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f
+ #define SV_NAME_D2(fun) _ZGVsMxvv_##fun
+ 
++static inline void
++svstr_p (uint8_t *dst, svbool_t p)
++{
++  /* Predicate STR does not currently have an intrinsic.  */
++  __asm__("str %0, [%x1]\n" : : "Upa"(p), "r"(dst) : "memory");
++}
++
+ /* Double precision.  */
+ static inline svint64_t
+ sv_s64 (int64_t x)
+@@ -51,33 +69,35 @@ sv_f64 (double x)
+ static inline svfloat64_t
+ sv_call_f64 (double (*f) (double), svfloat64_t x, svfloat64_t y, svbool_t cmp)
+ {
+-  svbool_t p = svpfirst (cmp, svpfalse ());
+-  while (svptest_any (cmp, p))
++  double tmp[SVE_NUM_DBLS];
++  uint8_t pg_bits[SVE_NUM_PG_BYTES];
++  svstr_p (pg_bits, cmp);
++  svst1 (svptrue_b64 (), tmp, svsel (cmp, x, y));
++
++  for (int i = 0; i < svcntd (); i++)
+     {
+-      double elem = svclastb_n_f64 (p, 0, x);
+-      elem = (*f) (elem);
+-      svfloat64_t y2 = svdup_n_f64 (elem);
+-      y = svsel_f64 (p, y2, y);
+-      p = svpnext_b64 (cmp, p);
++      if (pg_bits[i] & 1)
++	tmp[i] = f (tmp[i]);
+     }
+-  return y;
++  return svld1 (svptrue_b64 (), tmp);
+ }
+ 
+ static inline svfloat64_t
+ sv_call2_f64 (double (*f) (double, double), svfloat64_t x1, svfloat64_t x2,
+ 	      svfloat64_t y, svbool_t cmp)
+ {
+-  svbool_t p = svpfirst (cmp, svpfalse ());
+-  while (svptest_any (cmp, p))
++  double tmp1[SVE_NUM_DBLS], tmp2[SVE_NUM_DBLS];
++  uint8_t pg_bits[SVE_NUM_PG_BYTES];
++  svstr_p (pg_bits, cmp);
++  svst1 (svptrue_b64 (), tmp1, svsel (cmp, x1, y));
++  svst1 (cmp, tmp2, x2);
++
++  for (int i = 0; i < svcntd (); i++)
+     {
+-      double elem1 = svclastb_n_f64 (p, 0, x1);
+-      double elem2 = svclastb_n_f64 (p, 0, x2);
+-      double ret = (*f) (elem1, elem2);
+-      svfloat64_t y2 = svdup_n_f64 (ret);
+-      y = svsel_f64 (p, y2, y);
+-      p = svpnext_b64 (cmp, p);
++      if (pg_bits[i] & 1)
++	tmp1[i] = f (tmp1[i], tmp2[i]);
+     }
+-  return y;
++  return svld1 (svptrue_b64 (), tmp1);
+ }
+ 
+ static inline svuint64_t
+@@ -109,33 +129,40 @@ sv_f32 (float x)
+ static inline svfloat32_t
+ sv_call_f32 (float (*f) (float), svfloat32_t x, svfloat32_t y, svbool_t cmp)
+ {
+-  svbool_t p = svpfirst (cmp, svpfalse ());
+-  while (svptest_any (cmp, p))
++  float tmp[SVE_NUM_FLTS];
++  uint8_t pg_bits[SVE_NUM_PG_BYTES];
++  svstr_p (pg_bits, cmp);
++  svst1 (svptrue_b32 (), tmp, svsel (cmp, x, y));
++
++  for (int i = 0; i < svcntd (); i++)
+     {
+-      float elem = svclastb_n_f32 (p, 0, x);
+-      elem = f (elem);
+-      svfloat32_t y2 = svdup_n_f32 (elem);
+-      y = svsel_f32 (p, y2, y);
+-      p = svpnext_b32 (cmp, p);
++      uint8_t p = pg_bits[i];
++      if (p & 1)
++	tmp[i * 2] = f (tmp[i * 2]);
++      if (p & (1 << 4))
++	tmp[i * 2 + 1] = f (tmp[i * 2 + 1]);
+     }
+-  return y;
++  return svld1 (svptrue_b32 (), tmp);
+ }
+ 
+ static inline svfloat32_t
+ sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2,
+ 	      svfloat32_t y, svbool_t cmp)
+ {
+-  svbool_t p = svpfirst (cmp, svpfalse ());
+-  while (svptest_any (cmp, p))
++  float tmp1[SVE_NUM_FLTS], tmp2[SVE_NUM_FLTS];
++  uint8_t pg_bits[SVE_NUM_PG_BYTES];
++  svstr_p (pg_bits, cmp);
++  svst1 (svptrue_b32 (), tmp1, svsel (cmp, x1, y));
++  svst1 (cmp, tmp2, x2);
++
++  for (int i = 0; i < svcntd (); i++)
+     {
+-      float elem1 = svclastb_n_f32 (p, 0, x1);
+-      float elem2 = svclastb_n_f32 (p, 0, x2);
+-      float ret = f (elem1, elem2);
+-      svfloat32_t y2 = svdup_n_f32 (ret);
+-      y = svsel_f32 (p, y2, y);
+-      p = svpnext_b32 (cmp, p);
++      uint8_t p = pg_bits[i];
++      if (p & 1)
++	tmp1[i * 2] = f (tmp1[i * 2], tmp2[i * 2]);
++      if (p & (1 << 4))
++	tmp1[i * 2 + 1] = f (tmp1[i * 2 + 1], tmp2[i * 2 + 1]);
+     }
+-  return y;
++  return svld1 (svptrue_b32 (), tmp1);
+ }
+-
+ #endif
+diff --git a/sysdeps/aarch64/fpu/tan_advsimd.c b/sysdeps/aarch64/fpu/tan_advsimd.c
+index 825c9754b3..d391a003d8 100644
+--- a/sysdeps/aarch64/fpu/tan_advsimd.c
++++ b/sysdeps/aarch64/fpu/tan_advsimd.c
+@@ -25,9 +25,7 @@ static const struct data
+   float64x2_t poly[9];
+   double half_pi[2];
+   float64x2_t two_over_pi, shift;
+-#if !WANT_SIMD_EXCEPT
+   float64x2_t range_val;
+-#endif
+ } data = {
+   /* Coefficients generated using FPMinimax.  */
+   .poly = { V2 (0x1.5555555555556p-2), V2 (0x1.1111111110a63p-3),
+@@ -38,20 +36,17 @@ static const struct data
+   .half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 },
+   .two_over_pi = V2 (0x1.45f306dc9c883p-1),
+   .shift = V2 (0x1.8p52),
+-#if !WANT_SIMD_EXCEPT
+   .range_val = V2 (0x1p23),
+-#endif
+ };
+ 
+ #define RangeVal 0x4160000000000000  /* asuint64(0x1p23).  */
+ #define TinyBound 0x3e50000000000000 /* asuint64(2^-26).  */
+-#define Thresh 0x310000000000000     /* RangeVal - TinyBound.  */
+ 
+ /* Special cases (fall back to scalar calls).  */
+ static float64x2_t VPCS_ATTR NOINLINE
+-special_case (float64x2_t x)
++special_case (float64x2_t x, float64x2_t n, float64x2_t d, uint64x2_t special)
+ {
+-  return v_call_f64 (tan, x, x, v_u64 (-1));
++  return v_call_f64 (tan, x, vdivq_f64 (n, d), special);
+ }
+ 
+ /* Vector approximation for double-precision tan.
+@@ -65,14 +60,6 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
+      very large inputs. Fall back to scalar routine for all lanes if any are
+      too large, or Inf/NaN. If fenv exceptions are expected, also fall back for
+      tiny input to avoid underflow.  */
+-#if WANT_SIMD_EXCEPT
+-  uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));
+-  /* iax - tiny_bound > range_val - tiny_bound.  */
+-  uint64x2_t special
+-      = vcgtq_u64 (vsubq_u64 (iax, v_u64 (TinyBound)), v_u64 (Thresh));
+-  if (__glibc_unlikely (v_any_u64 (special)))
+-    return special_case (x);
+-#endif
+ 
+   /* q = nearest integer to 2 * x / pi.  */
+   float64x2_t q
+@@ -81,9 +68,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
+ 
+   /* Use q to reduce x to r in [-pi/4, pi/4], by:
+      r = x - q * pi/2, in extended precision.  */
+-  float64x2_t r = x;
+   float64x2_t half_pi = vld1q_f64 (dat->half_pi);
+-  r = vfmsq_laneq_f64 (r, q, half_pi, 0);
++  float64x2_t r = vfmsq_laneq_f64 (x, q, half_pi, 0);
+   r = vfmsq_laneq_f64 (r, q, half_pi, 1);
+   /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
+      formula.  */
+@@ -114,12 +100,13 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
+ 
+   uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1));
+ 
+-#if !WANT_SIMD_EXCEPT
+   uint64x2_t special = vcageq_f64 (x, dat->range_val);
++  float64x2_t swap = vbslq_f64 (no_recip, n, vnegq_f64 (d));
++  d = vbslq_f64 (no_recip, d, n);
++  n = swap;
++
+   if (__glibc_unlikely (v_any_u64 (special)))
+-    return special_case (x);
+-#endif
++    return special_case (x, n, d, special);
+ 
+-  return vdivq_f64 (vbslq_f64 (no_recip, n, vnegq_f64 (d)),
+-		    vbslq_f64 (no_recip, d, n));
++  return vdivq_f64 (n, d);
+ }
 diff --git a/sysdeps/aarch64/fpu/tanh_sve.c b/sysdeps/aarch64/fpu/tanh_sve.c
 index 789cc6854f..5869419010 100644
 --- a/sysdeps/aarch64/fpu/tanh_sve.c
@@ -7021,6 +9555,54 @@
 -  return svdiv_x (pg, q, qp2);
 +  return svreinterpret_f64 (svorr_x (pg, sign_bit, svreinterpret_u64 (y)));
  }
+diff --git a/sysdeps/aarch64/fpu/tanpi_sve.c b/sysdeps/aarch64/fpu/tanpi_sve.c
+index 57c643ae29..bfe6828e1f 100644
+--- a/sysdeps/aarch64/fpu/tanpi_sve.c
++++ b/sysdeps/aarch64/fpu/tanpi_sve.c
+@@ -1,6 +1,6 @@
+ /* Double-precision (SVE) tanpi function
+ 
+-   Copyright (C) 2024 Free Software Foundation, Inc.
++   Copyright (C) 2024-2025 Free Software Foundation, Inc.
+    This file is part of the GNU C Library.
+ 
+    The GNU C Library is free software; you can redistribute it and/or
+@@ -58,10 +58,10 @@ svfloat64_t SV_NAME_D1 (tanpi) (svfloat64_t x, const svbool_t pg)
+   svfloat64_t r2 = svmul_x (pg, r, r);
+   svfloat64_t r4 = svmul_x (pg, r2, r2);
+ 
+-  svfloat64_t c_1_3 = svld1rq (pg, &d->c1);
+-  svfloat64_t c_5_7 = svld1rq (pg, &d->c5);
+-  svfloat64_t c_9_11 = svld1rq (pg, &d->c9);
+-  svfloat64_t c_13_14 = svld1rq (pg, &d->c13);
++  svfloat64_t c_1_3 = svld1rq (svptrue_b64 (), &d->c1);
++  svfloat64_t c_5_7 = svld1rq (svptrue_b64 (), &d->c5);
++  svfloat64_t c_9_11 = svld1rq (svptrue_b64 (), &d->c9);
++  svfloat64_t c_13_14 = svld1rq (svptrue_b64 (), &d->c13);
+   svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r2, c_1_3, 0);
+   svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r2, c_1_3, 1);
+   svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), r2, c_5_7, 0);
+diff --git a/sysdeps/aarch64/fpu/tanpif_sve.c b/sysdeps/aarch64/fpu/tanpif_sve.c
+index 0285f56f34..6894379564 100644
+--- a/sysdeps/aarch64/fpu/tanpif_sve.c
++++ b/sysdeps/aarch64/fpu/tanpif_sve.c
+@@ -1,6 +1,6 @@
+ /* Single-precision (SVE) tanpi function
+ 
+-   Copyright (C) 2024 Free Software Foundation, Inc.
++   Copyright (C) 2024-2025 Free Software Foundation, Inc.
+    This file is part of the GNU C Library.
+ 
+    The GNU C Library is free software; you can redistribute it and/or
+@@ -37,7 +37,7 @@ const static struct v_tanpif_data
+ svfloat32_t SV_NAME_F1 (tanpi) (svfloat32_t x, const svbool_t pg)
+ {
+   const struct v_tanpif_data *d = ptr_barrier (&tanpif_data);
+-  svfloat32_t odd_coeffs = svld1rq (pg, &d->c1);
++  svfloat32_t odd_coeffs = svld1rq (svptrue_b32 (), &d->c1);
+   svfloat32_t n = svrintn_x (pg, x);
+ 
+   /* inf produces nan that propagates.  */
 diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
 index 772b16a358..1c3c392513 100644
 --- a/sysdeps/aarch64/multiarch/Makefile
@@ -7196,6 +9778,792 @@
 +
 +END (__memset_sve_zva64)
 +#endif
+diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure
+index 19657b627b..e1b772c586 100644
+--- a/sysdeps/aarch64/preconfigure
++++ b/sysdeps/aarch64/preconfigure
+@@ -3,5 +3,6 @@ aarch64*)
+ 	base_machine=aarch64
+ 	machine=aarch64
+ 	mtls_descriptor=desc
++	mtls_traditional=trad
+ 	;;
+ esac
+diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S
+index b630ca099a..e175ec4d36 100644
+--- a/sysdeps/aarch64/setjmp.S
++++ b/sysdeps/aarch64/setjmp.S
+@@ -37,6 +37,12 @@ ENTRY (__sigsetjmp)
+ 	PTR_ARG (0)
+ 
+ 1:
++
++#if IS_IN(libc)
++	/* Disable ZA state of SME in libc.a and libc.so, but not in ld.so.  */
++	CALL_LIBC_ARM_ZA_DISABLE
++#endif
++
+ 	stp	x19, x20, [x0, #JB_X19<<3]
+ 	stp	x21, x22, [x0, #JB_X21<<3]
+ 	stp	x23, x24, [x0, #JB_X23<<3]
+diff --git a/sysdeps/aarch64/tst-sme-clone.c b/sysdeps/aarch64/tst-sme-clone.c
+new file mode 100644
+index 0000000000..b6ad54fa37
+--- /dev/null
++++ b/sysdeps/aarch64/tst-sme-clone.c
+@@ -0,0 +1,54 @@
++/* Test that ZA state of SME is cleared in both parent and child
++   when clone() syscall is used.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include "tst-sme-skeleton.c"
++
++#include <signal.h>
++#include <support/xsched.h>
++
++static int
++fun (void * const arg)
++{
++  printf ("in child: %s\n", (const char *)arg);
++  /* Check that ZA state of SME was disabled in child.  */
++  check_sme_za_state ("after clone in child", /* Clear.  */ true);
++  return 0;
++}
++
++static char __attribute__((aligned(16)))
++stack[1024 * 1024];
++
++static void
++run (struct blk *ptr)
++{
++  char *syscall_name = (char *)"clone";
++  printf ("in parent: before %s\n", syscall_name);
++
++  /* Enable ZA state so that the effect of disabling it is observable.  */
++  enable_sme_za_state (ptr);
++  check_sme_za_state ("before clone", /* Clear.  */ false);
++
++  pid_t pid = xclone (fun, syscall_name, stack, sizeof (stack),
++		      CLONE_NEWUSER | CLONE_NEWNS | SIGCHLD);
++
++  /* Check that ZA state of SME was disabled in parent.  */
++  check_sme_za_state ("after clone in parent", /* Clear.  */ true);
++
++  TEST_VERIFY (xwaitpid (pid, NULL, 0) == pid);
++}
+diff --git a/sysdeps/aarch64/tst-sme-clone3.c b/sysdeps/aarch64/tst-sme-clone3.c
+new file mode 100644
+index 0000000000..f420d5984d
+--- /dev/null
++++ b/sysdeps/aarch64/tst-sme-clone3.c
+@@ -0,0 +1,84 @@
++/* Test that ZA state of SME is cleared in both parent and child
++   when clone3() syscall is used.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include "tst-sme-skeleton.c"
++
++#include <clone3.h>
++
++#include <errno.h>
++#include <signal.h>
++#include <support/xsched.h>
++
++/* Since clone3 is not a public symbol, we link this test explicitly
++   with clone3.o and have to provide this declaration.  */
++int __clone3 (struct clone_args *cl_args, size_t size,
++	    int (*func)(void *arg), void *arg);
++
++static int
++fun (void * const arg)
++{
++  printf ("in child: %s\n", (const char *)arg);
++  /* Check that ZA state of SME was disabled in child.  */
++  check_sme_za_state ("after clone3 in child", /* Clear.  */ true);
++  return 0;
++}
++
++static char __attribute__((aligned(16)))
++stack[1024 * 1024];
++
++/* Required by __arm_za_disable.o and provided by the startup code
++   as a hidden symbol.  */
++uint64_t _dl_hwcap2;
++
++static void
++run (struct blk *ptr)
++{
++  _dl_hwcap2 = getauxval (AT_HWCAP2);
++
++  char *syscall_name = (char *)"clone3";
++  struct clone_args args = {
++    .flags = CLONE_VM | CLONE_VFORK,
++    .exit_signal = SIGCHLD,
++    .stack = (uintptr_t) stack,
++    .stack_size = sizeof (stack),
++  };
++  printf ("in parent: before %s\n", syscall_name);
++
++  /* Enable ZA state so that the effect of disabling it is observable.  */
++  enable_sme_za_state (ptr);
++  check_sme_za_state ("before clone3", /* Clear.  */ false);
++
++  pid_t pid = __clone3 (&args, sizeof (args), fun, syscall_name);
++
++  /* Check that ZA state of SME was disabled in parent.  */
++  check_sme_za_state ("after clone3 in parent", /* Clear.  */ true);
++
++  printf ("%s child pid: %d\n", syscall_name, pid);
++
++  xwaitpid (pid, NULL, 0);
++  printf ("in parent: after %s\n", syscall_name);
++}
++
++/* Workaround to simplify linking with clone3.o.  */
++void __syscall_error(int code)
++{
++  int err = -code;
++  fprintf (stderr, "syscall error %d (%s)\n", err, strerror (err));
++  exit (err);
++}
+diff --git a/sysdeps/aarch64/tst-sme-fork.c b/sysdeps/aarch64/tst-sme-fork.c
+new file mode 100644
+index 0000000000..b003b08884
+--- /dev/null
++++ b/sysdeps/aarch64/tst-sme-fork.c
+@@ -0,0 +1,43 @@
++/* Test that ZA state of SME is cleared in both parent and child
++   when fork() function is used.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include "tst-sme-skeleton.c"
++
++static void
++run (struct blk *blk)
++{
++  /* Enable ZA state so that the effect of disabling it is observable.  */
++  enable_sme_za_state (blk);
++  check_sme_za_state ("before fork", /* Clear.  */ false);
++  fflush (stdout);
++
++  pid_t pid = xfork ();
++
++  if (pid == 0)
++    {
++      /* Check that ZA state of SME was disabled in child.  */
++      check_sme_za_state ("after fork in child", /* Clear.  */ true);
++      exit (0);
++    }
++
++  /* Check that ZA state of SME was disabled in parent.  */
++  check_sme_za_state ("after fork in parent", /* Clear.  */ true);
++
++  TEST_VERIFY (xwaitpid (pid, NULL, 0) == pid);
++}
+diff --git a/sysdeps/aarch64/tst-sme-helper.h b/sysdeps/aarch64/tst-sme-helper.h
+new file mode 100644
+index 0000000000..ab9c503e45
+--- /dev/null
++++ b/sysdeps/aarch64/tst-sme-helper.h
+@@ -0,0 +1,94 @@
++/* Utility functions for SME tests.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++struct blk {
++  void *za_save_buffer;
++  uint16_t num_za_save_slices;
++  char __reserved[6];
++};
++
++/* Read SVCR to get SM (bit0) and ZA (bit1) state.  */
++static unsigned long
++get_svcr (void)
++{
++  register unsigned long x0 asm ("x0");
++  asm volatile (
++    ".inst   0xd53b4240  /* mrs     x0, svcr  */\n"
++    : "=r" (x0));
++  return x0;
++}
++
++/* Returns tpidr2.  */
++static void *
++get_tpidr2 (void)
++{
++  register unsigned long x0 asm ("x0");
++  asm volatile (
++    ".inst   0xd53bd0a0  /* mrs     x0, tpidr2_el0  */\n"
++    : "=r"(x0) :: "memory");
++  return (void *) x0;
++}
++
++/* Obtains current streaming SVE vector register size.  */
++static unsigned long
++get_svl (void)
++{
++  register unsigned long x0 asm ("x0");
++  asm volatile (
++    ".inst   0x04bf5820  /* rdsvl   x0, 1  */\n"
++    : "=r" (x0));
++  return x0;
++}
++
++/* PSTATE.ZA = 1, set ZA state to active.  */
++static void
++start_za (void)
++{
++  asm volatile (
++    ".inst   0xd503457f  /* smstart za  */");
++}
++
++/* Load data into ZA byte by byte from BUF.  */
++static void __attribute__ ((noinline))
++load_za (const void *buf, unsigned long svl)
++{
++  register unsigned long x15 asm ("x15") = 0;
++  register unsigned long x16 asm ("x16") = (unsigned long)buf;
++  register unsigned long x17 asm ("x17") = svl;
++
++  asm volatile (
++    ".inst   0xd503437f  /* smstart sm  */\n"
++    ".L_ldr_loop:\n"
++    ".inst   0xe1006200  /* ldr     za[w15, 0], [x16]  */\n"
++    "add     w15, w15, 1\n"
++    ".inst   0x04305030  /* addvl   x16, x16, 1  */\n"
++    "cmp     w15, w17\n"
++    "bne     .L_ldr_loop\n"
++    ".inst   0xd503427f  /* smstop  sm  */\n"
++    : "+r"(x15), "+r"(x16), "+r"(x17));
++}
++
++/* Set tpidr2 to BLK.  */
++static void
++set_tpidr2 (struct blk *blk)
++{
++  register unsigned long x0 asm ("x0") = (unsigned long)blk;
++  asm volatile (
++    ".inst   0xd51bd0a0  /* msr     tpidr2_el0, x0  */\n"
++    :: "r"(x0) : "memory");
++}
+diff --git a/sysdeps/aarch64/tst-sme-jmp.c b/sysdeps/aarch64/tst-sme-jmp.c
+index 62c419f6c1..b2d21c6e1a 100644
+--- a/sysdeps/aarch64/tst-sme-jmp.c
++++ b/sysdeps/aarch64/tst-sme-jmp.c
+@@ -27,87 +27,15 @@
+ #include <support/support.h>
+ #include <support/test-driver.h>
+ 
+-struct blk {
+-  void *za_save_buffer;
+-  uint16_t num_za_save_slices;
+-  char __reserved[6];
+-};
++#include "tst-sme-helper.h"
+ 
++/* Streaming SVE vector register size.  */
+ static unsigned long svl;
++
+ static uint8_t *za_orig;
+ static uint8_t *za_dump;
+ static uint8_t *za_save;
+ 
+-static unsigned long
+-get_svl (void)
+-{
+-  register unsigned long x0 asm ("x0");
+-  asm volatile (
+-    ".inst   0x04bf5820  /* rdsvl   x0, 1  */\n"
+-    : "=r" (x0));
+-  return x0;
+-}
+-
+-/* PSTATE.ZA = 1, set ZA state to active.  */
+-static void
+-start_za (void)
+-{
+-  asm volatile (
+-    ".inst   0xd503457f  /* smstart za  */");
+-}
+-
+-/* Read SVCR to get SM (bit0) and ZA (bit1) state.  */
+-static unsigned long
+-get_svcr (void)
+-{
+-  register unsigned long x0 asm ("x0");
+-  asm volatile (
+-    ".inst   0xd53b4240  /* mrs     x0, svcr  */\n"
+-    : "=r" (x0));
+-  return x0;
+-}
+-
+-/* Load data into ZA byte by byte from p.  */
+-static void __attribute__ ((noinline))
+-load_za (const void *p)
+-{
+-  register unsigned long x15 asm ("x15") = 0;
+-  register unsigned long x16 asm ("x16") = (unsigned long)p;
+-  register unsigned long x17 asm ("x17") = svl;
+-
+-  asm volatile (
+-    ".inst   0xd503437f  /* smstart sm  */\n"
+-    ".L_ldr_loop:\n"
+-    ".inst   0xe1006200  /* ldr     za[w15, 0], [x16]  */\n"
+-    "add     w15, w15, 1\n"
+-    ".inst   0x04305030  /* addvl   x16, x16, 1  */\n"
+-    "cmp     w15, w17\n"
+-    "bne     .L_ldr_loop\n"
+-    ".inst   0xd503427f  /* smstop  sm  */\n"
+-    : "+r"(x15), "+r"(x16), "+r"(x17));
+-}
+-
+-/* Set tpidr2 to BLK.  */
+-static void
+-set_tpidr2 (struct blk *blk)
+-{
+-  register unsigned long x0 asm ("x0") = (unsigned long)blk;
+-  asm volatile (
+-    ".inst   0xd51bd0a0  /* msr     tpidr2_el0, x0  */\n"
+-    :: "r"(x0) : "memory");
+-}
+-
+-/* Returns tpidr2.  */
+-static void *
+-get_tpidr2 (void)
+-{
+-  register unsigned long x0 asm ("x0");
+-  asm volatile (
+-    ".inst   0xd53bd0a0  /* mrs     x0, tpidr2_el0  */\n"
+-    : "=r"(x0) :: "memory");
+-  return (void *) x0;
+-}
+-
+ static void
+ print_data(const char *msg, void *p)
+ {
+@@ -157,7 +85,7 @@ longjmp_test (void)
+     FAIL_EXIT1 ("svcr != 0: %lu", svcr);
+   set_tpidr2 (&blk);
+   start_za ();
+-  load_za (za_orig);
++  load_za (za_orig, svl);
+ 
+   print_data ("za save space", za_save);
+   p = get_tpidr2 ();
+@@ -168,8 +96,8 @@ longjmp_test (void)
+     {
+       p = get_tpidr2 ();
+       printf ("before longjmp: tp2 = %p\n", p);
+-      if (p != &blk)
+-	FAIL_EXIT1 ("tpidr2 is clobbered");
++      if (p != NULL)
++	FAIL_EXIT1 ("tpidr2 has not been reset to null");
+       do_longjmp (env);
+       FAIL_EXIT1 ("longjmp returned");
+     }
+@@ -206,7 +134,7 @@ setcontext_test (void)
+     FAIL_EXIT1 ("svcr != 0: %lu", svcr);
+   set_tpidr2 (&blk);
+   start_za ();
+-  load_za (za_orig);
++  load_za (za_orig, svl);
+ 
+   print_data ("za save space", za_save);
+   p = get_tpidr2 ();
+diff --git a/sysdeps/aarch64/tst-sme-signal.c b/sysdeps/aarch64/tst-sme-signal.c
+new file mode 100644
+index 0000000000..b4b07bcc44
+--- /dev/null
++++ b/sysdeps/aarch64/tst-sme-signal.c
+@@ -0,0 +1,115 @@
++/* Test handling of SME state in a signal handler.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include "tst-sme-skeleton.c"
++
++#include <support/xsignal.h>
++
++static struct _aarch64_ctx *
++extension (void *p)
++{
++  return p;
++}
++
++#ifndef TPIDR2_MAGIC
++#define TPIDR2_MAGIC 0x54504902
++#endif
++
++#ifndef ZA_MAGIC
++#define ZA_MAGIC 0x54366345
++#endif
++
++#ifndef ZT_MAGIC
++#define ZT_MAGIC 0x5a544e01
++#endif
++
++#ifndef EXTRA_MAGIC
++#define EXTRA_MAGIC 0x45585401
++#endif
++
++/* We use a pipe to make sure that the final check of the SME state
++   happens after the signal handler has finished.  */
++static int pipefd[2];
++
++#define WRITE(msg) xwrite (1, msg, sizeof (msg));
++
++static void
++handler (int signo, siginfo_t *si, void *ctx)
++{
++  TEST_VERIFY (signo == SIGUSR1);
++  WRITE ("in the handler\n");
++  check_sme_za_state ("during signal", true /* State is clear.  */);
++  ucontext_t *uc = ctx;
++  void *p = uc->uc_mcontext.__reserved;
++  unsigned int found = 0;
++  uint32_t m;
++  while ((m = extension (p)->magic))
++    {
++      if (m == TPIDR2_MAGIC)
++        {
++          WRITE ("found TPIDR2_MAGIC\n");
++          found += 1;
++        }
++      if (m == ZA_MAGIC)
++        {
++          WRITE ("found ZA_MAGIC\n");
++          found += 1;
++        }
++      if (m == ZT_MAGIC)
++        {
++          WRITE ("found ZT_MAGIC\n");
++          found += 1;
++        }
++      if (m == EXTRA_MAGIC)
++        {
++          WRITE ("found EXTRA_MAGIC\n");
++          struct { struct _aarch64_ctx h; uint64_t data; } *e = p;
++          p = (char *)e->data;
++          continue;
++        }
++      p = (char *)p + extension (p)->size;
++    }
++  TEST_COMPARE (found, 3);
++
++  /* Signal that the wait is over (see below).  */
++  char message = '\0';
++  xwrite (pipefd[1], &message, 1);
++}
++
++static void
++run (struct blk *blk)
++{
++  xpipe (pipefd);
++
++  struct sigaction sigact;
++  sigemptyset (&sigact.sa_mask);
++  sigact.sa_flags = 0;
++  sigact.sa_flags |= SA_SIGINFO;
++  sigact.sa_sigaction = handler;
++  xsigaction (SIGUSR1, &sigact, NULL);
++
++  enable_sme_za_state (blk);
++  check_sme_za_state ("before signal", false /* State is not clear.  */);
++  xraise (SIGUSR1);
++
++  /* Wait for signal handler to complete.  */
++  char response;
++  xread (pipefd[0], &response, 1);
++
++  check_sme_za_state ("after signal", false /* State is not clear.  */);
++}
+diff --git a/sysdeps/aarch64/tst-sme-skeleton.c b/sysdeps/aarch64/tst-sme-skeleton.c
+new file mode 100644
+index 0000000000..ba84dda1cb
+--- /dev/null
++++ b/sysdeps/aarch64/tst-sme-skeleton.c
+@@ -0,0 +1,101 @@
++/* Template for SME tests.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <stdio.h>
++#include <stddef.h>
++#include <stdint.h>
++#include <string.h>
++#include <sys/auxv.h>
++
++#include <support/check.h>
++#include <support/support.h>
++#include <support/xstdlib.h>
++#include <support/xunistd.h>
++#include <support/test-driver.h>
++
++#include "tst-sme-helper.h"
++
++/* Streaming SVE vector register size.  */
++static unsigned long svl;
++
++static uint8_t *state;
++
++static void
++enable_sme_za_state (struct blk *blk)
++{
++  start_za ();
++  set_tpidr2 (blk);
++  load_za (blk, svl);
++}
++
++/* Check if SME state is disabled (when CLEAR is true) or
++   enabled (when CLEAR is false).  */
++static void
++check_sme_za_state (const char msg[], bool clear)
++{
++  unsigned long svcr = get_svcr ();
++  void *tpidr2 = get_tpidr2 ();
++  printf ("[%s]\n", msg);
++  printf ("svcr = %016lx\n", svcr);
++  printf ("tpidr2 = %016lx\n", (unsigned long)tpidr2);
++  if (clear)
++    {
++      TEST_VERIFY (svcr == 0);
++      TEST_VERIFY (tpidr2 == NULL);
++    }
++  else
++    {
++      TEST_VERIFY (svcr != 0);
++      TEST_VERIFY (tpidr2 != NULL);
++    }
++}
++
++/* Must be defined by the actual test that includes this
++   skeleton file.  */
++static void
++run (struct blk *ptr);
++
++static int
++do_test (void)
++{
++  unsigned long hwcap2 = getauxval (AT_HWCAP2);
++  if ((hwcap2 & HWCAP2_SME) == 0)
++    return EXIT_UNSUPPORTED;
++
++  /* Get current streaming SVE vector length in bytes.  */
++  svl = get_svl ();
++  printf ("svl: %lu\n", svl);
++
++  TEST_VERIFY_EXIT (!(svl < 16 || svl % 16 != 0 || svl >= (1 << 16)));
++
++  /* Initialise buffer for ZA state of SME.  */
++  state = xmalloc (svl * svl);
++  memset (state, 1, svl * svl);
++  struct blk blk = {
++    .za_save_buffer = state,
++    .num_za_save_slices = svl,
++    .__reserved = {0},
++  };
++
++  run (&blk);
++
++  free (state);
++  return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/sysdeps/aarch64/tst-sme-vfork.c b/sysdeps/aarch64/tst-sme-vfork.c
+new file mode 100644
+index 0000000000..3feea065e5
+--- /dev/null
++++ b/sysdeps/aarch64/tst-sme-vfork.c
+@@ -0,0 +1,43 @@
++/* Test that ZA state of SME is cleared in both parent and child
++   when vfork() function is used.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include "tst-sme-skeleton.c"
++
++static void
++run (struct blk *blk)
++{
++  /* Enable ZA state so that the effect of disabling it is observable.  */
++  enable_sme_za_state (blk);
++  check_sme_za_state ("before vfork", /* Clear.  */ false);
++  fflush (stdout);
++
++  pid_t pid = vfork ();
++
++  if (pid == 0)
++    {
++      /* Check that ZA state of SME was disabled in child.  */
++      check_sme_za_state ("after vfork in child", /* Clear.  */ true);
++      _exit (0);
++    }
++
++  /* Check that ZA state of SME was disabled in parent.  */
++  check_sme_za_state ("after vfork in parent", /* Clear.  */ true);
++
++  TEST_VERIFY (xwaitpid (pid, NULL, 0) == pid);
++}
+diff --git a/sysdeps/aarch64/tst-sme-za-state.c b/sysdeps/aarch64/tst-sme-za-state.c
+new file mode 100644
+index 0000000000..00118ef506
+--- /dev/null
++++ b/sysdeps/aarch64/tst-sme-za-state.c
+@@ -0,0 +1,52 @@
++/* Test for SME ZA state being cleared on setjmp and longjmp.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include "tst-sme-skeleton.c"
++
++#include <setjmp.h>
++
++static void
++run (struct blk *ptr)
++{
++  jmp_buf buf;
++  int ret;
++
++  check_sme_za_state ("initial state", /* Clear.  */ true);
++
++  /* Enable ZA state so that the effect of disabling it is observable.  */
++  enable_sme_za_state (ptr);
++  check_sme_za_state ("before setjmp", /* Clear.  */ false);
++
++  if ((ret = setjmp (buf)) == 0)
++    {
++      check_sme_za_state ("after setjmp", /* Clear.  */ true);
++
++      /* Enable ZA state so that the effect of disabling it is observable.  */
++      enable_sme_za_state (ptr);
++      check_sme_za_state ("before longjmp", /* Clear.  */ false);
++
++      longjmp (buf, 42);
++
++      /* Unreachable.  */
++      TEST_VERIFY (false);
++      __builtin_unreachable ();
++    }
++
++  TEST_COMPARE (ret, 42);
++  check_sme_za_state ("after longjmp", /* Clear.  */ true);
++}
 diff --git a/sysdeps/arm/find_exidx.c b/sysdeps/arm/find_exidx.c
 index 60021a072c..468e016214 100644
 --- a/sysdeps/arm/find_exidx.c
@@ -7218,9 +10586,18 @@
    *pcount = data.dlfo_eh_count;
    return (_Unwind_Ptr) data.dlfo_eh_frame;
 diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
-index e871f27ff2..ddb34a1588 100644
+index e871f27ff2..5cb685e470 100644
 --- a/sysdeps/generic/ldsodefs.h
 +++ b/sysdeps/generic/ldsodefs.h
+@@ -351,7 +351,7 @@ struct rtld_global
+       void (*free) (void *);
+     } _ns_unique_sym_table;
+     /* Keep track of changes to each namespace' list.  */
+-    struct r_debug_extended _ns_debug;
++    struct r_debug_extended _ns_debug_unused;
+   } _dl_ns[DL_NNS];
+   /* One higher than index of last used namespace.  */
+   EXTERN size_t _dl_nns;
 @@ -695,10 +695,23 @@ extern const ElfW(Phdr) *_dl_phdr;
  extern size_t _dl_phnum;
  #endif
@@ -7246,6 +10623,666 @@
  
  /* Variable pointing to the end of the stack (or close to it).  This value
     must be constant over the runtime of the application.  Some programs
+@@ -1041,15 +1054,29 @@ extern void _dl_debug_state (void);
+ rtld_hidden_proto (_dl_debug_state)
+ 
+ /* Initialize `struct r_debug_extended' for the namespace NS.  LDBASE
+-   is the run-time load address of the dynamic linker, to be put in the
+-   `r_ldbase' member.  Return the address of the structure.  */
++   is the run-time load address of the dynamic linker, to be put in
++   the `r_ldbase' member.
++
++   Return the address of the r_debug structure for the namespace.
++   This is not merely a convenience or optimization, but it is
++   necessary for the LIBC_PROBE Systemtap/debugger probes to work
++   reliably: direct variable access can create probes that tools
++   cannot consume.  */
+ extern struct r_debug *_dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns)
+      attribute_hidden;
+ 
++/* This is called after relocation processing to handle a potential
++   copy relocation for _r_debug.  */
++void _dl_debug_post_relocate (struct link_map *main_map) attribute_hidden;
++
+ /* Update the `r_map' member and return the address of `struct r_debug'
+    of the namespace NS.  */
+ extern struct r_debug *_dl_debug_update (Lmid_t ns) attribute_hidden;
+ 
++/* Update R->r_state to STATE and notify the debugger by calling
++   _dl_debug_state.  */
++void _dl_debug_change_state (struct r_debug *r, int state) attribute_hidden;
++
+ /* Initialize the basic data structure for the search paths.  SOURCE
+    is either "LD_LIBRARY_PATH" or "--library-path".
+    GLIBC_HWCAPS_PREPEND adds additional glibc-hwcaps subdirectories to
+diff --git a/sysdeps/generic/libc-tsd.h b/sysdeps/generic/libc-tsd.h
+deleted file mode 100644
+index b95e4094f6..0000000000
+--- a/sysdeps/generic/libc-tsd.h
++++ /dev/null
+@@ -1,60 +0,0 @@
+-/* libc-internal interface for thread-specific data.  Stub or TLS version.
+-   Copyright (C) 1998-2025 Free Software Foundation, Inc.
+-   This file is part of the GNU C Library.
+-
+-   The GNU C Library is free software; you can redistribute it and/or
+-   modify it under the terms of the GNU Lesser General Public
+-   License as published by the Free Software Foundation; either
+-   version 2.1 of the License, or (at your option) any later version.
+-
+-   The GNU C Library is distributed in the hope that it will be useful,
+-   but WITHOUT ANY WARRANTY; without even the implied warranty of
+-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+-   Lesser General Public License for more details.
+-
+-   You should have received a copy of the GNU Lesser General Public
+-   License along with the GNU C Library; if not, see
+-   <https://www.gnu.org/licenses/>.  */
+-
+-#ifndef _GENERIC_LIBC_TSD_H
+-#define _GENERIC_LIBC_TSD_H 1
+-
+-/* This file defines the following macros for accessing a small fixed
+-   set of thread-specific `void *' data used only internally by libc.
+-
+-   __libc_tsd_define(CLASS, TYPE, KEY)	-- Define or declare a datum with TYPE
+-					   for KEY.  CLASS can be `static' for
+-					   keys used in only one source file,
+-					   empty for global definitions, or
+-					   `extern' for global declarations.
+-   __libc_tsd_address(TYPE, KEY)	-- Return the `TYPE *' pointing to
+-					   the current thread's datum for KEY.
+-   __libc_tsd_get(TYPE, KEY)		-- Return the `TYPE' datum for KEY.
+-   __libc_tsd_set(TYPE, KEY, VALUE)	-- Set the datum for KEY to VALUE.
+-
+-   The set of available KEY's will usually be provided as an enum,
+-   and contains (at least):
+-		_LIBC_TSD_KEY_MALLOC
+-		_LIBC_TSD_KEY_DL_ERROR
+-		_LIBC_TSD_KEY_RPC_VARS
+-   All uses must be the literal _LIBC_TSD_* name in the __libc_tsd_* macros.
+-   Some implementations may not provide any enum at all and instead
+-   using string pasting in the macros.  */
+-
+-#include <tls.h>
+-
+-/* When full support for __thread variables is available, this interface is
+-   just a trivial wrapper for it.  Without TLS, this is the generic/stub
+-   implementation for wholly single-threaded systems.
+-
+-   We don't define an enum for the possible key values, because the KEYs
+-   translate directly into variables by macro magic.  */
+-
+-#define __libc_tsd_define(CLASS, TYPE, KEY)	\
+-  CLASS __thread TYPE __libc_tsd_##KEY attribute_tls_model_ie;
+-
+-#define __libc_tsd_address(TYPE, KEY)		(&__libc_tsd_##KEY)
+-#define __libc_tsd_get(TYPE, KEY)		(__libc_tsd_##KEY)
+-#define __libc_tsd_set(TYPE, KEY, VALUE)	(__libc_tsd_##KEY = (VALUE))
+-
+-#endif	/* libc-tsd.h */
+diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile
+index a2e8c0b128..c0c017b899 100644
+--- a/sysdeps/i386/Makefile
++++ b/sysdeps/i386/Makefile
+@@ -30,7 +30,9 @@ stack-align-test-flags += -malign-double
+ endif
+ 
+ ifeq ($(subdir),elf)
+-sysdep-dl-routines += tlsdesc dl-tlsdesc
++sysdep-dl-routines += \
++  dl-tls-get-addr \
++# sysdep-dl-routines
+ 
+ tests += tst-audit3
+ modules-names += tst-auditmod3a tst-auditmod3b
+@@ -58,6 +60,15 @@ $(objpfx)tst-ld-sse-use.out: ../sysdeps/i386/tst-ld-sse-use.sh $(objpfx)ld.so
+ 	@echo "Checking ld.so for SSE register use.  This will take a few seconds..."
+ 	$(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \
+ 	$(evaluate-test)
++
++tests-special += $(objpfx)check-gnu-tls.out
++
++$(objpfx)check-gnu-tls.out: $(common-objpfx)libc.so
++	LC_ALL=C $(READELF) -V -W $< \
++		| sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \
++		| grep GLIBC_ABI_GNU_TLS > $@; \
++	$(evaluate-test)
++generated += check-gnu-tls.out
+ else
+ CFLAGS-.os += $(if $(filter rtld-%.os,$(@F)), $(rtld-CFLAGS))
+ endif
+diff --git a/sysdeps/i386/Versions b/sysdeps/i386/Versions
+index 36e23b466a..9c84c8ef04 100644
+--- a/sysdeps/i386/Versions
++++ b/sysdeps/i386/Versions
+@@ -28,6 +28,11 @@ libc {
+   GLIBC_2.13 {
+     __fentry__;
+   }
++  GLIBC_ABI_GNU_TLS {
++    # This symbol is used only for empty version map and will be removed
++    # by scripts/versions.awk.
++    __placeholder_only_for_empty_version_map;
++  }
+ }
+ libm {
+   GLIBC_2.1 {
+diff --git a/sysdeps/i386/dl-tls-get-addr.c b/sysdeps/i386/dl-tls-get-addr.c
+new file mode 100644
+index 0000000000..c97e5c57be
+--- /dev/null
++++ b/sysdeps/i386/dl-tls-get-addr.c
+@@ -0,0 +1,68 @@
++/* Ifunc selector for ___tls_get_addr.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#ifdef SHARED
++# define ___tls_get_addr __redirect____tls_get_addr
++# include <dl-tls.h>
++# undef ___tls_get_addr
++# undef __tls_get_addr
++
++# define SYMBOL_NAME ___tls_get_addr
++# include <init-arch.h>
++
++extern __typeof (REDIRECT_NAME) OPTIMIZE (fnsave) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (fxsave) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (xsave) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (xsavec) attribute_hidden;
++
++static inline void *
++IFUNC_SELECTOR (void)
++{
++  const struct cpu_features* cpu_features = __get_cpu_features ();
++
++  if (cpu_features->xsave_state_size != 0)
++    {
++      if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
++	return OPTIMIZE (xsavec);
++      else
++	return OPTIMIZE (xsave);
++    }
++  else if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
++    return OPTIMIZE (fxsave);
++  return OPTIMIZE (fnsave);
++}
++
++libc_ifunc_redirected (__redirect____tls_get_addr, ___tls_get_addr,
++		       IFUNC_SELECTOR ());
++
++/* The special thing about the x86 TLS ABI is that we have two
++   variants of the __tls_get_addr function with different calling
++   conventions.  The GNU version, which we are mostly concerned with here,
++   takes the parameter in a register.  The name is changed by adding
++   an additional underscore at the beginning.  The Sun version uses
++   the normal calling convention.  */
++
++rtld_hidden_proto (___tls_get_addr)
++rtld_hidden_def (___tls_get_addr)
++
++void *
++__tls_get_addr (tls_index *ti)
++{
++  return ___tls_get_addr (ti);
++}
++#endif
+diff --git a/sysdeps/i386/dl-tls.h b/sysdeps/i386/dl-tls.h
+index f453931d78..ef605c5b0d 100644
+--- a/sysdeps/i386/dl-tls.h
++++ b/sysdeps/i386/dl-tls.h
+@@ -37,34 +37,14 @@ typedef struct dl_tls_index
+ /* This is the prototype for the GNU version.  */
+ extern void *___tls_get_addr (tls_index *ti)
+      __attribute__ ((__regparm__ (1)));
+-extern void *___tls_get_addr_internal (tls_index *ti)
+-     __attribute__ ((__regparm__ (1))) attribute_hidden;
+-
+ # if IS_IN (rtld)
+-/* The special thing about the x86 TLS ABI is that we have two
+-   variants of the __tls_get_addr function with different calling
+-   conventions.  The GNU version, which we are mostly concerned here,
+-   takes the parameter in a register.  The name is changed by adding
+-   an additional underscore at the beginning.  The Sun version uses
+-   the normal calling convention.  */
+-void *
+-__tls_get_addr (tls_index *ti)
+-{
+-  return ___tls_get_addr_internal (ti);
+-}
+-
+-
+ /* Prepare using the definition of __tls_get_addr in the generic
+    version of this file.  */
+-# define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr
+-strong_alias (___tls_get_addr, ___tls_get_addr_internal)
+-rtld_hidden_proto (___tls_get_addr)
+-rtld_hidden_def (___tls_get_addr)
+-#else
+-
++# define __tls_get_addr \
++    __attribute__ ((__regparm__ (1))) ___tls_get_addr_internal
++# else
+ /* Users should get the better interface.  */
+-# define __tls_get_addr ___tls_get_addr
+-
++#  define __tls_get_addr ___tls_get_addr
+ # endif
+ #endif
+ 
+diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h
+index 6aec06d15c..be9ecd659b 100644
+--- a/sysdeps/i386/dl-tlsdesc-dynamic.h
++++ b/sysdeps/i386/dl-tlsdesc-dynamic.h
+@@ -16,34 +16,6 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#undef REGISTER_SAVE_AREA
+-
+-#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0
+-# error STATE_SAVE_ALIGNMENT must be multiple of 16
+-#endif
+-
+-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+-# ifdef USE_FNSAVE
+-#  error USE_FNSAVE shouldn't be defined
+-# endif
+-# ifdef USE_FXSAVE
+-/* Use fxsave to save all registers.  */
+-#  define REGISTER_SAVE_AREA	512
+-# endif
+-#else
+-# ifdef USE_FNSAVE
+-/* Use fnsave to save x87 FPU stack registers.  */
+-#  define REGISTER_SAVE_AREA	108
+-# else
+-#  ifndef USE_FXSAVE
+-#   error USE_FXSAVE must be defined
+-#  endif
+-/* Use fxsave to save all registers.  Add 12 bytes to align the stack
+-   to 16 bytes.  */
+-#  define REGISTER_SAVE_AREA	(512 + 12)
+-# endif
+-#endif
+-
+ 	.hidden _dl_tlsdesc_dynamic
+ 	.global	_dl_tlsdesc_dynamic
+ 	.type	_dl_tlsdesc_dynamic,@function
+@@ -104,85 +76,7 @@ _dl_tlsdesc_dynamic:
+ 	ret
+ 	.p2align 4,,7
+ 2:
+-	cfi_adjust_cfa_offset (32)
+-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+-	movl	%ebx, -28(%esp)
+-	movl	%esp, %ebx
+-	cfi_def_cfa_register(%ebx)
+-	and	$-STATE_SAVE_ALIGNMENT, %esp
+-#endif
+-#ifdef REGISTER_SAVE_AREA
+-	subl	$REGISTER_SAVE_AREA, %esp
+-# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
+-	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+-# endif
+-#else
+-# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
+-#  error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true
+-# endif
+-	/* Allocate stack space of the required size to save the state.  */
+-	LOAD_PIC_REG (cx)
+-	subl	RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp
+-#endif
+-#ifdef USE_FNSAVE
+-	fnsave	(%esp)
+-#elif defined USE_FXSAVE
+-	fxsave	(%esp)
+-#else
+-	/* Save the argument for ___tls_get_addr in EAX.  */
+-	movl	%eax, %ecx
+-	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
+-	xorl	%edx, %edx
+-	/* Clear the XSAVE Header.  */
+-# ifdef USE_XSAVE
+-	movl	%edx, (512)(%esp)
+-	movl	%edx, (512 + 4 * 1)(%esp)
+-	movl	%edx, (512 + 4 * 2)(%esp)
+-	movl	%edx, (512 + 4 * 3)(%esp)
+-# endif
+-	movl	%edx, (512 + 4 * 4)(%esp)
+-	movl	%edx, (512 + 4 * 5)(%esp)
+-	movl	%edx, (512 + 4 * 6)(%esp)
+-	movl	%edx, (512 + 4 * 7)(%esp)
+-	movl	%edx, (512 + 4 * 8)(%esp)
+-	movl	%edx, (512 + 4 * 9)(%esp)
+-	movl	%edx, (512 + 4 * 10)(%esp)
+-	movl	%edx, (512 + 4 * 11)(%esp)
+-	movl	%edx, (512 + 4 * 12)(%esp)
+-	movl	%edx, (512 + 4 * 13)(%esp)
+-	movl	%edx, (512 + 4 * 14)(%esp)
+-	movl	%edx, (512 + 4 * 15)(%esp)
+-# ifdef USE_XSAVE
+-	xsave	(%esp)
+-# else
+-	xsavec	(%esp)
+-# endif
+-	/* Restore the argument for ___tls_get_addr in EAX.  */
+-	movl	%ecx, %eax
+-#endif
+-	call	HIDDEN_JUMPTARGET (___tls_get_addr)
+-	/* Get register content back.  */
+-#ifdef USE_FNSAVE
+-	frstor	(%esp)
+-#elif defined USE_FXSAVE
+-	fxrstor	(%esp)
+-#else
+-	/* Save and retore ___tls_get_addr return value stored in EAX.  */
+-	movl	%eax, %ecx
+-	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
+-	xorl	%edx, %edx
+-	xrstor	(%esp)
+-	movl	%ecx, %eax
+-#endif
+-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+-	mov	%ebx, %esp
+-	cfi_def_cfa_register(%esp)
+-	movl	-28(%esp), %ebx
+-	cfi_restore(%ebx)
+-#else
+-	addl	$REGISTER_SAVE_AREA, %esp
+-	cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
+-#endif
++#include "tls-get-addr-wrapper.h"
+ 	jmp	1b
+ 	cfi_endproc
+ 	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S
+index c080993a60..c914ca4220 100644
+--- a/sysdeps/i386/dl-tlsdesc.S
++++ b/sysdeps/i386/dl-tlsdesc.S
+@@ -22,23 +22,6 @@
+ #include <features-offsets.h>
+ #include "tlsdesc.h"
+ 
+-#ifndef DL_STACK_ALIGNMENT
+-/* Due to GCC bug:
+-
+-   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+-
+-   __tls_get_addr may be called with 4-byte stack alignment.  Although
+-   this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
+-   that stack will be always aligned at 16 bytes.  */
+-# define DL_STACK_ALIGNMENT 4
+-#endif
+-
+-/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align
+-   stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr.  */
+-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
+-  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
+-   || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
+-
+ 	.text
+ 
+      /* This function is used to compute the TP offset for symbols in
+diff --git a/sysdeps/i386/tls-get-addr-wrapper.h b/sysdeps/i386/tls-get-addr-wrapper.h
+new file mode 100644
+index 0000000000..0708e5ad1d
+--- /dev/null
++++ b/sysdeps/i386/tls-get-addr-wrapper.h
+@@ -0,0 +1,127 @@
++/* Wrapper of i386 ___tls_get_addr to save and restore vector registers.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#undef REGISTER_SAVE_AREA
++
++#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0
++# error STATE_SAVE_ALIGNMENT must be multiple of 16
++#endif
++
++#if DL_RUNTIME_RESOLVE_REALIGN_STACK
++# ifdef USE_FNSAVE
++#  error USE_FNSAVE shouldn't be defined
++# endif
++# ifdef USE_FXSAVE
++/* Use fxsave to save all registers.  */
++#  define REGISTER_SAVE_AREA	512
++# endif
++#else
++# ifdef USE_FNSAVE
++/* Use fnsave to save x87 FPU stack registers.  */
++#  define REGISTER_SAVE_AREA	108
++# else
++#  ifndef USE_FXSAVE
++#   error USE_FXSAVE must be defined
++#  endif
++/* Use fxsave to save all registers.  Add 12 bytes to align the stack
++   to 16 bytes.  */
++#  define REGISTER_SAVE_AREA	(512 + 12)
++# endif
++#endif
++
++#if DL_RUNTIME_RESOLVE_REALIGN_STACK
++	movl	%ebx, 28(%esp)
++	movl	%esp, %ebx
++	cfi_def_cfa_register(%ebx)
++	and	$-STATE_SAVE_ALIGNMENT, %esp
++#endif
++#ifdef REGISTER_SAVE_AREA
++	subl	$REGISTER_SAVE_AREA, %esp
++# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
++	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
++# endif
++#else
++# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
++#  error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true
++# endif
++	/* Allocate stack space of the required size to save the state.  */
++	LOAD_PIC_REG (cx)
++	subl	RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET \
++		+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp
++#endif
++#ifdef USE_FNSAVE
++	fnsave	(%esp)
++#elif defined USE_FXSAVE
++	fxsave	(%esp)
++#else
++	/* Save the argument for ___tls_get_addr in EAX.  */
++	movl	%eax, %ecx
++	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
++	xorl	%edx, %edx
++	/* Clear the XSAVE Header.  */
++# ifdef USE_XSAVE
++	movl	%edx, (512)(%esp)
++	movl	%edx, (512 + 4 * 1)(%esp)
++	movl	%edx, (512 + 4 * 2)(%esp)
++	movl	%edx, (512 + 4 * 3)(%esp)
++# endif
++	movl	%edx, (512 + 4 * 4)(%esp)
++	movl	%edx, (512 + 4 * 5)(%esp)
++	movl	%edx, (512 + 4 * 6)(%esp)
++	movl	%edx, (512 + 4 * 7)(%esp)
++	movl	%edx, (512 + 4 * 8)(%esp)
++	movl	%edx, (512 + 4 * 9)(%esp)
++	movl	%edx, (512 + 4 * 10)(%esp)
++	movl	%edx, (512 + 4 * 11)(%esp)
++	movl	%edx, (512 + 4 * 12)(%esp)
++	movl	%edx, (512 + 4 * 13)(%esp)
++	movl	%edx, (512 + 4 * 14)(%esp)
++	movl	%edx, (512 + 4 * 15)(%esp)
++# ifdef USE_XSAVE
++	xsave	(%esp)
++# else
++	xsavec	(%esp)
++# endif
++	/* Restore the argument for ___tls_get_addr in EAX.  */
++	movl	%ecx, %eax
++#endif
++	call	___tls_get_addr_internal
++	/* Get register content back.  */
++#ifdef USE_FNSAVE
++	frstor	(%esp)
++#elif defined USE_FXSAVE
++	fxrstor	(%esp)
++#else
++	/* Save and restore ___tls_get_addr return value stored in EAX.  */
++	movl	%eax, %ecx
++	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
++	xorl	%edx, %edx
++	xrstor	(%esp)
++	movl	%ecx, %eax
++#endif
++#if DL_RUNTIME_RESOLVE_REALIGN_STACK
++	mov	%ebx, %esp
++	cfi_def_cfa_register(%esp)
++	movl	28(%esp), %ebx
++	cfi_restore(%ebx)
++#else
++	addl	$REGISTER_SAVE_AREA, %esp
++	cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
++#endif
++
++#undef STATE_SAVE_ALIGNMENT
+diff --git a/sysdeps/i386/tls_get_addr.S b/sysdeps/i386/tls_get_addr.S
+new file mode 100644
+index 0000000000..7d143d8a23
+--- /dev/null
++++ b/sysdeps/i386/tls_get_addr.S
+@@ -0,0 +1,57 @@
++/* Thread-local storage handling in the ELF dynamic linker.  i386 version.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <tls.h>
++#include <cpu-features-offsets.h>
++#include <features-offsets.h>
++
++	.text
++#ifdef SHARED
++# define USE_FNSAVE
++# define MINIMUM_ALIGNMENT	4
++# define STATE_SAVE_ALIGNMENT	4
++# define ___tls_get_addr	_____tls_get_addr_fnsave
++# include "tls_get_addr.h"
++# undef ___tls_get_addr
++# undef MINIMUM_ALIGNMENT
++# undef USE_FNSAVE
++
++# define MINIMUM_ALIGNMENT	16
++
++# define USE_FXSAVE
++# define STATE_SAVE_ALIGNMENT	16
++# define ___tls_get_addr	_____tls_get_addr_fxsave
++# include "tls_get_addr.h"
++# undef ___tls_get_addr
++# undef USE_FXSAVE
++
++# define USE_XSAVE
++# define STATE_SAVE_ALIGNMENT	64
++# define ___tls_get_addr	_____tls_get_addr_xsave
++# include "tls_get_addr.h"
++# undef ___tls_get_addr
++# undef USE_XSAVE
++
++# define USE_XSAVEC
++# define STATE_SAVE_ALIGNMENT	64
++# define ___tls_get_addr	_____tls_get_addr_xsavec
++# include "tls_get_addr.h"
++# undef ___tls_get_addr
++# undef USE_XSAVEC
++#endif /* SHARED */
+diff --git a/sysdeps/i386/tls_get_addr.h b/sysdeps/i386/tls_get_addr.h
+new file mode 100644
+index 0000000000..1825798724
+--- /dev/null
++++ b/sysdeps/i386/tls_get_addr.h
+@@ -0,0 +1,42 @@
++/* Thread-local storage handling in the ELF dynamic linker.  i386 version.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++	.hidden ___tls_get_addr
++	.global	___tls_get_addr
++	.type	___tls_get_addr,@function
++
++	/* This function is a wrapper of ___tls_get_addr_internal to
++	   preserve caller-saved vector registers.  */
++
++	cfi_startproc
++	.align 16
++___tls_get_addr:
++	/* Like all TLS resolvers, preserve call-clobbered registers.
++	   We need two scratch regs anyway.  */
++	subl	$32, %esp
++	cfi_adjust_cfa_offset (32)
++	movl	%ecx, 20(%esp)
++	movl	%edx, 24(%esp)
++#include "tls-get-addr-wrapper.h"
++	movl	20(%esp), %ecx
++	movl	24(%esp), %edx
++	addl	$32, %esp
++	cfi_adjust_cfa_offset (-32)
++	ret
++	cfi_endproc
++	.size	___tls_get_addr, .-___tls_get_addr
 diff --git a/sysdeps/ieee754/dbl-64/e_atanh.c b/sysdeps/ieee754/dbl-64/e_atanh.c
 index 1e09e46f0f..d1c71b2aa4 100644
 --- a/sysdeps/ieee754/dbl-64/e_atanh.c
@@ -7388,6 +11425,30 @@
  	    {
  	      if (sgn)
  		return -st[j].rh - st[j].rl;
+diff --git a/sysdeps/loongarch/preconfigure b/sysdeps/loongarch/preconfigure
+index 0d1e9ed8df..6726ab8302 100644
+--- a/sysdeps/loongarch/preconfigure
++++ b/sysdeps/loongarch/preconfigure
+@@ -44,6 +44,7 @@ loongarch*)
+ 
+     base_machine=loongarch
+     mtls_descriptor=desc
++    mtls_traditional=trad
+     ;;
+ esac
+ 
+diff --git a/sysdeps/loongarch/preconfigure.ac b/sysdeps/loongarch/preconfigure.ac
+index df07dbf41f..56402261df 100644
+--- a/sysdeps/loongarch/preconfigure.ac
++++ b/sysdeps/loongarch/preconfigure.ac
+@@ -42,6 +42,7 @@ loongarch*)
+ 
+     base_machine=loongarch
+     mtls_descriptor=desc
++    mtls_traditional=trad
+     ;;
+ esac
+ 
 diff --git a/sysdeps/mach/hurd/dl-execstack.c b/sysdeps/mach/hurd/dl-execstack.c
 index 0617d3a161..dc4719bd38 100644
 --- a/sysdeps/mach/hurd/dl-execstack.c
@@ -7445,6 +11506,22 @@
  
  
  /* Cleanup buffers */
+diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile
+index 5e6cb07ce6..5cdb64f29b 100644
+--- a/sysdeps/powerpc/Makefile
++++ b/sysdeps/powerpc/Makefile
+@@ -28,6 +28,11 @@ tst-cache-ppc-static-dlopen-ENV = LD_LIBRARY_PATH=$(objpfx):$(common-objpfx):$(c
+ $(objpfx)tst-cache-ppc-static-dlopen.out: $(objpfx)mod-cache-ppc.so
+ 
+ $(objpfx)tst-cache-ppc: $(objpfx)mod-cache-ppc.so
++
++# The test checks that __tls_get_addr does not clobber caller-saved
++# registers, so disable the powerpc specific optimization to force a
++# __tls_get_addr call.
++LDFLAGS-tst-tls23-mod.so = -Wl,--no-tls-get-addr-optimize
+ endif
+ 
+ ifneq (no,$(multi-arch))
 diff --git a/sysdeps/powerpc/powerpc64/le/power10/memchr.S b/sysdeps/powerpc/powerpc64/le/power10/memchr.S
 deleted file mode 100644
 index 96ad5a2e1d..0000000000
@@ -7767,37 +11844,20 @@
 -weak_alias (__memchr, memchr)
 -libc_hidden_builtin_def (memchr)
 diff --git a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S
-deleted file mode 100644
-index fffa1ee0a9..0000000000
+index fffa1ee0a9..0d4a53317c 100644
 --- a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S
-+++ /dev/null
-@@ -1,233 +0,0 @@
--/* Optimized strcmp implementation for PowerPC64/POWER10.
++++ b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S
+@@ -1,5 +1,5 @@
+ /* Optimized strcmp implementation for PowerPC64/POWER10.
 -   Copyright (C) 2021-2025 Free Software Foundation, Inc.
--   This file is part of the GNU C Library.
--
--   The GNU C Library is free software; you can redistribute it and/or
--   modify it under the terms of the GNU Lesser General Public
--   License as published by the Free Software Foundation; either
--   version 2.1 of the License, or (at your option) any later version.
--
--   The GNU C Library is distributed in the hope that it will be useful,
--   but WITHOUT ANY WARRANTY; without even the implied warranty of
--   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
--   Lesser General Public License for more details.
--
--   You should have received a copy of the GNU Lesser General Public
--   License along with the GNU C Library; if not, see
--   <https://www.gnu.org/licenses/>.  */
--#include <sysdep.h>
--
--#ifndef STRCMP
--# define STRCMP strcmp
--#endif
--
--/* Implements the function
--   int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]).  */
--
++   Copyright (C) 2025 Free Software Foundation, Inc.
+    This file is part of the GNU C Library.
+ 
+    The GNU C Library is free software; you can redistribute it and/or
+@@ -24,16 +24,6 @@
+ /* Implements the function
+    int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]).  */
+ 
 -/* TODO: Change this to actual instructions when minimum binutils is upgraded
 -   to 2.27.  Macros are defined below for these newer instructions in order
 -   to maintain compatibility.  */
@@ -7808,44 +11868,36 @@
 -	| ((1)<<(32-11))	     \
 -	| ((ra)<<(32-16))	     \
 -	| dq)
--
--#define COMPARE_16(vreg1,vreg2,offset)  \
--	lxv       vreg1+32,offset(r3);  \
--	lxv       vreg2+32,offset(r4);	\
--	vcmpnezb. v7,vreg1,vreg2;	\
--	bne       cr6,L(different);     \
--
--#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \
+ 
+ #define COMPARE_16(vreg1,vreg2,offset)  \
+ 	lxv       vreg1+32,offset(r3);  \
+@@ -42,8 +32,8 @@
+ 	bne       cr6,L(different);     \
+ 
+ #define COMPARE_32(vreg1,vreg2,offset,label1,label2) \
 -	LXVP(vreg1+32,offset,r3);                    \
 -	LXVP(vreg2+32,offset,r4);                    \
--	vcmpnezb. v7,vreg1+1,vreg2+1;                \
--	bne	  cr6,L(label1);                     \
--	vcmpnezb. v7,vreg1,vreg2;                    \
--	bne	  cr6,L(label2);                     \
--
--#define TAIL(vreg1,vreg2)     \
--	vctzlsbb r6,v7;	      \
--	vextubrx r5,r6,vreg1; \
--	vextubrx r4,r6,vreg2; \
--	subf	 r3,r4,r5;    \
--	blr;                  \
--
--#define CHECK_N_BYTES(reg1,reg2,len_reg) \
--	sldi	  r0,len_reg,56;         \
--	lxvl	  32+v4,reg1,r0;         \
--	lxvl	  32+v5,reg2,r0;         \
--	add	  reg1,reg1,len_reg;     \
--	add	  reg2,reg2,len_reg;     \
++	lxvp	  vreg1+32,offset(r3);               \
++	lxvp	  vreg2+32,offset(r4);               \
+ 	vcmpnezb. v7,vreg1+1,vreg2+1;                \
+ 	bne	  cr6,L(label1);                     \
+ 	vcmpnezb. v7,vreg1,vreg2;                    \
+@@ -62,120 +52,77 @@
+ 	lxvl	  32+v5,reg2,r0;         \
+ 	add	  reg1,reg1,len_reg;     \
+ 	add	  reg2,reg2,len_reg;     \
 -	vcmpnezb  v7,v4,v5;              \
--	vctzlsbb  r6,v7;                 \
--	cmpld	  cr7,r6,len_reg;        \
--	blt	  cr7,L(different);      \
--
++	vcmpnezb. v7,v4,v5;              \
+ 	vctzlsbb  r6,v7;                 \
+ 	cmpld	  cr7,r6,len_reg;        \
+ 	blt	  cr7,L(different);      \
+ 
 -	/* TODO: change this to .machine power10 when the minimum required
 -	binutils allows it.  */
--
+ 
 -	.machine  power9
--ENTRY_TOCLESS (STRCMP, 4)
++	.machine  power10
+ ENTRY_TOCLESS (STRCMP, 4)
 -	andi.	r7,r3,4095
 -	andi.	r8,r4,4095
 -	cmpldi	cr0,r7,4096-16
@@ -7862,7 +11914,28 @@
 -	cmpld	cr7,r7,r5
 -	beq	cr7,L(same_aligned)
 -	blt	cr7,L(nalign1_min)
--
++	li	 r11,16
++	/* eq bit of cr1 used as swap status flag to indicate if
++	source pointers were swapped.  */
++	crclr	 4*cr1+eq
++	andi.	 r7,r3,15
++	sub	 r7,r11,r7	/* r7(nalign1) = 16 - (str1 & 15).  */
++	andi.	 r9,r4,15
++	sub	 r5,r11,r9	/* r5(nalign2) = 16 - (str2 & 15).  */
++	cmpld	 cr7,r7,r5
++	beq	 cr7,L(same_aligned)
++	blt	 cr7,L(nalign1_min)
++	/* Swap r3 and r4, and r7 and r5 such that r3 and r7 hold the
++	pointer which is closer to the next 16B boundary so that only
++	one CHECK_N_BYTES is needed before entering the loop below.  */
++	mr	 r8,r4
++	mr	 r4,r3
++	mr	 r3,r8
++	mr	 r12,r7
++	mr	 r7,r5
++	mr	 r5,r12
++	crset	 4*cr1+eq	/* Set bit on swapping source pointers.  */
+ 
 -	/* nalign2 is minimum and s2 pointer is aligned.  */
 -	CHECK_N_BYTES(r3,r4,r5)
 -	/* Are we on the 64B hunk which crosses a page?  */
@@ -7876,8 +11949,9 @@
 -	b	L(compare_64B_unaligned)
 -
 -	/* nalign1 is minimum and s1 pointer is aligned.  */
--L(nalign1_min):
--	CHECK_N_BYTES(r3,r4,r7)
++	.p2align 5
+ L(nalign1_min):
+ 	CHECK_N_BYTES(r3,r4,r7)
 -	/* Are we on the 64B hunk which crosses a page?  */
 -	andi.	r10,r4,63	/* Determine offset into 64B hunk.  */
 -	andi.	r8,r4,15	/* The offset into the 16B hunk.  */
@@ -7886,8 +11960,8 @@
 -	rlwinm. r7,r7,26,0x3F	/* ((r4-4096))>>6&63.  */
 -	beq	L(compare_64_pagecross)
 -	mtctr	r7
--
--	.p2align 5
+ 
+ 	.p2align 5
 -L(compare_64B_unaligned):
 -	COMPARE_16(v4,v5,0)
 -	COMPARE_16(v4,v5,16)
@@ -7896,13 +11970,30 @@
 -	addi	r3,r3,64
 -	addi	r4,r4,64
 -	bdnz	L(compare_64B_unaligned)
--
++L(s1_aligned):
++	/* r9 and r5 are the numbers of bytes to be read after and before
++	 the page boundary, respectively.  */
++	sub 	r5,r5,r7
++	subfic	r9,r5,16
++	/* Now let r7 hold the count of quadwords which can be
++	checked without crossing a page boundary. quadword offset is
++	(str2>>4)&0xFF.  */
++	rlwinm	r7,r4,28,0xFF
++	/* Below check is required only for first iteration. For second
++	iteration and beyond, the new loop counter is always 255.  */
++	cmpldi	r7,255
++	beq	L(L3)
++	/* Get the initial loop count by 255-((str2>>4)&0xFF).  */
++	subfic  r11,r7,255
+ 
 -	/* Cross the page boundary of s2, carefully. Only for first
 -	iteration we have to get the count of 64B blocks to be checked.
 -	From second iteration and beyond, loop counter is always 63.  */
 -L(compare_64_pagecross):
 -	li	r11, 63
--	mtctr	r11
++	.p2align 5
++L(L1):
+ 	mtctr	r11
 -	cmpldi	r10,16
 -	ble	L(cross_4)
 -	cmpldi	r10,32
@@ -7937,8 +12028,12 @@
 -	CHECK_N_BYTES(r3,r4,r9)
 -	CHECK_N_BYTES(r3,r4,r8)
 -	COMPARE_16(v4,v5,0)
--	addi	r3,r3,16
--	addi	r4,r4,16
++
++	.p2align 5
++L(L2):
++	COMPARE_16(v4,v5,0)	/* Load 16B blocks using lxv.  */
+ 	addi	r3,r3,16
+ 	addi	r4,r4,16
 -	b	L(compare_64B_unaligned)
 -L(cross_4):
 -	COMPARE_16(v4,v5,0)
@@ -7946,103 +12041,92 @@
 -	COMPARE_16(v4,v5,32)
 -	addi	r3,r3,48
 -	addi	r4,r4,48
--	CHECK_N_BYTES(r3,r4,r9)
++	bdnz	L(L2)
++	/* Cross the page boundary of s2, carefully.  */
++
++	.p2align 5
++L(L3):
++	CHECK_N_BYTES(r3,r4,r5)
+ 	CHECK_N_BYTES(r3,r4,r9)
 -	CHECK_N_BYTES(r3,r4,r8)
 -	b	L(compare_64B_unaligned)
--
--L(same_aligned):
--	CHECK_N_BYTES(r3,r4,r7)
--        /* Align s1 to 32B and adjust s2 address.
--	   Use lxvp only if both s1 and s2 are 32B aligned.  */
--	COMPARE_16(v4,v5,0)
--	COMPARE_16(v4,v5,16)
--	COMPARE_16(v4,v5,32)
--	COMPARE_16(v4,v5,48)
--	addi	r3,r3,64
--	addi	r4,r4,64
--	COMPARE_16(v4,v5,0)
--	COMPARE_16(v4,v5,16)
--
--	clrldi	r6,r3,59
--	subfic	r5,r6,32
--	add	r3,r3,r5
--	add	r4,r4,r5
--	andi.	r5,r4,0x1F
--	beq	cr0,L(32B_aligned_loop)
--
--	.p2align 5
--L(16B_aligned_loop):
--	COMPARE_16(v4,v5,0)
--	COMPARE_16(v4,v5,16)
--	COMPARE_16(v4,v5,32)
--	COMPARE_16(v4,v5,48)
--	addi	r3,r3,64
--	addi	r4,r4,64
--	b	L(16B_aligned_loop)
--
--	/* Calculate and return the difference.  */
--L(different):
++	li 	r11,255		/* Load the new loop counter.  */
++	b	L(L1)
+ 
++	.p2align 5
+ L(same_aligned):
+ 	CHECK_N_BYTES(r3,r4,r7)
+         /* Align s1 to 32B and adjust s2 address.
+@@ -208,26 +155,31 @@ L(16B_aligned_loop):
+ 
+ 	/* Calculate and return the difference.  */
+ L(different):
 -	TAIL(v4,v5)
--
--	.p2align 5
--L(32B_aligned_loop):
--	COMPARE_32(v14,v16,0,tail1,tail2)
++	vctzlsbb r6,v7
++	vextubrx r5,r6,v4
++	vextubrx r4,r6,v5
++	bt  	 4*cr1+eq,L(swapped)
++	subf	 r3,r4,r5
++	blr
++
++	/* If src pointers were swapped, then swap the
++	indices and calculate the return value.  */
++L(swapped):
++	subf     r3,r5,r4
++	blr
+ 
+ 	.p2align 5
+ L(32B_aligned_loop):
+ 	COMPARE_32(v14,v16,0,tail1,tail2)
 -	COMPARE_32(v18,v20,32,tail3,tail4)
 -	COMPARE_32(v22,v24,64,tail5,tail6)
 -	COMPARE_32(v26,v28,96,tail7,tail8)
--	addi	r3,r3,128
--	addi	r4,r4,128
--	b	L(32B_aligned_loop)
--
--L(tail1): TAIL(v15,v17)
--L(tail2): TAIL(v14,v16)
++	COMPARE_32(v14,v16,32,tail1,tail2)
++	COMPARE_32(v14,v16,64,tail1,tail2)
++	COMPARE_32(v14,v16,96,tail1,tail2)
+ 	addi	r3,r3,128
+ 	addi	r4,r4,128
+ 	b	L(32B_aligned_loop)
+ 
+ L(tail1): TAIL(v15,v17)
+ L(tail2): TAIL(v14,v16)
 -L(tail3): TAIL(v19,v21)
 -L(tail4): TAIL(v18,v20)
 -L(tail5): TAIL(v23,v25)
 -L(tail6): TAIL(v22,v24)
 -L(tail7): TAIL(v27,v29)
 -L(tail8): TAIL(v26,v28)
--
--END (STRCMP)
--libc_hidden_builtin_def (strcmp)
+ 
+ END (STRCMP)
+ libc_hidden_builtin_def (strcmp)
+diff --git a/sysdeps/powerpc/powerpc64/le/power10/strlen.S b/sysdeps/powerpc/powerpc64/le/power10/strlen.S
+index 4985a9291b..a4c5498740 100644
+--- a/sysdeps/powerpc/powerpc64/le/power10/strlen.S
++++ b/sysdeps/powerpc/powerpc64/le/power10/strlen.S
+@@ -31,7 +31,7 @@
+ #  define FUNCNAME RAWMEMCHR
+ # endif
+ # define MCOUNT_NARGS 2
+-# define VREG_ZERO v20
++# define VREG_ZERO v17
+ # define OFF_START_LOOP 256
+ # define RAWMEMCHR_SUBTRACT_VECTORS \
+ 	vsububm   v4,v4,v18;	    \
 diff --git a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S
-deleted file mode 100644
-index 10700dd400..0000000000
+index 10700dd400..6e09fcb7f2 100644
 --- a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S
-+++ /dev/null
-@@ -1,271 +0,0 @@
--/* Optimized strncmp implementation for PowerPC64/POWER10.
++++ b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S
+@@ -1,5 +1,5 @@
+ /* Optimized strncmp implementation for PowerPC64/POWER10.
 -   Copyright (C) 2024-2025 Free Software Foundation, Inc.
--   This file is part of the GNU C Library.
--
--   The GNU C Library is free software; you can redistribute it and/or
--   modify it under the terms of the GNU Lesser General Public
--   License as published by the Free Software Foundation; either
--   version 2.1 of the License, or (at your option) any later version.
--
--   The GNU C Library is distributed in the hope that it will be useful,
--   but WITHOUT ANY WARRANTY; without even the implied warranty of
--   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
--   Lesser General Public License for more details.
--
--   You should have received a copy of the GNU Lesser General Public
--   License along with the GNU C Library; if not, see
--   <https://www.gnu.org/licenses/>.  */
--
--#include <sysdep.h>
--
--/* Implements the function
--
--   int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n)
--
--   The implementation uses unaligned doubleword access to avoid specialized
--   code paths depending of data alignment for first 32 bytes and uses
--   vectorised loops after that.  */
--
--#ifndef STRNCMP
--# define STRNCMP strncmp
--#endif
--
++   Copyright (C) 2025 Free Software Foundation, Inc.
+    This file is part of the GNU C Library.
+ 
+    The GNU C Library is free software; you can redistribute it and/or
+@@ -30,17 +30,6 @@
+ # define STRNCMP strncmp
+ #endif
+ 
 -/* TODO: Change this to actual instructions when minimum binutils is upgraded
 -   to 2.27.  Macros are defined below for these newer instructions in order
 -   to maintain compatibility.  */
@@ -8054,236 +12138,58 @@
 -	| ((ra)<<(32-16))            \
 -	| dq)
 -
--#define COMPARE_16(vreg1,vreg2,offset) \
--	lxv	  vreg1+32,offset(r3); \
--	lxv	  vreg2+32,offset(r4); \
--	vcmpnezb. v7,vreg1,vreg2;      \
--	bne	  cr6,L(different);    \
--	cmpldi	  cr7,r5,16;           \
--	ble	  cr7,L(ret0);         \
--	addi	  r5,r5,-16;
--
--#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \
+ #define COMPARE_16(vreg1,vreg2,offset) \
+ 	lxv	  vreg1+32,offset(r3); \
+ 	lxv	  vreg2+32,offset(r4); \
+@@ -51,8 +40,8 @@
+ 	addi	  r5,r5,-16;
+ 
+ #define COMPARE_32(vreg1,vreg2,offset,label1,label2) \
 -	LXVP(vreg1+32,offset,r3);                    \
 -	LXVP(vreg2+32,offset,r4);                    \
--	vcmpnezb. v7,vreg1+1,vreg2+1;                \
--	bne	  cr6,L(label1);                     \
--	vcmpnezb. v7,vreg1,vreg2;                    \
--	bne	  cr6,L(label2);                     \
--	cmpldi	  cr7,r5,32;                         \
--	ble	  cr7,L(ret0);                       \
--	addi	  r5,r5,-32;
--
--#define TAIL_FIRST_16B(vreg1,vreg2) \
--	vctzlsbb r6,v7;             \
--	cmpld	 cr7,r5,r6;         \
--	ble	 cr7,L(ret0);       \
--	vextubrx r5,r6,vreg1;       \
--	vextubrx r4,r6,vreg2;       \
--	subf	 r3,r4,r5;          \
--	blr;
--
--#define TAIL_SECOND_16B(vreg1,vreg2) \
--	vctzlsbb r6,v7;              \
--	addi	 r0,r6,16;           \
--	cmpld	 cr7,r5,r0;          \
--	ble	 cr7,L(ret0);        \
--	vextubrx r5,r6,vreg1;        \
--	vextubrx r4,r6,vreg2;        \
--	subf	 r3,r4,r5;           \
--	blr;
--
--#define CHECK_N_BYTES(reg1,reg2,len_reg) \
--	sldi	  r6,len_reg,56;	 \
--	lxvl	  32+v4,reg1,r6;	 \
--	lxvl	  32+v5,reg2,r6;	 \
--	add	  reg1,reg1,len_reg;	 \
--	add	  reg2,reg2,len_reg;	 \
--	vcmpnezb  v7,v4,v5;		 \
--	vctzlsbb  r6,v7;		 \
--	cmpld	  cr7,r6,len_reg;	 \
--	blt	  cr7,L(different);	 \
--	cmpld	  cr7,r5,len_reg;	 \
--	ble	  cr7,L(ret0);		 \
--	sub	  r5,r5,len_reg;	 \
--
++	lxvp	  vreg1+32,offset(r3);               \
++	lxvp	  vreg2+32,offset(r4);               \
+ 	vcmpnezb. v7,vreg1+1,vreg2+1;                \
+ 	bne	  cr6,L(label1);                     \
+ 	vcmpnezb. v7,vreg1,vreg2;                    \
+@@ -94,9 +83,7 @@
+ 	ble	  cr7,L(ret0);		 \
+ 	sub	  r5,r5,len_reg;	 \
+ 
 -	/* TODO: change this to .machine power10 when the minimum required
 -	 binutils allows it.  */
 -	.machine  power9
--ENTRY_TOCLESS (STRNCMP, 4)
--	/* Check if size is 0.  */
--	cmpdi	 cr0,r5,0
--	beq	 cr0,L(ret0)
--	andi.   r7,r3,4095
--	andi.   r8,r4,4095
--	cmpldi  cr0,r7,4096-16
--	cmpldi  cr1,r8,4096-16
--	bgt     cr0,L(crosses)
--	bgt     cr1,L(crosses)
--	COMPARE_16(v4,v5,0)
--	addi	r3,r3,16
--	addi	r4,r4,16
--
--L(crosses):
--	andi.	 r7,r3,15
--	subfic	 r7,r7,16	/* r7(nalign1) = 16 - (str1 & 15).  */
--	andi.	 r9,r4,15
--	subfic	 r8,r9,16	/* r8(nalign2) = 16 - (str2 & 15).  */
--	cmpld	 cr7,r7,r8
--	beq	 cr7,L(same_aligned)
--	blt	 cr7,L(nalign1_min)
--
--	/* nalign2 is minimum and s2 pointer is aligned.  */
--	CHECK_N_BYTES(r3,r4,r8)
--	/* Are we on the 64B hunk which crosses a page?  */
--	andi.   r10,r3,63       /* Determine offset into 64B hunk.  */
--	andi.   r8,r3,15        /* The offset into the 16B hunk.  */
--	neg     r7,r3
--	andi.   r9,r7,15        /* Number of bytes after a 16B cross.  */
--	rlwinm. r7,r7,26,0x3F   /* ((r4-4096))>>6&63.  */
--	beq     L(compare_64_pagecross)
--	mtctr   r7
--	b       L(compare_64B_unaligned)
--
--	/* nalign1 is minimum and s1 pointer is aligned.  */
--L(nalign1_min):
--	CHECK_N_BYTES(r3,r4,r7)
--	/* Are we on the 64B hunk which crosses a page?  */
--	andi.   r10,r4,63       /* Determine offset into 64B hunk.  */
--	andi.   r8,r4,15        /* The offset into the 16B hunk.  */
--	neg     r7,r4
--	andi.   r9,r7,15        /* Number of bytes after a 16B cross.  */
--	rlwinm. r7,r7,26,0x3F   /* ((r4-4096))>>6&63.  */
--	beq     L(compare_64_pagecross)
--	mtctr   r7
--
--	.p2align 5
--L(compare_64B_unaligned):
--	COMPARE_16(v4,v5,0)
--	COMPARE_16(v4,v5,16)
--	COMPARE_16(v4,v5,32)
--	COMPARE_16(v4,v5,48)
--	addi    r3,r3,64
--	addi    r4,r4,64
--	bdnz    L(compare_64B_unaligned)
--
--	/* Cross the page boundary of s2, carefully. Only for first
--	iteration we have to get the count of 64B blocks to be checked.
--	From second iteration and beyond, loop counter is always 63.  */
--L(compare_64_pagecross):
--	li      r11, 63
--	mtctr   r11
--	cmpldi  r10,16
--	ble     L(cross_4)
--	cmpldi  r10,32
--	ble     L(cross_3)
--	cmpldi  r10,48
--	ble     L(cross_2)
--L(cross_1):
--	CHECK_N_BYTES(r3,r4,r9)
--	CHECK_N_BYTES(r3,r4,r8)
--	COMPARE_16(v4,v5,0)
--	COMPARE_16(v4,v5,16)
--	COMPARE_16(v4,v5,32)
--	addi    r3,r3,48
--	addi    r4,r4,48
--	b       L(compare_64B_unaligned)
--L(cross_2):
--	COMPARE_16(v4,v5,0)
--	addi    r3,r3,16
--	addi    r4,r4,16
--	CHECK_N_BYTES(r3,r4,r9)
--	CHECK_N_BYTES(r3,r4,r8)
--	COMPARE_16(v4,v5,0)
--	COMPARE_16(v4,v5,16)
--	addi    r3,r3,32
--	addi    r4,r4,32
--	b       L(compare_64B_unaligned)
--L(cross_3):
--	COMPARE_16(v4,v5,0)
--	COMPARE_16(v4,v5,16)
--	addi    r3,r3,32
--	addi    r4,r4,32
--	CHECK_N_BYTES(r3,r4,r9)
--	CHECK_N_BYTES(r3,r4,r8)
--	COMPARE_16(v4,v5,0)
--	addi    r3,r3,16
--	addi    r4,r4,16
--	b       L(compare_64B_unaligned)
--L(cross_4):
--	COMPARE_16(v4,v5,0)
--	COMPARE_16(v4,v5,16)
--	COMPARE_16(v4,v5,32)
--	addi    r3,r3,48
--	addi    r4,r4,48
--	CHECK_N_BYTES(r3,r4,r9)
--	CHECK_N_BYTES(r3,r4,r8)
--	b       L(compare_64B_unaligned)
--
--L(same_aligned):
--	CHECK_N_BYTES(r3,r4,r7)
--	/* Align s1 to 32B and adjust s2 address.
--	   Use lxvp only if both s1 and s2 are 32B aligned.  */
--	COMPARE_16(v4,v5,0)
--	COMPARE_16(v4,v5,16)
--	COMPARE_16(v4,v5,32)
--	COMPARE_16(v4,v5,48)
--	addi	r3,r3,64
--	addi	r4,r4,64
--	COMPARE_16(v4,v5,0)
--	COMPARE_16(v4,v5,16)
--	addi	r5,r5,32
--
--	clrldi  r6,r3,59
--	subfic	r7,r6,32
--	add	r3,r3,r7
--	add	r4,r4,r7
--	subf	r5,r7,r5
--	andi.	r7,r4,0x1F
--	beq	cr0,L(32B_aligned_loop)
--
--	.p2align 5
--L(16B_aligned_loop):
--	COMPARE_16(v4,v5,0)
--	COMPARE_16(v4,v5,16)
--	COMPARE_16(v4,v5,32)
--	COMPARE_16(v4,v5,48)
--	addi	r3,r3,64
--	addi	r4,r4,64
--	b	L(16B_aligned_loop)
--
--	/* Calculate and return the difference.  */
--L(different):
--	TAIL_FIRST_16B(v4,v5)
--
--	.p2align 5
--L(32B_aligned_loop):
--	COMPARE_32(v14,v16,0,tail1,tail2)
++	.machine  power10
+ ENTRY_TOCLESS (STRNCMP, 4)
+ 	/* Check if size is 0.  */
+ 	cmpdi	 cr0,r5,0
+@@ -246,21 +233,15 @@ L(different):
+ 	.p2align 5
+ L(32B_aligned_loop):
+ 	COMPARE_32(v14,v16,0,tail1,tail2)
 -	COMPARE_32(v18,v20,32,tail3,tail4)
 -	COMPARE_32(v22,v24,64,tail5,tail6)
 -	COMPARE_32(v26,v28,96,tail7,tail8)
--	addi	r3,r3,128
--	addi	r4,r4,128
--	b	L(32B_aligned_loop)
--
--L(tail1): TAIL_FIRST_16B(v15,v17)
--L(tail2): TAIL_SECOND_16B(v14,v16)
++	COMPARE_32(v14,v16,32,tail1,tail2)
++	COMPARE_32(v14,v16,64,tail1,tail2)
++	COMPARE_32(v14,v16,96,tail1,tail2)
+ 	addi	r3,r3,128
+ 	addi	r4,r4,128
+ 	b	L(32B_aligned_loop)
+ 
+ L(tail1): TAIL_FIRST_16B(v15,v17)
+ L(tail2): TAIL_SECOND_16B(v14,v16)
 -L(tail3): TAIL_FIRST_16B(v19,v21)
 -L(tail4): TAIL_SECOND_16B(v18,v20)
 -L(tail5): TAIL_FIRST_16B(v23,v25)
 -L(tail6): TAIL_SECOND_16B(v22,v24)
 -L(tail7): TAIL_FIRST_16B(v27,v29)
 -L(tail8): TAIL_SECOND_16B(v26,v28)
--
--	.p2align 5
--L(ret0):
--	li	r3,0
--	blr
--
--END(STRNCMP)
--libc_hidden_builtin_def(strncmp)
+ 
+ 	.p2align 5
+ L(ret0):
 diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
-index dc7c5b14ee..142e6c24c7 100644
+index dc7c5b14ee..c40feea250 100644
 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
 +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
 @@ -31,12 +31,11 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
@@ -8298,27 +12204,17 @@
 -		   strlen-power10
 +sysdep_routines += memcmp-power10 memcpy-power10 memmove-power10 memset-power10 \
 +		   rawmemchr-power9 rawmemchr-power10 \
-+		   strcmp-power9 strncmp-power9 \
++		   strcmp-power9 strcmp-power10 strncmp-power9 strncmp-power10 \
 +		   strcpy-power9 strcat-power10 stpcpy-power9 \
 +		   strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10
  endif
  CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
  CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops
 diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
-index 0a31a5853c..de288a0d80 100644
+index 0a31a5853c..65debb5ee5 100644
 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
 +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
-@@ -164,9 +164,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
-   /* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c.  */
-   IFUNC_IMPL (i, name, strncmp,
- #ifdef __LITTLE_ENDIAN__
--	      IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_1
--			      && hwcap & PPC_FEATURE_HAS_VSX,
--			      __strncmp_power10)
- 	      IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00
- 			      && hwcap & PPC_FEATURE_HAS_ALTIVEC,
- 			      __strncmp_power9)
-@@ -229,12 +226,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -229,12 +229,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
  
    /* Support sysdeps/powerpc/powerpc64/multiarch/memchr.c.  */
    IFUNC_IMPL (i, name, memchr,
@@ -8331,17 +12227,6 @@
  	      IFUNC_IMPL_ADD (array, i, memchr,
  			      hwcap2 & PPC_FEATURE2_ARCH_2_07
  			      && hwcap & PPC_FEATURE_HAS_ALTIVEC,
-@@ -386,10 +377,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
-   /* Support sysdeps/powerpc/powerpc64/multiarch/strcmp.c.  */
-   IFUNC_IMPL (i, name, strcmp,
- #ifdef __LITTLE_ENDIAN__
--	      IFUNC_IMPL_ADD (array, i, strcmp,
--			      (hwcap2 & PPC_FEATURE2_ARCH_3_1)
--			      && (hwcap & PPC_FEATURE_HAS_VSX),
--			      __strcmp_power10)
- 	      IFUNC_IMPL_ADD (array, i, strcmp,
- 			      hwcap2 & PPC_FEATURE2_ARCH_3_00
- 			      && hwcap & PPC_FEATURE_HAS_ALTIVEC,
 diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S b/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S
 deleted file mode 100644
 index c9d2f4efd1..0000000000
@@ -8411,111 +12296,26 @@
  weak_alias (__memchr, memchr)
  libc_hidden_builtin_def (memchr)
 diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S
-deleted file mode 100644
-index 7b45fcd63a..0000000000
+index 7b45fcd63a..a4ee7fb53c 100644
 --- a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S
-+++ /dev/null
-@@ -1,26 +0,0 @@
--/* Optimized strcmp implementation for POWER10/PPC64.
++++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S
+@@ -1,5 +1,5 @@
+ /* Optimized strcmp implementation for POWER10/PPC64.
 -   Copyright (C) 2021-2025 Free Software Foundation, Inc.
--   This file is part of the GNU C Library.
--
--   The GNU C Library is free software; you can redistribute it and/or
--   modify it under the terms of the GNU Lesser General Public
--   License as published by the Free Software Foundation; either
--   version 2.1 of the License, or (at your option) any later version.
--
--   The GNU C Library is distributed in the hope that it will be useful,
--   but WITHOUT ANY WARRANTY; without even the implied warranty of
--   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
--   Lesser General Public License for more details.
--
--   You should have received a copy of the GNU Lesser General Public
--   License along with the GNU C Library; if not, see
--   <https://www.gnu.org/licenses/>.  */
--
--#if defined __LITTLE_ENDIAN__ && IS_IN (libc)
--#define STRCMP __strcmp_power10
--
--#undef libc_hidden_builtin_def
--#define libc_hidden_builtin_def(name)
--
--#include <sysdeps/powerpc/powerpc64/le/power10/strcmp.S>
--#endif /* __LITTLE_ENDIAN__ && IS_IN (libc) */
-diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c
-index 3c636e3bbc..7c77c084a7 100644
---- a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c
-+++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c
-@@ -29,16 +29,12 @@ extern __typeof (strcmp) __strcmp_power7 attribute_hidden;
- extern __typeof (strcmp) __strcmp_power8 attribute_hidden;
- # ifdef __LITTLE_ENDIAN__
- extern __typeof (strcmp) __strcmp_power9 attribute_hidden;
--extern __typeof (strcmp) __strcmp_power10 attribute_hidden;
- # endif
- 
- # undef strcmp
++   Copyright (C) 2025 Free Software Foundation, Inc.
+    This file is part of the GNU C Library.
  
- libc_ifunc_redirected (__redirect_strcmp, strcmp,
- # ifdef __LITTLE_ENDIAN__
--		        (hwcap2 & PPC_FEATURE2_ARCH_3_1
--			 && hwcap & PPC_FEATURE_HAS_VSX)
--			? __strcmp_power10 :
- 			(hwcap2 & PPC_FEATURE2_ARCH_3_00
- 			 && hwcap & PPC_FEATURE_HAS_ALTIVEC)
- 			? __strcmp_power9 :
+    The GNU C Library is free software; you can redistribute it and/or
 diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S
-deleted file mode 100644
-index 43879085e2..0000000000
+index 43879085e2..bb25bc75b8 100644
 --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S
-+++ /dev/null
-@@ -1,25 +0,0 @@
++++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S
+@@ -1,4 +1,4 @@
 -/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
--   This file is part of the GNU C Library.
--
--   The GNU C Library is free software; you can redistribute it and/or
--   modify it under the terms of the GNU Lesser General Public
--   License as published by the Free Software Foundation; either
--   version 2.1 of the License, or (at your option) any later version.
--
--   The GNU C Library is distributed in the hope that it will be useful,
--   but WITHOUT ANY WARRANTY; without even the implied warranty of
--   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
--   Lesser General Public License for more details.
--
--   You should have received a copy of the GNU Lesser General Public
--   License along with the GNU C Library; if not, see
--   <https://www.gnu.org/licenses/>.  */
--
--#if defined __LITTLE_ENDIAN__ && IS_IN (libc)
--#define STRNCMP __strncmp_power10
--
--#undef libc_hidden_builtin_def
--#define libc_hidden_builtin_def(name)
--
--#include <sysdeps/powerpc/powerpc64/le/power10/strncmp.S>
--#endif
-diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
-index 0a664a620d..4cfe27fa45 100644
---- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
-+++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
-@@ -29,7 +29,6 @@ extern __typeof (strncmp) __strncmp_ppc attribute_hidden;
- extern __typeof (strncmp) __strncmp_power8 attribute_hidden;
- # ifdef __LITTLE_ENDIAN__
- extern __typeof (strncmp) __strncmp_power9 attribute_hidden;
--extern __typeof (strncmp) __strncmp_power10 attribute_hidden;
- # endif
- # undef strncmp
++/* Copyright (C) 2025 Free Software Foundation, Inc.
+    This file is part of the GNU C Library.
  
-@@ -37,9 +36,6 @@ extern __typeof (strncmp) __strncmp_power10 attribute_hidden;
-    ifunc symbol properly.  */
- libc_ifunc_redirected (__redirect_strncmp, strncmp,
- # ifdef __LITTLE_ENDIAN__
--			(hwcap2 & PPC_FEATURE2_ARCH_3_1
--			 && hwcap & PPC_FEATURE_HAS_VSX)
--			? __strncmp_power10 :
- 			(hwcap2 & PPC_FEATURE2_ARCH_3_00
- 			 && hwcap & PPC_FEATURE_HAS_ALTIVEC)
- 			? __strncmp_power9 :
+    The GNU C Library is free software; you can redistribute it and/or
 diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
 index a123e28a57..7fcbc72bc0 100644
 --- a/sysdeps/pthread/Makefile
@@ -8964,6 +12764,34 @@
  
  ifeq ($(subdir),stdlib)
  gen-as-const-headers += ucontext_i.sym
+diff --git a/sysdeps/unix/sysv/linux/aarch64/clone.S b/sysdeps/unix/sysv/linux/aarch64/clone.S
+index 97e1afa57f..bab0ce7719 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/clone.S
++++ b/sysdeps/unix/sysv/linux/aarch64/clone.S
+@@ -51,6 +51,9 @@ ENTRY(__clone)
+ 	and	x1, x1, -16
+ 	cbz	x1, .Lsyscall_error
+ 
++	/* Clear ZA state of SME.  */
++	CALL_LIBC_ARM_ZA_DISABLE
++
+ 	/* Do the system call.  */
+ 	/* X0:flags, x1:newsp, x2:parenttidptr, x3:newtls, x4:childtid.  */
+ 	mov	x0, x2                  /* flags  */
+diff --git a/sysdeps/unix/sysv/linux/aarch64/clone3.S b/sysdeps/unix/sysv/linux/aarch64/clone3.S
+index 443e117bf9..f1f22c9865 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/clone3.S
++++ b/sysdeps/unix/sysv/linux/aarch64/clone3.S
+@@ -50,6 +50,9 @@ ENTRY(__clone3)
+ 	cbz	x10, .Lsyscall_error	/* No NULL cl_args pointer.  */
+ 	cbz	x2, .Lsyscall_error	/* No NULL function pointer.  */
+ 
++    /* Clear ZA state of SME.  */
++	CALL_LIBC_ARM_ZA_DISABLE
++
+ 	/* Do the system call, the kernel expects:
+ 	   x8: system call number
+ 	   x0: cl_args
 diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
 index 6d63c8a9ec..1acc82d077 100644
 --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
@@ -8976,6 +12804,95 @@
  
  #define DCZID_DZP_MASK (1 << 4)
  #define DCZID_BS_MASK (0xf)
+diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+index 695fc5b9b5..dedf3798a4 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S
++++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+@@ -49,25 +49,8 @@ ENTRY (__setcontext)
+ 	cbz	x0, 1f
+ 	b	C_SYMBOL_NAME (__syscall_error)
+ 1:
+-	/* Disable ZA of SME.  */
+-#if HAVE_AARCH64_PAC_RET
+-	PACIASP
+-	cfi_window_save
+-#endif
+-	stp	x29, x30, [sp, -16]!
+-	cfi_adjust_cfa_offset (16)
+-	cfi_rel_offset (x29, 0)
+-	cfi_rel_offset (x30, 8)
+-	mov	x29, sp
+-	bl	__libc_arm_za_disable
+-	ldp	x29, x30, [sp], 16
+-	cfi_adjust_cfa_offset (-16)
+-	cfi_restore (x29)
+-	cfi_restore (x30)
+-#if HAVE_AARCH64_PAC_RET
+-	AUTIASP
+-	cfi_window_save
+-#endif
++	/* Clear ZA state of SME.  */
++	CALL_LIBC_ARM_ZA_DISABLE
+ 	/* Restore the general purpose registers.  */
+ 	mov	x0, x9
+ 	cfi_def_cfa (x0, 0)
+diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
+index b813805931..048fe11ad7 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h
++++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
+@@ -150,6 +150,19 @@
+     mov x8, SYS_ify (syscall_name);		\
+     svc 0
+ 
++/* Clear ZA state of SME (ASM version).  */
++/* The __libc_arm_za_disable function has special calling convention
++   that allows to call it without stack manipulation and preserving
++   most of the registers.  */
++	.macro CALL_LIBC_ARM_ZA_DISABLE
++	cfi_remember_state
++	mov		x13, x30
++	cfi_register(x30, x13)
++	bl		__libc_arm_za_disable
++	mov		x30, x13
++	cfi_restore_state
++	.endm
++
+ #else /* not __ASSEMBLER__ */
+ 
+ # ifdef __LP64__
+@@ -235,6 +248,32 @@
+ #undef HAVE_INTERNAL_BRK_ADDR_SYMBOL
+ #define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1
+ 
++/* Clear ZA state of SME (C version).  */
++/* The __libc_arm_za_disable function has special calling convention
++   that allows to call it without stack manipulation and preserving
++   most of the registers.  */
++#define CALL_LIBC_ARM_ZA_DISABLE()			\
++({							\
++  unsigned long int __tmp;				\
++  asm volatile (					\
++  "	.cfi_remember_state\n"			\
++  "	mov		%0, x30\n"			\
++  "	.cfi_register x30, %0\n"      \
++  "	bl		__libc_arm_za_disable\n"	\
++  "	mov		x30, %0\n"			\
++  "	.cfi_restore_state\n"			\
++  : "=r" (__tmp)					\
++  :							\
++  : "x14", "x15", "x16", "x17", "x18", "memory" );	\
++})
++
++/* Do clear ZA state of SME before making normal clone syscall.  */
++#define INLINE_CLONE_SYSCALL(a0, a1, a2, a3, a4)	\
++({							\
++  CALL_LIBC_ARM_ZA_DISABLE ();				\
++  INLINE_SYSCALL_CALL (clone, a0, a1, a2, a3, a4);	\
++})
++
+ #endif	/* __ASSEMBLER__ */
+ 
+ #endif /* linux/aarch64/sysdep.h */
 diff --git a/sysdeps/unix/sysv/linux/aarch64/tst-aarch64-pkey.c b/sysdeps/unix/sysv/linux/aarch64/tst-aarch64-pkey.c
 index 3ff33ef72a..c884efc3b4 100644
 --- a/sysdeps/unix/sysv/linux/aarch64/tst-aarch64-pkey.c
@@ -9614,6 +13531,20 @@
 +}
 +
 +#include <support/test-driver.c>
+diff --git a/sysdeps/unix/sysv/linux/aarch64/vfork.S b/sysdeps/unix/sysv/linux/aarch64/vfork.S
+index d5943a7485..2600bc9be3 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/vfork.S
++++ b/sysdeps/unix/sysv/linux/aarch64/vfork.S
+@@ -27,6 +27,9 @@
+ 
+ ENTRY (__vfork)
+ 
++	/* Clear ZA state of SME.  */
++	CALL_LIBC_ARM_ZA_DISABLE
++
+ 	mov	x0, #0x4111	/* CLONE_VM | CLONE_VFORK | SIGCHLD */
+ 	mov	x1, sp
+ 	DO_CALL (clone, 2)
 diff --git a/sysdeps/unix/sysv/linux/bits/sched.h b/sysdeps/unix/sysv/linux/bits/sched.h
 index 3656e98eda..39b0b3d19c 100644
 --- a/sysdeps/unix/sysv/linux/bits/sched.h
@@ -9678,10 +13609,25 @@
        int ret = INTERNAL_SYSCALL_CALL (rseq, RSEQ_SELF (), size, 0, RSEQ_SIG);
        if (!INTERNAL_SYSCALL_ERROR_P (ret))
 diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
-index 5311b594af..01b0192ddf 100644
+index 5311b594af..c814060e08 100644
 --- a/sysdeps/x86/Makefile
 +++ b/sysdeps/x86/Makefile
-@@ -21,6 +21,9 @@ tests += \
+@@ -4,7 +4,13 @@ endif
+ 
+ ifeq ($(subdir),elf)
+ sysdep_routines += get-cpuid-feature-leaf
+-sysdep-dl-routines += dl-get-cpu-features
++sysdep-dl-routines += \
++  dl-get-cpu-features \
++  dl-tlsdesc \
++  tls_get_addr \
++  tlsdesc \
++# sysdep-dl-routines
++
+ sysdep_headers += \
+   bits/platform/features.h \
+   bits/platform/x86.h \
+@@ -21,6 +27,9 @@ tests += \
    tst-cpu-features-supports-static \
    tst-get-cpu-features \
    tst-get-cpu-features-static \
@@ -9691,7 +13637,7 @@
    tst-hwcap-tunables \
  # tests
  tests-static += \
-@@ -91,6 +94,25 @@ CFLAGS-tst-gnu2-tls2.c += -msse
+@@ -91,6 +100,42 @@ CFLAGS-tst-gnu2-tls2.c += -msse
  CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell
  CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell
  CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell
@@ -9714,9 +13660,40 @@
 +  $(objpfx)tst-gnu2-tls2mod0.so \
 +  $(objpfx)tst-gnu2-tls2mod1.so \
 +  $(objpfx)tst-gnu2-tls2mod2.so
++
++CFLAGS-tst-tls23.c += -msse2
++CFLAGS-tst-tls23-mod.c += -msse2 -mtune=haswell
++
++LDFLAGS-tst-tls23 += -rdynamic
++tst-tls23-mod.so-no-z-defs = yes
++
++$(objpfx)tst-tls23-mod.so: $(libsupport)
++
++tests-special += $(objpfx)check-gnu2-tls.out
++
++$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so
++	LC_ALL=C $(READELF) -V -W $< \
++		| sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \
++		| grep GLIBC_ABI_GNU2_TLS > $@; \
++	$(evaluate-test)
++generated += check-gnu2-tls.out
  endif
  
  ifeq ($(subdir),math)
+diff --git a/sysdeps/x86/Versions b/sysdeps/x86/Versions
+index 4b10c4b5d7..e8dcfccbe4 100644
+--- a/sysdeps/x86/Versions
++++ b/sysdeps/x86/Versions
+@@ -7,4 +7,9 @@ libc {
+   GLIBC_2.33 {
+     __x86_get_cpuid_feature_leaf;
+   }
++  GLIBC_ABI_GNU2_TLS {
++    # This symbol is used only for empty version map and will be removed
++    # by scripts/versions.awk.
++    __placeholder_only_for_empty_version_map;
++  }
+ }
 diff --git a/sysdeps/x86/bits/floatn.h b/sysdeps/x86/bits/floatn.h
 index d197cb10dd..4674165bd7 100644
 --- a/sysdeps/x86/bits/floatn.h
@@ -9749,7 +13726,7 @@
  #  endif
  
 diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
-index 27abaca8b7..e50f1d6932 100644
+index 27abaca8b7..6b1651a6f0 100644
 --- a/sysdeps/x86/cpu-features.c
 +++ b/sysdeps/x86/cpu-features.c
 @@ -24,6 +24,7 @@
@@ -9910,17 +13887,19 @@
    INTEL_ATOM_GRANDRIDGE,
    INTEL_ATOM_TREMONT,
  
-@@ -575,7 +540,9 @@ enum
+@@ -575,7 +540,11 @@ enum
    INTEL_BIGCORE_METEORLAKE,
    INTEL_BIGCORE_LUNARLAKE,
    INTEL_BIGCORE_ARROWLAKE,
 +  INTEL_BIGCORE_PANTHERLAKE,
    INTEL_BIGCORE_GRANITERAPIDS,
 +  INTEL_BIGCORE_DIAMONDRAPIDS,
++  INTEL_BIGCORE_WILDCATLAKE,
++  INTEL_BIGCORE_NOVALAKE,
  
    /* Mixed (bigcore + atom SOC).  */
    INTEL_MIXED_LAKEFIELD,
-@@ -589,7 +556,7 @@ enum
+@@ -589,7 +558,7 @@ enum
    INTEL_UNKNOWN,
  };
  
@@ -9929,7 +13908,7 @@
  intel_get_fam6_microarch (unsigned int model,
  			  __attribute__ ((unused)) unsigned int stepping)
  {
-@@ -620,6 +587,8 @@ intel_get_fam6_microarch (unsigned int model,
+@@ -620,6 +589,8 @@ intel_get_fam6_microarch (unsigned int model,
        return INTEL_ATOM_GOLDMONT_PLUS;
      case 0xAF:
        return INTEL_ATOM_SIERRAFOREST;
@@ -9938,7 +13917,7 @@
      case 0xB6:
        return INTEL_ATOM_GRANDRIDGE;
      case 0x86:
-@@ -727,8 +696,12 @@ intel_get_fam6_microarch (unsigned int model,
+@@ -727,8 +698,14 @@ intel_get_fam6_microarch (unsigned int model,
        return INTEL_BIGCORE_METEORLAKE;
      case 0xbd:
        return INTEL_BIGCORE_LUNARLAKE;
@@ -9948,10 +13927,12 @@
        return INTEL_BIGCORE_ARROWLAKE;
 +    case 0xCC:
 +      return INTEL_BIGCORE_PANTHERLAKE;
++    case 0xD5:
++      return INTEL_BIGCORE_WILDCATLAKE;
      case 0xAD:
      case 0xAE:
        return INTEL_BIGCORE_GRANITERAPIDS;
-@@ -792,133 +765,20 @@ init_cpu_features (struct cpu_features *cpu_features)
+@@ -792,133 +769,20 @@ init_cpu_features (struct cpu_features *cpu_features)
        cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
  	  &= ~bit_arch_Avoid_Non_Temporal_Memset;
  
@@ -10090,7 +14071,7 @@
  	    case INTEL_BIGCORE_SKYLAKE_AVX512:
  	      /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX. */
  	      if (stepping <= 5)
-@@ -927,38 +787,163 @@ init_cpu_features (struct cpu_features *cpu_features)
+@@ -927,38 +791,176 @@ init_cpu_features (struct cpu_features *cpu_features)
  
  	    case INTEL_BIGCORE_KABYLAKE:
  	      /* NB: Although the errata documents that for model == 0x8e
@@ -10146,17 +14127,28 @@
 +	      break;
  	    }
  	}
++      else if (family == 18)
++	switch (model)
++	  {
++	  case 0x01:
++	  case 0x03:
++	    microarch = INTEL_BIGCORE_NOVALAKE;
++	    break;
++
++	  default:
++	    break;
++	  }
 +      else if (family == 19)
 +	switch (model)
 +	  {
 +	  case 0x01:
 +	    microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
 +	    break;
- 
++
 +	  default:
 +	    break;
 +	  }
-+
+ 
 +      switch (microarch)
 +	{
 +	  /* Atom / KNL tuning.  */
@@ -10262,6 +14254,8 @@
 +	case INTEL_BIGCORE_LUNARLAKE:
 +	case INTEL_BIGCORE_ARROWLAKE:
 +	case INTEL_BIGCORE_PANTHERLAKE:
++	case INTEL_BIGCORE_WILDCATLAKE:
++	case INTEL_BIGCORE_NOVALAKE:
 +	case INTEL_BIGCORE_SAPPHIRERAPIDS:
 +	case INTEL_BIGCORE_EMERALDRAPIDS:
 +	case INTEL_BIGCORE_GRANITERAPIDS:
@@ -10275,7 +14269,7 @@
  
        /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
           if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
-@@ -1159,6 +1144,9 @@ no_cpuid:
+@@ -1159,6 +1161,9 @@ no_cpuid:
  	       TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
  #endif
  
@@ -10285,7 +14279,7 @@
    bool disable_xsave_features = false;
  
    if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
-@@ -1212,6 +1200,7 @@ no_cpuid:
+@@ -1212,6 +1217,7 @@ no_cpuid:
  
        CPU_FEATURE_UNSET (cpu_features, FMA4);
      }
@@ -10347,7 +14341,7 @@
  /* Unused for x86.  */
  # define INIT_ARCH()
 diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
-index 541393f1dc..c3c73e75dd 100644
+index 541393f1dc..b8e963b654 100644
 --- a/sysdeps/x86/sysdep.h
 +++ b/sysdeps/x86/sysdep.h
 @@ -102,6 +102,9 @@
@@ -10370,6 +14364,36 @@
  /* States to be included in xsave_state_size.  */
  # define FULL_STATE_SAVE_MASK		STATE_SAVE_MASK
  #endif
+@@ -177,6 +183,29 @@
+ 
+ #define atom_text_section .section ".text.atom", "ax"
+ 
++#ifndef DL_STACK_ALIGNMENT
++/* Due to GCC bug:
++
++   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
++
++   __tls_get_addr may be called with 8-byte/4-byte stack alignment.
++   Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't
++   assume that stack will be always aligned at 16 bytes.  */
++# ifdef __x86_64__
++#  define DL_STACK_ALIGNMENT 8
++#  define MINIMUM_ALIGNMENT 16
++# else
++#  define DL_STACK_ALIGNMENT 4
++# endif
++#endif
++
++/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align stack for
++   STATE_SAVE or align stack to MINIMUM_ALIGNMENT bytes before calling
++   _dl_fixup/__tls_get_addr.  */
++#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
++  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
++   || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
++
+ #endif	/* __ASSEMBLER__ */
+ 
+ #endif	/* _X86_SYSDEP_H */
 diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c
 new file mode 100644
 index 0000000000..f0024c143d
@@ -10391,11 +14415,90 @@
 +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c
 @@ -0,0 +1 @@
 +#include <elf/tst-gnu2-tls2.c>
+diff --git a/sysdeps/x86/tst-tls23.c b/sysdeps/x86/tst-tls23.c
+new file mode 100644
+index 0000000000..6130d91cf8
+--- /dev/null
++++ b/sysdeps/x86/tst-tls23.c
+@@ -0,0 +1,22 @@
++#ifndef __x86_64__
++#include <sys/platform/x86.h>
++
++#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
++#endif
++
++/* Set XMM0...XMM7 to all 1s.  */
++#define PREPARE_MALLOC()					\
++{								\
++  asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" );	\
++  asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" );	\
++  asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" );	\
++  asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" );	\
++  asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" );	\
++  asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" );	\
++  asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" );	\
++  asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" );	\
++}
++
++#include <elf/tst-tls23.c>
++
++v2di v1, v2, v3;
+diff --git a/sysdeps/x86/tst-tls23.h b/sysdeps/x86/tst-tls23.h
+new file mode 100644
+index 0000000000..21cee4ca07
+--- /dev/null
++++ b/sysdeps/x86/tst-tls23.h
+@@ -0,0 +1,35 @@
++/* Test that __tls_get_addr preserves XMM registers.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <support/check.h>
++
++typedef long long v2di __attribute__((vector_size(16)));
++extern v2di v1, v2, v3;
++
++#define BEFORE_TLS_CALL()					\
++  v1 = __extension__(v2di){0, 0};				\
++  v2 = __extension__(v2di){0, 0};
++
++#define AFTER_TLS_CALL()					\
++  v3 = __extension__(v2di){0, 0};				\
++  asm volatile ("" : "+x" (v3));				\
++  union { v2di x; long long a[2]; } u;				\
++  u.x = v3;							\
++  TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0);
++
++#include <elf/tst-tls23.h>
 diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
-index 9d31685e02..5723ec1847 100644
+index 9d31685e02..a1dc28a700 100644
 --- a/sysdeps/x86_64/Makefile
 +++ b/sysdeps/x86_64/Makefile
-@@ -142,7 +142,6 @@ CFLAGS-tst-avxmod.c += $(AVX-CFLAGS)
+@@ -41,9 +41,6 @@ ifeq ($(subdir),elf)
+ CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
+ 		   -mno-mmx)
+ 
+-sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr
+-
+-tests += ifuncmain8
+ modules-names += ifuncmod8
+ 
+ $(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so
+@@ -142,7 +139,6 @@ CFLAGS-tst-avxmod.c += $(AVX-CFLAGS)
  AVX512-CFLAGS = -mavx512f
  CFLAGS-tst-audit10-aux.c += $(AVX512-CFLAGS)
  CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS)
@@ -10403,6 +14506,38 @@
  CFLAGS-tst-avx512-aux.c += $(AVX512-CFLAGS)
  CFLAGS-tst-avx512mod.c += $(AVX512-CFLAGS)
  
+@@ -212,6 +208,15 @@ LDFLAGS-tst-plt-rewrite2 = -Wl,-z,now
+ LDFLAGS-tst-plt-rewritemod2.so = -Wl,-z,now,-z,undefs
+ tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2
+ $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so
++
++tests-special += $(objpfx)check-dt-x86-64-plt.out
++
++$(objpfx)check-dt-x86-64-plt.out: $(common-objpfx)libc.so
++	LC_ALL=C $(READELF) -V -W $< \
++		| sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \
++		| grep GLIBC_ABI_DT_X86_64_PLT > $@; \
++	$(evaluate-test)
++generated += check-dt-x86-64-plt.out
+ endif
+ 
+ test-internal-extras += tst-gnu2-tls2mod1
+diff --git a/sysdeps/x86_64/Versions b/sysdeps/x86_64/Versions
+index e94758b236..6a989ad3b3 100644
+--- a/sysdeps/x86_64/Versions
++++ b/sysdeps/x86_64/Versions
+@@ -5,6 +5,11 @@ libc {
+   GLIBC_2.13 {
+     __fentry__;
+   }
++  GLIBC_ABI_DT_X86_64_PLT {
++    # This symbol is used only for empty version map and will be removed
++    # by scripts/versions.awk.
++    __placeholder_only_for_empty_version_map;
++  }
+ }
+ libm {
+   GLIBC_2.1 {
 diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
 index 9965ddd2c0..4f496de8c8 100644
 --- a/sysdeps/x86_64/dl-tlsdesc-dynamic.h
@@ -10416,6 +14551,70 @@
  #endif
  	/* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
  	   r10 and r11.  */
+diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
+index d1bb125560..9a55fc52bb 100644
+--- a/sysdeps/x86_64/dl-tlsdesc.S
++++ b/sysdeps/x86_64/dl-tlsdesc.S
+@@ -22,7 +22,6 @@
+ #include <features-offsets.h>
+ #include <isa-level.h>
+ #include "tlsdesc.h"
+-#include "dl-trampoline-save.h"
+ 
+ /* Area on stack to save and restore registers used for parameter
+    passing when calling _dl_tlsdesc_dynamic.  */
+diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/sysdeps/x86_64/dl-trampoline-save.h
+deleted file mode 100644
+index 761128d980..0000000000
+--- a/sysdeps/x86_64/dl-trampoline-save.h
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* x86-64 PLT trampoline register save macros.
+-   Copyright (C) 2024-2025 Free Software Foundation, Inc.
+-   This file is part of the GNU C Library.
+-
+-   The GNU C Library is free software; you can redistribute it and/or
+-   modify it under the terms of the GNU Lesser General Public
+-   License as published by the Free Software Foundation; either
+-   version 2.1 of the License, or (at your option) any later version.
+-
+-   The GNU C Library is distributed in the hope that it will be useful,
+-   but WITHOUT ANY WARRANTY; without even the implied warranty of
+-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+-   Lesser General Public License for more details.
+-
+-   You should have received a copy of the GNU Lesser General Public
+-   License along with the GNU C Library; if not, see
+-   <https://www.gnu.org/licenses/>.  */
+-
+-#ifndef DL_STACK_ALIGNMENT
+-/* Due to GCC bug:
+-
+-   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+-
+-   __tls_get_addr may be called with 8-byte stack alignment.  Although
+-   this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
+-   that stack will be always aligned at 16 bytes.  */
+-# define DL_STACK_ALIGNMENT 8
+-#endif
+-
+-/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
+-   stack to 16 bytes before calling _dl_fixup.  */
+-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
+-  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
+-   || 16 > DL_STACK_ALIGNMENT)
+diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
+index a055722e64..ac85f96794 100644
+--- a/sysdeps/x86_64/dl-trampoline.S
++++ b/sysdeps/x86_64/dl-trampoline.S
+@@ -22,7 +22,6 @@
+ #include <features-offsets.h>
+ #include <link-defines.h>
+ #include <isa-level.h>
+-#include "dl-trampoline-save.h"
+ 
+ /* Area on stack to save and restore registers used for parameter
+    passing when calling _dl_fixup.  */
 diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
 index e823d2fcc6..3403422443 100644
 --- a/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -10650,6 +14849,19 @@
  				     1,
  				     __wcpncpy_generic))
  
+diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+index f95cca6ae5..50af138230 100644
+--- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h
++++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+@@ -35,7 +35,7 @@ IFUNC_SELECTOR (void)
+ 
+   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+-				      AVX_Fast_Unaligned_Load, !))
++				      AVX_Fast_Unaligned_Load,))
+     {
+       if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
+ 	{
 diff --git a/sysdeps/x86_64/tst-auditmod10b.c b/sysdeps/x86_64/tst-auditmod10b.c
 index 6eb21b6f06..0b994ef0f0 100644
 --- a/sysdeps/x86_64/tst-auditmod10b.c
@@ -10796,3 +15008,15 @@
  
    return 0;
  }
+diff --git a/time/strftime_l.c b/time/strftime_l.c
+index f51d926b46..5cb5f5d213 100644
+--- a/time/strftime_l.c
++++ b/time/strftime_l.c
+@@ -40,6 +40,7 @@
+ #endif
+ 
+ #include <ctype.h>
++#include <errno.h>
+ #include <sys/types.h>		/* Some systems define `time_t' here.  */
+ 
+ #ifdef TIME_WITH_SYS_TIME
diff -Nru glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff
--- glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff	1970-01-01 00:00:00.000000000 +0000
+++ glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff	2026-03-01 20:10:06.000000000 +0000
@@ -0,0 +1,62 @@
+--- a/sysdeps/x86/Makefile
++++ b/sysdeps/x86/Makefile
+@@ -127,15 +127,6 @@ LDFLAGS-tst-tls23 += -rdynamic
+ tst-tls23-mod.so-no-z-defs = yes
+ 
+ $(objpfx)tst-tls23-mod.so: $(libsupport)
+-
+-tests-special += $(objpfx)check-gnu2-tls.out
+-
+-$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so
+-	LC_ALL=C $(READELF) -V -W $< \
+-		| sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \
+-		| grep GLIBC_ABI_GNU2_TLS > $@; \
+-	$(evaluate-test)
+-generated += check-gnu2-tls.out
+ endif
+ 
+ ifeq ($(subdir),math)
+--- a/sysdeps/x86/Versions
++++ b/sysdeps/x86/Versions
+@@ -7,9 +7,4 @@ libc {
+   GLIBC_2.33 {
+     __x86_get_cpuid_feature_leaf;
+   }
+-  GLIBC_ABI_GNU2_TLS {
+-    # This symbol is used only for empty version map and will be removed
+-    # by scripts/versions.awk.
+-    __placeholder_only_for_empty_version_map;
+-  }
+ }
+--- a/sysdeps/x86_64/Makefile
++++ b/sysdeps/x86_64/Makefile
+@@ -217,6 +217,15 @@ $(objpfx)check-dt-x86-64-plt.out: $(common-objpfx)libc.so
+ 		| grep GLIBC_ABI_DT_X86_64_PLT > $@; \
+ 	$(evaluate-test)
+ generated += check-dt-x86-64-plt.out
++
++tests-special += $(objpfx)check-gnu2-tls.out
++
++$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so
++	LC_ALL=C $(READELF) -V -W $< \
++		| sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \
++		| grep GLIBC_ABI_GNU2_TLS > $@; \
++	$(evaluate-test)
++generated += check-gnu2-tls.out
+ endif
+ 
+ test-internal-extras += tst-gnu2-tls2mod1
+--- a/sysdeps/x86_64/Versions
++++ b/sysdeps/x86_64/Versions
+@@ -5,6 +5,11 @@ libc {
+   GLIBC_2.13 {
+     __fentry__;
+   }
++  GLIBC_ABI_GNU2_TLS {
++    # This symbol is used only for empty version map and will be removed
++    # by scripts/versions.awk.
++    __placeholder_only_for_empty_version_map;
++  }
+   GLIBC_ABI_DT_X86_64_PLT {
+     # This symbol is used only for empty version map and will be removed
+     # by scripts/versions.awk.
diff -Nru glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff
--- glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff	1970-01-01 00:00:00.000000000 +0000
+++ glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff	2026-03-01 20:10:06.000000000 +0000
@@ -0,0 +1,32 @@
+--- a/sysdeps/i386/Makefile
++++ b/sysdeps/i386/Makefile
+@@ -60,15 +60,6 @@ $(objpfx)tst-ld-sse-use.out: ../sysdeps/i386/tst-ld-sse-use.sh $(objpfx)ld.so
+ 	@echo "Checking ld.so for SSE register use.  This will take a few seconds..."
+ 	$(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \
+ 	$(evaluate-test)
+-
+-tests-special += $(objpfx)check-gnu-tls.out
+-
+-$(objpfx)check-gnu-tls.out: $(common-objpfx)libc.so
+-	LC_ALL=C $(READELF) -V -W $< \
+-		| sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \
+-		| grep GLIBC_ABI_GNU_TLS > $@; \
+-	$(evaluate-test)
+-generated += check-gnu-tls.out
+ else
+ CFLAGS-.os += $(if $(filter rtld-%.os,$(@F)), $(rtld-CFLAGS))
+ endif
+--- a/sysdeps/i386/Versions
++++ b/sysdeps/i386/Versions
+@@ -28,11 +28,6 @@ libc {
+   GLIBC_2.13 {
+     __fentry__;
+   }
+-  GLIBC_ABI_GNU_TLS {
+-    # This symbol is used only for empty version map and will be removed
+-    # by scripts/versions.awk.
+-    __placeholder_only_for_empty_version_map;
+-  }
+ }
+ libm {
+   GLIBC_2.1 {
diff -Nru glibc-2.41/debian/patches/series glibc-2.41/debian/patches/series
--- glibc-2.41/debian/patches/series	2025-12-28 16:32:28.000000000 +0000
+++ glibc-2.41/debian/patches/series	2026-03-01 20:10:06.000000000 +0000
@@ -1,5 +1,12 @@
 git-updates.diff
 
+# Revert addition of symbol versions used as ABI flags, as the dpkg-shlibdeps
+# version in trixie is not able to handle them (see #1122107)
+i386/local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff
+i386/local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff
+amd64/local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff
+amd64/local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff
+
 locale/check-unknown-symbols.diff
 locale/locale-print-LANGUAGE.diff
 locale/LC_IDENTIFICATION-optional-fields.diff