Version in base suite: 2.41-12+deb13u1 Base version: glibc_2.41-12+deb13u1 Target version: glibc_2.41-12+deb13u2 Base file: /srv/ftp-master.debian.org/ftp/pool/main/g/glibc/glibc_2.41-12+deb13u1.dsc Target file: /srv/ftp-master.debian.org/policy/pool/main/g/glibc/glibc_2.41-12+deb13u2.dsc changelog | 51 patches/amd64/local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff | 32 patches/amd64/local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff | 32 patches/git-updates.diff | 5428 ++++++++-- patches/i386/local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff | 62 patches/i386/local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff | 32 patches/series | 7 7 files changed, 5042 insertions(+), 602 deletions(-) dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmp9zgelfuh/glibc_2.41-12+deb13u1.dsc: no acceptable signature found dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmp9zgelfuh/glibc_2.41-12+deb13u2.dsc: no acceptable signature found diff -Nru glibc-2.41/debian/changelog glibc-2.41/debian/changelog --- glibc-2.41/debian/changelog 2025-12-28 16:32:33.000000000 +0000 +++ glibc-2.41/debian/changelog 2026-03-01 20:10:06.000000000 +0000 @@ -1,3 +1,54 @@ +glibc (2.41-12+deb13u2) trixie; urgency=medium + + * debian/patches/git-updates.diff: update from upstream stable branch: + - Fix a null pointer dereference in <ctype.h> macros in multithreaded + programs with multiple libc.so. + - Fix _r_debug handling when interposed by the main executable, restoring + compatibility with Dyninst. + - Fix a null pointer dereference in symbol lookup when the symbol version + hash value is zero. + - Add a new test for dlopen (NULL, RTLD_LAZY) from an ELF constructor. + - Preserve vector registers in the i386 TLS slow path. + - Add GLIBC_ABI_GNU2_TLS and GLIBC_ABI_GNU_TLS symbol versions on i386. + - Add GLIBC_ABI_GNU2_TLS and GLIBC_ABI_DT_X86_64_PLT symbol versions on + amd64. 
+ - Fix NSS group merge not reacting to ERANGE during merge. + - Detect Intel Nova and Wildcat processors and use the same ifunc + selection as for Intel Panther Lake. + - Fix typo in wmemset ifunc selector that caused AVX2/AVX512 paths to be + skipped. + - Fix incorrect return values and improve special case handling in arm64 + SVE pow/powf and tanpi/tanpif implementations. + - Optimise SVE scalar callbacks on arm64. + - Correct SME handling on arm64 by disabling ZA state in setjmp and + sigsetjmp, clearing ZA state in clone/clone3. + - Fix conform tests on arm64 when the toolchain does not default to + -mbranch-protection=standard. + - Fix performance instability in AdvSIMD tan and sinh functions on arm64. + - Fix and restore POWER10 optimized strcmp/strncmp functions on ppc64el, + which were previously disabled in 2.41-8 due to a security issue. + - Fix POWER optimized rawmemchr function on ppc64el. + - Validate pread size and offset for overflow when reading ELF + headers in the sprof utility. + - Minor fixes to testsuite support code. + - Optimize trylock for high cache contention workloads. + - Fix an integer overflow in _int_memalign leading to heap corruption + (CVE-2026-0861). Closes: #1125678. + - Fix stack contents leak in getnetbyaddr (CVE-2026-0915). Closes: + #1125748. + - Fix bug in wordexp, which could return uninitialized memory when using + WRDE_REUSE together with WRDE_APPEND (CVE-2025-15281). Closes: #1126266. + - Switch currency symbol for the bg_BG locale to euro. 
+ * Revert addition of symbol versions used as ABI flags, as the + dpkg-shlibdeps version in trixie is not able to handle them (see + #1122107): + - local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff + - local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff + - local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff + - local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff + + -- Aurelien Jarno Sun, 01 Mar 2026 21:10:06 +0100 + glibc (2.41-12+deb13u1) trixie; urgency=medium * debian/patches/git-updates.diff: update from upstream stable branch: diff -Nru glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff --- glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff 1970-01-01 00:00:00.000000000 +0000 +++ glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff 2026-03-01 20:10:06.000000000 +0000 @@ -0,0 +1,32 @@ +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -209,15 +209,6 @@ LDFLAGS-tst-plt-rewritemod2.so = -Wl,-z,now,-z,undefs + tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2 + $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so + +-tests-special += $(objpfx)check-dt-x86-64-plt.out +- +-$(objpfx)check-dt-x86-64-plt.out: $(common-objpfx)libc.so +- LC_ALL=C $(READELF) -V -W $< \ +- | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ +- | grep GLIBC_ABI_DT_X86_64_PLT > $@; \ +- $(evaluate-test) +-generated += check-dt-x86-64-plt.out +- + tests-special += $(objpfx)check-gnu2-tls.out + + $(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so +--- a/sysdeps/x86_64/Versions ++++ b/sysdeps/x86_64/Versions +@@ -10,11 +10,6 @@ libc { + # by scripts/versions.awk. 
+ __placeholder_only_for_empty_version_map; + } +- GLIBC_ABI_DT_X86_64_PLT { +- # This symbol is used only for empty version map and will be removed +- # by scripts/versions.awk. +- __placeholder_only_for_empty_version_map; +- } + } + libm { + GLIBC_2.1 { diff -Nru glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff --- glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff 1970-01-01 00:00:00.000000000 +0000 +++ glibc-2.41/debian/patches/amd64/local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff 2026-03-01 20:10:06.000000000 +0000 @@ -0,0 +1,32 @@ +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -208,15 +208,6 @@ LDFLAGS-tst-plt-rewrite2 = -Wl,-z,now + LDFLAGS-tst-plt-rewritemod2.so = -Wl,-z,now,-z,undefs + tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2 + $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so +- +-tests-special += $(objpfx)check-gnu2-tls.out +- +-$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so +- LC_ALL=C $(READELF) -V -W $< \ +- | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ +- | grep GLIBC_ABI_GNU2_TLS > $@; \ +- $(evaluate-test) +-generated += check-gnu2-tls.out + endif + + test-internal-extras += tst-gnu2-tls2mod1 +--- a/sysdeps/x86_64/Versions ++++ b/sysdeps/x86_64/Versions +@@ -5,11 +5,6 @@ libc { + GLIBC_2.13 { + __fentry__; + } +- GLIBC_ABI_GNU2_TLS { +- # This symbol is used only for empty version map and will be removed +- # by scripts/versions.awk. 
+- __placeholder_only_for_empty_version_map; +- } + } + libm { + GLIBC_2.1 { diff -Nru glibc-2.41/debian/patches/git-updates.diff glibc-2.41/debian/patches/git-updates.diff --- glibc-2.41/debian/patches/git-updates.diff 2025-12-28 16:32:28.000000000 +0000 +++ glibc-2.41/debian/patches/git-updates.diff 2026-03-01 20:10:06.000000000 +0000 @@ -22,10 +22,10 @@ $(common-objdir):$(subst $(empty) ,:,$(patsubst ../$(subdir),.,$(rpath-dirs:%=$(common-objpfx)%))) else # build-static diff --git a/NEWS b/NEWS -index b11422b060..f77d1471c3 100644 +index b11422b060..b6bab59ca6 100644 --- a/NEWS +++ b/NEWS -@@ -5,6 +5,39 @@ See the end for copying conditions. +@@ -5,6 +5,47 @@ See the end for copying conditions. Please send GNU C library bug reports via using `glibc' in the "product" field. @@ -39,7 +39,10 @@ + +The following bugs were resolved with this release: + ++ [19341] ctype: Fallback initialization of TLS using relocations ++ [29190] Fix handling of symbol versions which hash to zero + [31943] _dl_find_object can fail if ld.so contains gaps between load segments ++ [32483] ctype macros segfault in multithreaded programs with multiple libc.so + [32269] RISC-V IFUNC resolver cannot access gp pointer + [32626] math: math: log10p1f is not correctly rounded + [32627] math: math: sinhf is not correctly rounded @@ -60,7 +63,12 @@ + [32994] stdlib: resolve a double lock init issue after fork + [33164] iconv -o should not create executable files + [33185] Fix double-free after allocation failure in regcomp ++ [33234] Use TLS initial-exec model for __libc_tsd_CTYPE_* thread variables + [33245] nptl: nptl: error in internal cancellation syscall handling ++ [33361] nss: Group merge does not react to ERANGE during merge ++ [33601] aarch64: Do not link conform tests with -Wl,-z,force-bti ++ [33814] glob: wordexp with WRDE_REUSE and WRDE_APPEND may return ++ uninitialized memory + Version 2.41 @@ -661,7 +669,7 @@ have-insert = @libc_cv_insert@ have-glob-dat-reloc = 
@libc_cv_has_glob_dat@ diff --git a/configure b/configure -index eb8abd0054..674d1d7e4a 100755 +index eb8abd0054..f5bd5dc7ac 100755 --- a/configure +++ b/configure @@ -659,6 +659,7 @@ libc_cv_has_glob_dat @@ -672,7 +680,17 @@ ASFLAGS_config libc_cv_cc_with_libunwind libc_cv_insert -@@ -7114,6 +7115,40 @@ if test $libc_cv_as_noexecstack = yes; then +@@ -4928,6 +4929,9 @@ with_fp_cond=1 + # A preconfigure script may define another name to TLS descriptor variant + mtls_descriptor=gnu2 + ++# A preconfigure script may define another name to traditional TLS variant ++mtls_traditional=gnu ++ + if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null` + then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5 +@@ -7114,6 +7118,40 @@ if test $libc_cv_as_noexecstack = yes; then fi @@ -713,7 +731,49 @@ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for linker that supports -z execstack" >&5 printf %s "checking for linker that supports -z execstack... " >&6; } libc_linker_feature=no -@@ -8643,6 +8678,35 @@ if test $libc_cv_builtin_trap = yes; then +@@ -7452,6 +7490,41 @@ rm -f conftest* + config_vars="$config_vars + have-test-mtls-descriptor = $libc_cv_test_mtls_descriptor" + ++ ++cat > conftest.c <&5 ++printf %s "checking for traditional tls support in testing... " >&6; } ++if test ${libc_cv_test_mtls_traditional+y} ++then : ++ printf %s "(cached) " >&6 ++else case e in #( ++ e) if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_traditional -nostdlib -nostartfiles -shared conftest.c -o conftest 1>&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ libc_cv_test_mtls_traditional=$mtls_traditional ++ else ++ libc_cv_test_mtls_traditional=no ++ fi ;; ++esac ++fi ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_test_mtls_traditional" >&5 ++printf "%s\n" "$libc_cv_test_mtls_traditional" >&6; } ++ ++CC="$saved_CC" ++ ++rm -f conftest* ++config_vars="$config_vars ++have-test-mtls-traditional = $libc_cv_test_mtls_traditional" ++ + conftest_code=" + void __foo (void) + { +@@ -8643,6 +8716,35 @@ if test $libc_cv_builtin_trap = yes; then fi @@ -749,7 +809,7 @@ ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -@@ -8908,6 +8972,104 @@ printf "%s\n" "$libc_linker_feature" >&6; } +@@ -8908,6 +9010,104 @@ printf "%s\n" "$libc_linker_feature" >&6; } config_vars="$config_vars load-address-ldflag = $libc_cv_load_address_ldflag" @@ -855,10 +915,20 @@ printf %s "checking if we can build programs as PIE... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext diff --git a/configure.ac b/configure.ac -index 050bfa65e3..57cd24c87d 100644 +index 050bfa65e3..e55ee81959 100644 --- a/configure.ac +++ b/configure.ac -@@ -1318,6 +1318,10 @@ if test $libc_cv_as_noexecstack = yes; then +@@ -483,6 +483,9 @@ with_fp_cond=1 + # A preconfigure script may define another name to TLS descriptor variant + mtls_descriptor=gnu2 + ++# A preconfigure script may define another name to traditional TLS variant ++mtls_traditional=gnu ++ + dnl Let sysdeps/*/preconfigure act here. 
+ LIBC_PRECONFIGURE([$srcdir], [for sysdeps]) + +@@ -1318,6 +1321,10 @@ if test $libc_cv_as_noexecstack = yes; then fi AC_SUBST(ASFLAGS_config) @@ -869,7 +939,24 @@ LIBC_LINKER_FEATURE([-z execstack], [-Wl,-z,execstack], [libc_cv_z_execstack=yes], [libc_cv_z_execstack=no]) AC_SUBST(libc_cv_z_execstack) -@@ -1820,6 +1824,17 @@ if test $libc_cv_builtin_trap = yes; then +@@ -1396,6 +1403,16 @@ LIBC_TRY_TEST_CC_COMMAND([for tls descriptor support], + LIBC_CONFIG_VAR([have-test-mtls-descriptor], + [$libc_cv_test_mtls_descriptor]) + ++dnl Check if TEST_CC support traditional tls. ++LIBC_TRY_TEST_CC_COMMAND([for traditional tls support], ++ [$conftest_code], ++ [-fPIC -mtls-dialect=$mtls_traditional -nostdlib -nostartfiles -shared], ++ libc_cv_test_mtls_traditional, ++ [libc_cv_test_mtls_traditional=$mtls_traditional], ++ [libc_cv_test_mtls_traditional=no]) ++LIBC_CONFIG_VAR([have-test-mtls-traditional], ++ [$libc_cv_test_mtls_traditional]) ++ + dnl clang emits an warning for a double alias redirection, to warn the + dnl original symbol is sed even when weak definition overrides it. + dnl It is a usual pattern for weak_alias, where multiple alias point to +@@ -1820,6 +1837,17 @@ if test $libc_cv_builtin_trap = yes; then AC_DEFINE([HAVE_BUILTIN_TRAP]) fi @@ -887,7 +974,7 @@ dnl C++ feature tests. 
AC_LANG_PUSH([C++]) -@@ -1992,6 +2007,23 @@ LIBC_LINKER_FEATURE([-Ttext-segment=$libc_cv_pde_load_address], +@@ -1992,6 +2020,23 @@ LIBC_LINKER_FEATURE([-Ttext-segment=$libc_cv_pde_load_address], [libc_cv_load_address_ldflag=]) LIBC_CONFIG_VAR([load-address-ldflag], [$libc_cv_load_address_ldflag]) @@ -911,8 +998,221 @@ AC_MSG_CHECKING(if we can build programs as PIE) AC_COMPILE_IFELSE([AC_LANG_SOURCE([[#ifdef PIE_UNSUPPORTED # error PIE is not supported +diff --git a/ctype/Makefile b/ctype/Makefile +index 64848bd02e..783b689bbf 100644 +--- a/ctype/Makefile ++++ b/ctype/Makefile +@@ -36,6 +36,23 @@ aux := ctype-info + + tests := \ + test_ctype \ ++ tst-ctype-tls-dlmopen \ ++ tst-ctype-tls-dlopen-static \ + # tests + ++tests-static := \ ++ tst-ctype-tls-dlopen-static \ ++ # tests-static ++ ++modules-names := \ ++ tst-ctype-tls-mod \ ++ # modules-names ++ + include ../Rules ++ ++$(objpfx)tst-ctype-tls-dlmopen: $(shared-thread-library) ++$(objpfx)tst-ctype-tls-dlmopen.out: $(objpfx)tst-ctype-tls-mod.so ++$(objpfx)tst-ctype-tls-dlopen-static: $(static-thread-library) ++$(objpfx)tst-ctype-tls-dlopen-static.out: $(objpfx)tst-ctype-tls-mod.so ++tst-ctype-tls-dlopen-static-ENV = \ ++ LD_LIBRARY_PATH=$(ld-library-path):$(common-objpfx):$(common-objpfx)elf +diff --git a/ctype/ctype-info.c b/ctype/ctype-info.c +index 5ee578e901..fb5acf9419 100644 +--- a/ctype/ctype-info.c ++++ b/ctype/ctype-info.c +@@ -19,20 +19,28 @@ + #include + #include + +-__libc_tsd_define (, const uint16_t *, CTYPE_B) +-__libc_tsd_define (, const int32_t *, CTYPE_TOLOWER) +-__libc_tsd_define (, const int32_t *, CTYPE_TOUPPER) ++/* Fallback initialization using relocations. See the _nl_C_locobj ++ initializers in locale/xlocale.c. Usually, this is overwritten by ++ __ctype_init before user code runs, but this does not happen for ++ threads in secondary namespaces. With the initializers, secondary ++ namespaces at least get locale data from the C locale. 
*/ ++__thread const uint16_t * __libc_tsd_CTYPE_B attribute_tls_model_ie ++ = (const uint16_t *) _nl_C_LC_CTYPE_class + 128; ++__thread const int32_t * __libc_tsd_CTYPE_TOLOWER attribute_tls_model_ie ++ = (const int32_t *) _nl_C_LC_CTYPE_tolower + 128; ++__thread const int32_t * __libc_tsd_CTYPE_TOUPPER attribute_tls_model_ie ++ = (const int32_t *) _nl_C_LC_CTYPE_toupper + 128; + + + void + __ctype_init (void) + { +- const uint16_t **bp = __libc_tsd_address (const uint16_t *, CTYPE_B); +- *bp = (const uint16_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_CLASS) + 128; +- const int32_t **up = __libc_tsd_address (const int32_t *, CTYPE_TOUPPER); +- *up = ((int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOUPPER) + 128); +- const int32_t **lp = __libc_tsd_address (const int32_t *, CTYPE_TOLOWER); +- *lp = ((int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOLOWER) + 128); ++ __libc_tsd_CTYPE_B ++ = ((const uint16_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_CLASS)) + 128; ++ __libc_tsd_CTYPE_TOUPPER ++ = ((const int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOUPPER)) + 128; ++ __libc_tsd_CTYPE_TOLOWER = ++ ((const int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOLOWER)) + 128; + } + libc_hidden_def (__ctype_init) + +@@ -41,10 +49,7 @@ libc_hidden_def (__ctype_init) + #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) + + /* Defined in locale/C-ctype.c. 
*/ +-extern const char _nl_C_LC_CTYPE_class[] attribute_hidden; + extern const char _nl_C_LC_CTYPE_class32[] attribute_hidden; +-extern const char _nl_C_LC_CTYPE_toupper[] attribute_hidden; +-extern const char _nl_C_LC_CTYPE_tolower[] attribute_hidden; + extern const char _nl_C_LC_CTYPE_class_upper[] attribute_hidden; + extern const char _nl_C_LC_CTYPE_class_lower[] attribute_hidden; + extern const char _nl_C_LC_CTYPE_class_alpha[] attribute_hidden; +diff --git a/ctype/tst-ctype-tls-dlmopen.c b/ctype/tst-ctype-tls-dlmopen.c +new file mode 100644 +index 0000000000..f7eeb65551 +--- /dev/null ++++ b/ctype/tst-ctype-tls-dlmopen.c +@@ -0,0 +1,2 @@ ++#define DO_STATIC_TEST 0 ++#include "tst-ctype-tls-skeleton.c" +diff --git a/ctype/tst-ctype-tls-dlopen-static.c b/ctype/tst-ctype-tls-dlopen-static.c +new file mode 100644 +index 0000000000..c2c09c362c +--- /dev/null ++++ b/ctype/tst-ctype-tls-dlopen-static.c +@@ -0,0 +1,2 @@ ++#define DO_STATIC_TEST 1 ++#include "tst-ctype-tls-skeleton.c" +diff --git a/ctype/tst-ctype-tls-mod.c b/ctype/tst-ctype-tls-mod.c +new file mode 100644 +index 0000000000..52cbb9dcb6 +--- /dev/null ++++ b/ctype/tst-ctype-tls-mod.c +@@ -0,0 +1,37 @@ ++/* Wrappers for macros in a secondary namespace. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++int ++my_isalpha (int ch) ++{ ++ return isalpha (ch); ++} ++ ++int ++my_toupper (int ch) ++{ ++ return toupper (ch); ++} ++ ++int ++my_tolower (int ch) ++{ ++ return tolower (ch); ++} +diff --git a/ctype/tst-ctype-tls-skeleton.c b/ctype/tst-ctype-tls-skeleton.c +new file mode 100644 +index 0000000000..8c53e35899 +--- /dev/null ++++ b/ctype/tst-ctype-tls-skeleton.c +@@ -0,0 +1,67 @@ ++/* Test that in a secondary namespace works. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Before this file is included, define DO_STATIC_TEST to 0 or 1. ++ With 0, dlmopen is used for the test. With 1, dlopen is used. 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int (*my_isalpha) (int); ++static int (*my_toupper) (int); ++static int (*my_tolower) (int); ++ ++static void * ++checks (void *ignore) ++{ ++ TEST_VERIFY (my_isalpha ('a')); ++ TEST_VERIFY (!my_isalpha ('0')); ++ TEST_COMPARE (my_toupper ('a'), 'A'); ++ TEST_COMPARE (my_toupper ('A'), 'A'); ++ TEST_COMPARE (my_tolower ('a'), 'a'); ++ TEST_COMPARE (my_tolower ('A'), 'a'); ++ return NULL; ++} ++ ++static int ++do_test (void) ++{ ++ char *dso = xasprintf ("%s/ctype/tst-ctype-tls-mod.so", support_objdir_root); ++#if DO_STATIC_TEST ++ void *handle = xdlopen (dso, RTLD_LAZY); ++#else ++ void *handle = xdlmopen (LM_ID_NEWLM, dso, RTLD_LAZY); ++#endif ++ my_isalpha = xdlsym (handle, "my_isalpha"); ++ my_toupper = xdlsym (handle, "my_toupper"); ++ my_tolower = xdlsym (handle, "my_tolower"); ++ ++ checks (NULL); ++ xpthread_join (xpthread_create (NULL, checks, NULL)); ++ ++ xdlclose (handle); ++ free (dso); ++ ++ return 0; ++} ++ ++#include diff --git a/elf/Makefile b/elf/Makefile -index 4b1d0d8741..b8064ef14c 100644 +index 4b1d0d8741..d8a0c0c4ae 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -34,7 +34,6 @@ routines = \ @@ -923,7 +1223,11 @@ dl-iteratephdr \ dl-libc \ dl-origin \ -@@ -61,6 +60,8 @@ dl-routines = \ +@@ -58,9 +57,12 @@ dl-routines = \ + dl-close \ + dl-debug \ + dl-debug-symbols \ ++ dl-debug_state \ dl-deps \ dl-exception \ dl-execstack \ @@ -932,7 +1236,7 @@ dl-fini \ dl-init \ dl-load \ -@@ -266,6 +267,7 @@ tests-static-normal := \ +@@ -266,6 +268,7 @@ tests-static-normal := \ tst-array1-static \ tst-array5-static \ tst-dl-iter-static \ @@ -940,7 +1244,7 @@ tst-dst-static \ tst-env-setuid-static \ tst-getauxval-static \ -@@ -379,6 +381,7 @@ tests += \ +@@ -379,6 +382,7 @@ tests += \ tst-align3 \ tst-audit-tlsdesc \ tst-audit-tlsdesc-dlopen \ @@ -948,7 +1252,34 @@ tst-audit1 \ tst-audit2 \ tst-audit8 \ -@@ -532,6 +535,8 @@ tests-internal += \ +@@ -415,7 +419,10 @@ tests += 
\ + tst-dlmopen1 \ + tst-dlmopen3 \ + tst-dlmopen4 \ ++ tst-dlmopen4-nonpic \ ++ tst-dlmopen4-pic \ + tst-dlopen-auditdup \ ++ tst-dlopen-constructor-null \ + tst-dlopen-self \ + tst-dlopen-tlsmodid \ + tst-dlopen-tlsreinit1 \ +@@ -490,6 +497,7 @@ tests += \ + tst-tls21 \ + tst-tls22 \ + tst-tls22-gnu2 \ ++ tst-tls23 \ + tst-tlsalign \ + tst-tlsalign-extern \ + tst-tlsgap \ +@@ -497,6 +505,7 @@ tests += \ + tst-unique2 \ + tst-unwind-ctor \ + tst-unwind-main \ ++ tst-version-hash-zero \ + unload3 \ + unload4 \ + unload5 \ +@@ -532,6 +541,8 @@ tests-internal += \ tst-dl_find_object-threads \ tst-dlmopen2 \ tst-hash-collision3 \ @@ -957,7 +1288,7 @@ tst-ptrguard1 \ tst-stackguard1 \ tst-tls-surplus \ -@@ -543,6 +548,10 @@ tests-internal += \ +@@ -543,6 +554,10 @@ tests-internal += \ unload2 \ # tests-internal @@ -968,7 +1299,7 @@ tests-container += \ tst-dlopen-self-container \ tst-dlopen-tlsmodid-container \ -@@ -567,9 +576,11 @@ tests-execstack-yes = \ +@@ -567,9 +582,11 @@ tests-execstack-yes = \ tst-execstack \ tst-execstack-needed \ tst-execstack-prog \ @@ -981,7 +1312,7 @@ # tests-execstack-static-yes ifeq (yes,$(run-built-tests)) tests-execstack-special-yes = \ -@@ -863,6 +874,7 @@ modules-names += \ +@@ -863,6 +880,7 @@ modules-names += \ tst-auditmanymod8 \ tst-auditmanymod9 \ tst-auditmod-tlsdesc \ @@ -989,15 +1320,35 @@ tst-auditmod1 \ tst-auditmod11 \ tst-auditmod12 \ -@@ -905,6 +917,7 @@ modules-names += \ +@@ -905,6 +923,9 @@ modules-names += \ tst-dlmopen1mod \ tst-dlopen-auditdup-auditmod \ tst-dlopen-auditdupmod \ ++ tst-dlopen-constructor-null-mod1 \ ++ tst-dlopen-constructor-null-mod2 \ + tst-dlopen-sgid-mod \ tst-dlopen-tlsreinitmod1 \ tst-dlopen-tlsreinitmod2 \ tst-dlopen-tlsreinitmod3 \ -@@ -1144,6 +1157,10 @@ tests-pie += \ +@@ -1003,6 +1024,7 @@ modules-names += \ + tst-tls22-mod1-gnu2 \ + tst-tls22-mod2 \ + tst-tls22-mod2-gnu2 \ ++ tst-tls23-mod \ + tst-tlsalign-lib \ + tst-tlsgap-mod0 \ + tst-tlsgap-mod1 \ +@@ -1033,6 +1055,9 @@ 
modules-names += \ + tst-unique2mod1 \ + tst-unique2mod2 \ + tst-unwind-ctor-lib \ ++ tst-version-hash-zero-linkmod \ ++ tst-version-hash-zero-mod \ ++ tst-version-hash-zero-refmod \ + unload2dep \ + unload2mod \ + unload3mod1 \ +@@ -1144,6 +1169,10 @@ tests-pie += \ tst-pie1 \ tst-pie2 \ # tests-pie @@ -1008,7 +1359,7 @@ ifneq (,$(load-address-ldflag)) tests += \ tst-pie-address \ -@@ -1159,6 +1176,10 @@ tests += \ +@@ -1159,6 +1188,10 @@ tests += \ tests-static += \ tst-pie-address-static \ # tests-static @@ -1019,7 +1370,7 @@ LDFLAGS-tst-pie-address-static += \ $(load-address-ldflag)=$(pde-load-address) endif -@@ -1988,6 +2009,9 @@ $(objpfx)tst-execstack.out: $(objpfx)tst-execstack-mod.so +@@ -1988,6 +2021,9 @@ $(objpfx)tst-execstack.out: $(objpfx)tst-execstack-mod.so CPPFLAGS-tst-execstack.c += -DUSE_PTHREADS=0 LDFLAGS-tst-execstack = -Wl,-z,noexecstack LDFLAGS-tst-execstack-mod.so = -Wl,-z,execstack @@ -1029,7 +1380,7 @@ $(objpfx)tst-execstack-needed: $(objpfx)tst-execstack-mod.so LDFLAGS-tst-execstack-needed = -Wl,-z,noexecstack -@@ -1996,7 +2020,18 @@ LDFLAGS-tst-execstack-prog = -Wl,-z,execstack +@@ -1996,7 +2032,18 @@ LDFLAGS-tst-execstack-prog = -Wl,-z,execstack CFLAGS-tst-execstack-prog.c += -Wno-trampolines CFLAGS-tst-execstack-mod.c += -Wno-trampolines @@ -1048,7 +1399,7 @@ CFLAGS-tst-execstack-prog-static.c += -Wno-trampolines ifeq (yes,$(build-hardcoded-path-in-tests)) -@@ -2074,6 +2109,7 @@ $(objpfx)tst-array5-static-cmp.out: tst-array5-static.exp \ +@@ -2074,6 +2121,7 @@ $(objpfx)tst-array5-static-cmp.out: tst-array5-static.exp \ CFLAGS-tst-pie1.c += $(pie-ccflag) CFLAGS-tst-pie2.c += $(pie-ccflag) @@ -1056,7 +1407,21 @@ CFLAGS-tst-pie-address.c += $(pie-ccflag) $(objpfx)tst-piemod1.so: $(libsupport) -@@ -3189,6 +3225,9 @@ $(objpfx)tst-audit-tlsdesc.out: $(objpfx)tst-auditmod-tlsdesc.so +@@ -2206,6 +2254,13 @@ $(objpfx)tst-dlmopen3.out: $(objpfx)tst-dlmopen1mod.so + + $(objpfx)tst-dlmopen4.out: $(objpfx)tst-dlmopen1mod.so + 
++CFLAGS-tst-dlmopen4-pic.c += -fPIC ++$(objpfx)tst-dlmopen4-pic.out: $(objpfx)tst-dlmopen1mod.so ++ ++CFLAGS-tst-dlmopen4-nonpic.c += -fno-pie ++tst-dlmopen4-nonpic-no-pie = yes ++$(objpfx)tst-dlmopen4-nonpic.out: $(objpfx)tst-dlmopen1mod.so ++ + $(objpfx)tst-audit1.out: $(objpfx)tst-auditmod1.so + tst-audit1-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so + +@@ -3189,6 +3244,9 @@ $(objpfx)tst-audit-tlsdesc.out: $(objpfx)tst-auditmod-tlsdesc.so tst-audit-tlsdesc-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so $(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-auditmod-tlsdesc.so tst-audit-tlsdesc-dlopen-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so @@ -1066,12 +1431,322 @@ $(objpfx)tst-dlmopen-twice.out: \ $(objpfx)tst-dlmopen-twice-mod1.so \ -@@ -3392,3 +3431,5 @@ $(objpfx)tst-nolink-libc-2: $(objpfx)tst-nolink-libc.o +@@ -3354,6 +3412,13 @@ tst-tls22-mod1-gnu2.so-no-z-defs = yes + tst-tls22-mod2.so-no-z-defs = yes + tst-tls22-mod2-gnu2.so-no-z-defs = yes + ++$(objpfx)tst-tls23: $(shared-thread-library) ++$(objpfx)tst-tls23.out: $(objpfx)tst-tls23-mod.so ++ ++ifneq (no,$(have-test-mtls-traditional)) ++CFLAGS-tst-tls23-mod.c += -mtls-dialect=$(have-test-mtls-traditional) ++endif ++ + ifeq ($(have-test-cc-cflags-fsemantic-interposition),yes) + # Compiler may default to -fno-semantic-interposition. These modules + # must be compiled with -fsemantic-interposition. 
+@@ -3392,3 +3457,28 @@ $(objpfx)tst-nolink-libc-2: $(objpfx)tst-nolink-libc.o -Wl,--dynamic-linker=$(objpfx)ld.so $(objpfx)tst-nolink-libc-2.out: $(objpfx)tst-nolink-libc-2 $(objpfx)ld.so $< > $@ 2>&1; $(evaluate-test) + +$(objpfx)tst-dlopen-sgid.out: $(objpfx)tst-dlopen-sgid-mod.so ++ ++$(objpfx)tst-version-hash-zero.out: \ ++ $(objpfx)tst-version-hash-zero-mod.so \ ++ $(objpfx)tst-version-hash-zero-refmod.so ++LDFLAGS-tst-version-hash-zero-mod.so = \ ++ -Wl,--version-script=tst-version-hash-zero-mod.map ++# The run-time test module tst-version-hash-zero-refmod.so is linked ++# to a stub module, tst-version-hash-zero-linkmod.so, to produce an ++# expected relocation error. ++$(objpfx)tst-version-hash-zero-refmod.so: \ ++ $(objpfx)tst-version-hash-zero-linkmod.so ++LDFLAGS-tst-version-hash-zero-linkmod.so = \ ++ -Wl,--version-script=tst-version-hash-zero-linkmod.map \ ++ -Wl,--soname=tst-version-hash-zero-mod.so ++$(objpfx)tst-version-hash-zero-refmod.so: \ ++ $(objpfx)tst-version-hash-zero-linkmod.so ++tst-version-hash-zero-refmod.so-no-z-defs = yes ++ ++$(objpfx)tst-dlopen-constructor-null: \ ++ $(objpfx)tst-dlopen-constructor-null-mod1.so \ ++ $(objpfx)tst-dlopen-constructor-null-mod2.so ++$(objpfx)tst-dlopen-constructor-null-mod2.so: \ ++ $(objpfx)tst-dlopen-constructor-null-mod1.so +diff --git a/elf/dl-close.c b/elf/dl-close.c +index 47bd3dab81..83e4f012b2 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -433,8 +433,7 @@ _dl_close_worker (struct link_map *map, bool force) + /* Notify the debugger we are about to remove some loaded objects. + LA_ACT_DELETE has already been signalled above for !unload_any. 
*/ + struct r_debug *r = _dl_debug_update (nsid); +- r->r_state = RT_DELETE; +- _dl_debug_state (); ++ _dl_debug_change_state (r, RT_DELETE); + LIBC_PROBE (unmap_start, 2, nsid, r); + + if (unload_global) +@@ -726,8 +725,7 @@ _dl_close_worker (struct link_map *map, bool force) + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + + /* Notify the debugger those objects are finalized and gone. */ +- r->r_state = RT_CONSISTENT; +- _dl_debug_state (); ++ _dl_debug_change_state (r, RT_CONSISTENT); + LIBC_PROBE (unmap_complete, 2, nsid, r); + + #ifdef SHARED +diff --git a/elf/dl-debug-symbols.S b/elf/dl-debug-symbols.S +index 7bcb035826..d789f4e35a 100644 +--- a/elf/dl-debug-symbols.S ++++ b/elf/dl-debug-symbols.S +@@ -38,3 +38,4 @@ + _r_debug: + _r_debug_extended: + .zero R_DEBUG_EXTENDED_SIZE ++rtld_hidden_def (_r_debug) +diff --git a/elf/dl-debug.c b/elf/dl-debug.c +index 5ff1460ab7..7052f4a3c1 100644 +--- a/elf/dl-debug.c ++++ b/elf/dl-debug.c +@@ -16,6 +16,7 @@ + License along with the GNU C Library; if not, see + . */ + ++#include + #include + + +@@ -30,23 +31,86 @@ extern const int verify_link_map_members[(VERIFY_MEMBER (l_addr) + && VERIFY_MEMBER (l_prev)) + ? 1 : -1]; + ++#ifdef SHARED ++/* r_debug structs for secondary namespaces. The first namespace is ++ handled separately because its r_debug structure must overlap with ++ the public _r_debug symbol, so the first array element corresponds ++ to LM_ID_BASE + 1. See elf/dl-debug-symbols.S. */ ++struct r_debug_extended _r_debug_array[DL_NNS - 1]; ++ ++/* If not null, pointer to the _r_debug in the main executable. */ ++static struct r_debug *_r_debug_main; ++ ++void ++_dl_debug_post_relocate (struct link_map *main_map) ++{ ++ /* Perform a full symbol search in all objects, to maintain ++ compatibility if interposed _r_debug definitions. The lookup ++ cannot fail because there is a definition in ld.so, and this ++ function is only called if the ld.so search scope is not empty. 
*/ ++ const ElfW(Sym) *sym = NULL; ++ lookup_t result =_dl_lookup_symbol_x ("_r_debug", main_map, &sym, ++ main_map->l_scope, NULL, 0, 0, NULL); ++ if (sym->st_size >= sizeof (struct r_debug)) ++ { ++ struct r_debug *main_r_debug = DL_SYMBOL_ADDRESS (result, sym); ++ if (main_r_debug != &_r_debug_extended.base) ++ { ++ /* The extended version of the struct is not available in ++ the main executable because a copy relocation has been ++ used. r_map etc. have already been copied as part of the ++ copy relocation processing. */ ++ main_r_debug->r_version = 1; ++ ++ /* Record that dual updates of the initial link map are ++ required. */ ++ _r_debug_main = main_r_debug; ++ } ++ } ++} ++ ++/* Return the r_debug object for the namespace NS. */ ++static inline struct r_debug_extended * ++get_rdebug (Lmid_t ns) ++{ ++ if (ns == LM_ID_BASE) ++ return &_r_debug_extended; ++ else ++ return &_r_debug_array[ns - 1]; ++} ++#else /* !SHARED */ ++static inline struct r_debug_extended * ++get_rdebug (Lmid_t ns) ++{ ++ return &_r_debug_extended; /* There is just one namespace. */ ++} ++#endif /* !SHARED */ ++ + /* Update the `r_map' member and return the address of `struct r_debug' + of the namespace NS. */ + + struct r_debug * + _dl_debug_update (Lmid_t ns) + { +- struct r_debug_extended *r; +- if (ns == LM_ID_BASE) +- r = &_r_debug_extended; +- else +- r = &GL(dl_ns)[ns]._ns_debug; ++ struct r_debug_extended *r = get_rdebug (ns); + if (r->base.r_map == NULL) + atomic_store_release (&r->base.r_map, + (void *) GL(dl_ns)[ns]._ns_loaded); + return &r->base; + } + ++void ++_dl_debug_change_state (struct r_debug *r, int state) ++{ ++ atomic_store_release (&r->r_state, state); ++#ifdef SHARED ++ if (r == &_r_debug_extended.base && _r_debug_main != NULL) ++ /* Update the copy-relocation of _r_debug. */ ++ atomic_store_release (&_r_debug_main->r_state, state); ++#endif ++ _dl_debug_state (); ++} ++ + /* Initialize _r_debug_extended for the namespace NS. 
LDBASE is the + run-time load address of the dynamic linker, to be put in + _r_debug_extended.r_ldbase. Return the address of _r_debug. */ +@@ -54,34 +118,7 @@ _dl_debug_update (Lmid_t ns) + struct r_debug * + _dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) + { +- struct r_debug_extended *r, **pp = NULL; +- +- if (ns == LM_ID_BASE) +- { +- r = &_r_debug_extended; +- /* Initialize r_version to 1. */ +- if (_r_debug_extended.base.r_version == 0) +- _r_debug_extended.base.r_version = 1; +- } +- else if (DL_NNS > 1) +- { +- r = &GL(dl_ns)[ns]._ns_debug; +- if (r->base.r_brk == 0) +- { +- /* Add the new namespace to the linked list. After a namespace +- is initialized, r_brk becomes non-zero. A namespace becomes +- empty (r_map == NULL) when it is unused. But it is never +- removed from the linked list. */ +- struct r_debug_extended *p; +- for (pp = &_r_debug_extended.r_next; +- (p = *pp) != NULL; +- pp = &p->r_next) +- ; +- +- r->base.r_version = 2; +- } +- } +- ++ struct r_debug_extended *r = get_rdebug (ns); + if (r->base.r_brk == 0) + { + /* Tell the debugger where to find the map of loaded objects. +@@ -89,30 +126,44 @@ _dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) + only once. */ + r->base.r_ldbase = ldbase ?: _r_debug_extended.base.r_ldbase; + r->base.r_brk = (ElfW(Addr)) &_dl_debug_state; +- r->r_next = NULL; ++ ++#ifdef SHARED ++ /* Add the new namespace to the linked list. This assumes that ++ namespaces are allocated in increasing order. After a ++ namespace is initialized, r_brk becomes non-zero. A ++ namespace becomes empty (r_map == NULL) when it is unused. ++ But it is never removed from the linked list. */ ++ ++ if (ns != LM_ID_BASE) ++ { ++ r->base.r_version = 2; ++ if (ns - 1 == LM_ID_BASE) ++ { ++ atomic_store_release (&_r_debug_extended.r_next, r); ++ /* Now there are multiple namespaces. Note that this ++ deliberately does not update the copy in the main ++ executable (if it exists). 
*/ ++ atomic_store_release (&_r_debug_extended.base.r_version, 2); ++ } ++ else ++ /* Update r_debug_extended of the previous namespace. */ ++ atomic_store_release (&_r_debug_array[ns - 2].r_next, r); ++ } ++ else ++#endif /* SHARED */ ++ r->base.r_version = 1; + } + + if (r->base.r_map == NULL) +- atomic_store_release (&r->base.r_map, +- (void *) GL(dl_ns)[ns]._ns_loaded); +- +- if (pp != NULL) + { +- atomic_store_release (pp, r); +- /* Bump r_version to 2 for the new namespace. */ +- atomic_store_release (&_r_debug_extended.base.r_version, 2); ++ struct link_map_public *l = (void *) GL(dl_ns)[ns]._ns_loaded; ++ atomic_store_release (&r->base.r_map, l); ++#ifdef SHARED ++ if (ns == LM_ID_BASE && _r_debug_main != NULL) ++ /* Update the copy-relocation of _r_debug. */ ++ atomic_store_release (&_r_debug_main->r_map, l); ++#endif + } + + return &r->base; + } +- +- +-/* This function exists solely to have a breakpoint set on it by the +- debugger. The debugger is supposed to find this function's address by +- examining the r_brk member of struct r_debug, but GDB 4.15 in fact looks +- for this particular symbol name in the PT_INTERP file. */ +-void +-_dl_debug_state (void) +-{ +-} +-rtld_hidden_def (_dl_debug_state) +diff --git a/elf/dl-debug_state.c b/elf/dl-debug_state.c +new file mode 100644 +index 0000000000..40c134a49e +--- /dev/null ++++ b/elf/dl-debug_state.c +@@ -0,0 +1,30 @@ ++/* Debugger hook called after dynamic linker updates. ++ Copyright (C) 1996-2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++/* This function exists solely to have a breakpoint set on it by the ++ debugger. The debugger is supposed to find this function's address by ++ examining the r_brk member of struct r_debug, but GDB 4.15 in fact looks ++ for this particular symbol name in the PT_INTERP file. Therefore, ++ this function must not be inlined. */ ++void ++_dl_debug_state (void) ++{ ++} ++rtld_hidden_def (_dl_debug_state) diff --git a/elf/dl-execstack-tunable.c b/elf/dl-execstack-tunable.c new file mode 100644 index 0000000000..e3b638aeaa @@ -1271,10 +1946,20 @@ ph < ph_end; ++ph) if (ph->p_type == DLFO_EH_SEGMENT_TYPE) diff --git a/elf/dl-load.c b/elf/dl-load.c -index f905578a65..945dd8a231 100644 +index f905578a65..7b21f1dffa 100644 --- a/elf/dl-load.c +++ b/elf/dl-load.c -@@ -945,7 +945,7 @@ struct link_map * +@@ -921,8 +921,7 @@ _dl_notify_new_object (int mode, Lmid_t nsid, struct link_map *l) + /* Notify the debugger we have added some objects. We need to + call _dl_debug_initialize in a static program in case dynamic + linking has not been used before. 
*/ +- r->r_state = RT_ADD; +- _dl_debug_state (); ++ _dl_debug_change_state (r, RT_ADD); + LIBC_PROBE (map_start, 2, nsid, r); + } + else +@@ -945,7 +944,7 @@ struct link_map * _dl_map_object_from_fd (const char *name, const char *origname, int fd, struct filebuf *fbp, char *realname, struct link_map *loader, int l_type, int mode, @@ -1283,7 +1968,7 @@ { struct link_map *l = NULL; const ElfW(Ehdr) *header; -@@ -2180,7 +2180,7 @@ _dl_map_object (struct link_map *loader, const char *name, +@@ -2180,7 +2179,7 @@ _dl_map_object (struct link_map *loader, const char *name, void *stack_end = __libc_stack_end; return _dl_map_object_from_fd (name, origname, fd, &fb, realname, loader, @@ -1292,6 +1977,79 @@ } struct add_path_state +diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c +index ece647f009..2f5cd674f5 100644 +--- a/elf/dl-lookup.c ++++ b/elf/dl-lookup.c +@@ -100,12 +100,22 @@ check_match (const char *const undef_name, + /* We can match the version information or use the + default one if it is not hidden. */ + ElfW(Half) ndx = verstab[symidx] & 0x7fff; +- if ((map->l_versions[ndx].hash != version->hash +- || strcmp (map->l_versions[ndx].name, version->name)) +- && (version->hidden || map->l_versions[ndx].hash +- || (verstab[symidx] & 0x8000))) +- /* It's not the version we want. */ +- return NULL; ++ if (map->l_versions[ndx].hash == version->hash ++ && strcmp (map->l_versions[ndx].name, version->name) == 0) ++ /* This is an exact version match. Return the symbol below. */ ++ ; ++ else ++ { ++ if (!version->hidden ++ && map->l_versions[ndx].name[0] == '\0' ++ && (verstab[symidx] & 0x8000) == 0 ++ && (*num_versions)++ == 0) ++ /* This is the global default version. Store it as a ++ fallback match. 
*/ ++ *versioned_sym = sym; ++ ++ return NULL; ++ } + } + } + else +diff --git a/elf/dl-open.c b/elf/dl-open.c +index 4c12ddec59..894a9a7bb6 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -594,6 +594,16 @@ dl_open_worker_begin (void *a) + if ((mode & RTLD_GLOBAL) && new->l_global == 0) + add_to_global_update (new); + ++ /* It is not possible to run the ELF constructor for the new ++ link map if it has not executed yet: If this dlopen call came ++ from an ELF constructor that has not put that object into a ++ consistent state, completing initialization for the entire ++ scope will expose objects that have this partially ++ constructed object among its dependencies to this ++ inconsistent state. This could happen even with a benign ++ dlopen (NULL, RTLD_LAZY) call from a constructor of an ++ initially loaded shared object. */ ++ + return; + } + +@@ -771,8 +781,7 @@ dl_open_worker (void *a) + #ifdef SHARED + bool was_not_consistent = r->r_state != RT_CONSISTENT; + #endif +- r->r_state = RT_CONSISTENT; +- _dl_debug_state (); ++ _dl_debug_change_state (r, RT_CONSISTENT); + LIBC_PROBE (map_complete, 3, nsid, r, args->map); + + #ifdef SHARED +@@ -841,7 +850,7 @@ no more namespaces available for dlmopen()")); + } + + GL(dl_ns)[nsid].libc_map = NULL; +- _dl_debug_update (nsid)->r_state = RT_CONSISTENT; ++ _dl_debug_change_state (_dl_debug_update (nsid), RT_CONSISTENT); + } + /* Never allow loading a DSO in a namespace which is empty. Such + direct placements is only causing problems. 
Also don't allow diff --git a/elf/dl-reloc-static-pie.c b/elf/dl-reloc-static-pie.c index e34bf5f7ce..758bf9893e 100644 --- a/elf/dl-reloc-static-pie.c @@ -1352,8 +2110,26 @@ default: 1 } } +diff --git a/elf/dl-version.c b/elf/dl-version.c +index d414bd1e18..2fbf4942b9 100644 +--- a/elf/dl-version.c ++++ b/elf/dl-version.c +@@ -357,6 +357,13 @@ _dl_check_map_versions (struct link_map *map, int verbose, int trace_mode) + ent = (ElfW(Verdef) *) ((char *) ent + ent->vd_next); + } + } ++ ++ /* The empty string has ELF hash zero. This avoids a NULL check ++ before the version string comparison in check_match in ++ dl-lookup.c. */ ++ for (unsigned int i = 0; i < map->l_nversions; ++i) ++ if (map->l_versions[i].name == NULL) ++ map->l_versions[i].name = ""; + } + + /* When there is a DT_VERNEED entry with libc.so on DT_NEEDED, issue diff --git a/elf/rtld.c b/elf/rtld.c -index 00bec15316..c1e9721def 100644 +index 00bec15316..18c96bd5c1 100644 --- a/elf/rtld.c +++ b/elf/rtld.c @@ -1242,6 +1242,60 @@ rtld_setup_main_map (struct link_map *main_map) @@ -1465,6 +2241,138 @@ /* Add the dynamic linker to the TLS list if it also uses TLS. */ if (_dl_rtld_map.l_tls_blocksize != 0) +@@ -1783,8 +1811,7 @@ dl_main (const ElfW(Phdr) *phdr, + elf_setup_debug_entry (main_map, r); + + /* We start adding objects. */ +- r->r_state = RT_ADD; +- _dl_debug_state (); ++ _dl_debug_change_state (r, RT_ADD); + LIBC_PROBE (init_start, 2, LM_ID_BASE, r); + + /* Auditing checkpoint: we are ready to signal that the initial map +@@ -2319,6 +2346,9 @@ dl_main (const ElfW(Phdr) *phdr, + + __rtld_mutex_init (); + __rtld_malloc_init_real (main_map); ++ ++ /* Update copy-relocated _r_debug if necessary. */ ++ _dl_debug_post_relocate (main_map); + } + + /* All ld.so initialization is complete. Apply RELRO. */ +@@ -2339,8 +2369,7 @@ dl_main (const ElfW(Phdr) *phdr, + /* Notify the debugger all new objects are now ready to go. We must re-get + the address since by now the variable might be in another object. 
*/ + r = _dl_debug_update (LM_ID_BASE); +- r->r_state = RT_CONSISTENT; +- _dl_debug_state (); ++ _dl_debug_change_state (r, RT_CONSISTENT); + LIBC_PROBE (init_complete, 2, LM_ID_BASE, r); + + /* Auditing checkpoint: we have added all objects. */ +diff --git a/elf/sprof.c b/elf/sprof.c +index 4baff86d2a..1c9807dd6d 100644 +--- a/elf/sprof.c ++++ b/elf/sprof.c +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + + /* Get libc version number. */ + #include "../version.h" +@@ -410,6 +411,7 @@ load_shobj (const char *name) + int fd; + ElfW(Shdr) *shdr; + size_t pagesize = getpagesize (); ++ struct stat st; + + /* Since we use dlopen() we must be prepared to work around the sometimes + strange lookup rules for the shared objects. If we have a file foo.so +@@ -553,14 +555,39 @@ load_shobj (const char *name) + error (EXIT_FAILURE, errno, _("Reopening shared object `%s' failed"), + map->l_name); + ++ if (fstat (fd, &st) < 0) ++ error (EXIT_FAILURE, errno, _("stat(%s) failure"), map->l_name); ++ ++ /* We're depending on data that's being read from the file, so be a ++ bit paranoid here and make sure the requests are reasonable - ++ i.e. both size and offset are nonnegative and smaller than the ++ file size, as well as the offset of the end of the data. PREAD ++ would have failed anyway, but this is more robust and explains ++ what happened better. Note that SZ must be unsigned and OFF may ++ be signed or unsigned. */ ++#define PCHECK(sz1,off1) { \ ++ size_t sz = sz1, end_off; \ ++ off_t off = off1; \ ++ if (sz > st.st_size \ ++ || off < 0 || off > st.st_size \ ++ || INT_ADD_WRAPV (sz, off, &end_off) \ ++ || end_off > st.st_size) \ ++ error (EXIT_FAILURE, ERANGE, \ ++ _("read outside of file extents %zu + %jd > %jd"), \ ++ sz, (intmax_t) off, (intmax_t) st.st_size); \ ++ } ++ + /* Map the section header. 
*/ + size_t size = ehdr->e_shnum * sizeof (ElfW(Shdr)); + shdr = (ElfW(Shdr) *) alloca (size); ++ PCHECK (size, ehdr->e_shoff); + if (pread (fd, shdr, size, ehdr->e_shoff) != size) + error (EXIT_FAILURE, errno, _("reading of section headers failed")); + + /* Get the section header string table. */ + char *shstrtab = (char *) alloca (shdr[ehdr->e_shstrndx].sh_size); ++ PCHECK (shdr[ehdr->e_shstrndx].sh_size, ++ shdr[ehdr->e_shstrndx].sh_offset); + if (pread (fd, shstrtab, shdr[ehdr->e_shstrndx].sh_size, + shdr[ehdr->e_shstrndx].sh_offset) + != shdr[ehdr->e_shstrndx].sh_size) +@@ -588,6 +615,7 @@ load_shobj (const char *name) + size_t size = debuglink_entry->sh_size; + char *debuginfo_fname = (char *) alloca (size + 1); + debuginfo_fname[size] = '\0'; ++ PCHECK (size, debuglink_entry->sh_offset); + if (pread (fd, debuginfo_fname, size, debuglink_entry->sh_offset) + != size) + { +@@ -641,21 +669,32 @@ load_shobj (const char *name) + if (fd2 != -1) + { + ElfW(Ehdr) ehdr2; ++ struct stat st; ++ ++ if (fstat (fd2, &st) < 0) ++ error (EXIT_FAILURE, errno, _("stat(%s) failure"), workbuf); + + /* Read the ELF header. */ ++ PCHECK (sizeof (ehdr2), 0); + if (pread (fd2, &ehdr2, sizeof (ehdr2), 0) != sizeof (ehdr2)) + error (EXIT_FAILURE, errno, + _("reading of ELF header failed")); + + /* Map the section header. */ +- size_t size = ehdr2.e_shnum * sizeof (ElfW(Shdr)); ++ size_t size; ++ if (INT_MULTIPLY_WRAPV (ehdr2.e_shnum, sizeof (ElfW(Shdr)), &size)) ++ error (EXIT_FAILURE, errno, _("too many section headers")); ++ + ElfW(Shdr) *shdr2 = (ElfW(Shdr) *) alloca (size); ++ PCHECK (size, ehdr2.e_shoff); + if (pread (fd2, shdr2, size, ehdr2.e_shoff) != size) + error (EXIT_FAILURE, errno, + _("reading of section headers failed")); + + /* Get the section header string table. 
*/ + shstrtab = (char *) alloca (shdr2[ehdr2.e_shstrndx].sh_size); ++ PCHECK (shdr2[ehdr2.e_shstrndx].sh_size, ++ shdr2[ehdr2.e_shstrndx].sh_offset); + if (pread (fd2, shstrtab, shdr2[ehdr2.e_shstrndx].sh_size, + shdr2[ehdr2.e_shstrndx].sh_offset) + != shdr2[ehdr2.e_shstrndx].sh_size) diff --git a/elf/tst-audit-tlsdesc-dlopen2.c b/elf/tst-audit-tlsdesc-dlopen2.c new file mode 100644 index 0000000000..7ba2c4129a @@ -1582,6 +2490,210 @@ + + return LAV_CURRENT; +} +diff --git a/elf/tst-dlmopen4-nonpic.c b/elf/tst-dlmopen4-nonpic.c +new file mode 100644 +index 0000000000..ad4e409953 +--- /dev/null ++++ b/elf/tst-dlmopen4-nonpic.c +@@ -0,0 +1,2 @@ ++#define BUILD_FOR_NONPIC ++#include "tst-dlmopen4.c" +diff --git a/elf/tst-dlmopen4-pic.c b/elf/tst-dlmopen4-pic.c +new file mode 100644 +index 0000000000..919fa85c25 +--- /dev/null ++++ b/elf/tst-dlmopen4-pic.c +@@ -0,0 +1,2 @@ ++#define BUILD_FOR_PIC ++#include "tst-dlmopen4.c" +diff --git a/elf/tst-dlmopen4.c b/elf/tst-dlmopen4.c +index 64e007e3dc..5cda024fdf 100644 +--- a/elf/tst-dlmopen4.c ++++ b/elf/tst-dlmopen4.c +@@ -46,6 +46,15 @@ do_test (void) + TEST_COMPARE (debug->base.r_version, 1); + TEST_VERIFY_EXIT (debug->r_next == NULL); + ++#ifdef BUILD_FOR_PIC ++ /* In a PIC build, using _r_debug directly should give us the same ++ object. */ ++ TEST_VERIFY (&_r_debug == &debug->base); ++#endif ++#ifdef BUILD_FOR_NONPIC ++ TEST_COMPARE (_r_debug.r_version, 1); ++#endif ++ + void *h = xdlmopen (LM_ID_NEWLM, "$ORIGIN/tst-dlmopen1mod.so", + RTLD_LAZY); + +@@ -57,6 +66,19 @@ do_test (void) + const char *name = basename (debug->r_next->base.r_map->l_name); + TEST_COMPARE_STRING (name, "tst-dlmopen1mod.so"); + ++#ifdef BUILD_FOR_NONPIC ++ /* If a copy relocation is used, it must be at version 1. 
*/ ++ if (&_r_debug != &debug->base) ++ { ++ TEST_COMPARE (_r_debug.r_version, 1); ++ TEST_COMPARE ((uintptr_t) _r_debug.r_map, ++ (uintptr_t) debug->base.r_map); ++ TEST_COMPARE (_r_debug.r_brk, debug->base.r_brk); ++ TEST_COMPARE (_r_debug.r_state, debug->base.r_state); ++ TEST_COMPARE (_r_debug.r_ldbase, debug->base.r_ldbase); ++ } ++#endif ++ + xdlclose (h); + + return 0; +diff --git a/elf/tst-dlopen-constructor-null-mod1.c b/elf/tst-dlopen-constructor-null-mod1.c +new file mode 100644 +index 0000000000..70a7a0ad46 +--- /dev/null ++++ b/elf/tst-dlopen-constructor-null-mod1.c +@@ -0,0 +1,55 @@ ++/* Module calling dlopen (NULL, RTLD_LAZY) to obtain the global scope. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include ++ ++int mod1_status; ++ ++static void __attribute__ ((constructor)) ++init (void) ++{ ++ puts ("info: tst-dlopen-constructor-null-mod1.so constructor"); ++ ++ void *handle = dlopen (NULL, RTLD_LAZY); ++ if (handle == NULL) ++ { ++ printf ("error: %s\n", dlerror ()); ++ exit (1); ++ } ++ puts ("info: dlopen returned"); ++ if (dlsym (handle, "malloc") != malloc) ++ { ++ puts ("error: dlsym did not produce expected result"); ++ exit (1); ++ } ++ dlclose (handle); ++ ++ /* Check that the second module's constructor has not executed. */ ++ if (getenv ("mod2_status") != NULL) ++ { ++ printf ("error: mod2_status environment variable set: %s\n", ++ getenv ("mod2_status")); ++ exit (1); ++ } ++ ++ /* Communicate to the second module that the constructor executed. */ ++ mod1_status = 1; ++} +diff --git a/elf/tst-dlopen-constructor-null-mod2.c b/elf/tst-dlopen-constructor-null-mod2.c +new file mode 100644 +index 0000000000..d6e945beae +--- /dev/null ++++ b/elf/tst-dlopen-constructor-null-mod2.c +@@ -0,0 +1,37 @@ ++/* Module whose constructor should not be invoked by dlopen (NULL, RTLD_LAZY). ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++extern int mod1_status; ++int mod2_status; ++ ++static void __attribute__ ((constructor)) ++init (void) ++{ ++ printf ("info: tst-dlopen-constructor-null-mod2.so constructor" ++ " (mod1_status=%d)", mod1_status); ++ if (!(mod1_status == 1 && mod2_status == 0)) ++ { ++ puts ("error: mod1_status == 1 && mod2_status == 0 expected"); ++ exit (1); ++ } ++ setenv ("mod2_status", "constructed", 1); ++ mod2_status = 1; ++} +diff --git a/elf/tst-dlopen-constructor-null.c b/elf/tst-dlopen-constructor-null.c +new file mode 100644 +index 0000000000..db90643325 +--- /dev/null ++++ b/elf/tst-dlopen-constructor-null.c +@@ -0,0 +1,38 @@ ++/* Verify that dlopen (NULL, RTLD_LAZY) does not complete initialization. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* This test mimics what the glvndSetupPthreads function in libglvnd ++ does. */ ++ ++#include ++#include ++ ++/* Defined and initialized in the shared objects. 
*/ ++extern int mod1_status; ++extern int mod2_status; ++ ++static int ++do_test (void) ++{ ++ TEST_COMPARE (mod1_status, 1); ++ TEST_COMPARE (mod2_status, 1); ++ TEST_COMPARE_STRING (getenv ("mod2_status"), "constructed"); ++ return 0; ++} ++ ++#include diff --git a/elf/tst-dlopen-sgid-mod.c b/elf/tst-dlopen-sgid-mod.c new file mode 100644 index 0000000000..5eb79eef48 @@ -2077,6 +3189,379 @@ +glibc.rtld.execstack: 1 (min: 0, max: 2) glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10) glibc.rtld.optional_static_tls: 0x200 (min: 0x0, max: 0x[f]+) +diff --git a/elf/tst-tls23-mod.c b/elf/tst-tls23-mod.c +new file mode 100644 +index 0000000000..3ee4c70e40 +--- /dev/null ++++ b/elf/tst-tls23-mod.c +@@ -0,0 +1,32 @@ ++/* DSO used by tst-tls23. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++__thread struct tls tls_var0 __attribute__ ((visibility ("hidden"))); ++ ++struct tls * ++apply_tls (struct tls *p) ++{ ++ INIT_TLS_CALL (); ++ BEFORE_TLS_CALL (); ++ tls_var0 = *p; ++ struct tls *ret = &tls_var0; ++ AFTER_TLS_CALL (); ++ return ret; ++} +diff --git a/elf/tst-tls23.c b/elf/tst-tls23.c +new file mode 100644 +index 0000000000..afe594c067 +--- /dev/null ++++ b/elf/tst-tls23.c +@@ -0,0 +1,106 @@ ++/* Test that __tls_get_addr preserves caller-saved registers. 
++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifndef IS_SUPPORTED ++# define IS_SUPPORTED() true ++#endif ++ ++/* An architecture can define it to clobber caller-saved registers in ++ malloc below to verify that __tls_get_addr won't change caller-saved ++ registers. */ ++#ifndef PREPARE_MALLOC ++# define PREPARE_MALLOC() ++#endif ++ ++extern void * __libc_malloc (size_t); ++ ++size_t malloc_counter = 0; ++ ++void * ++malloc (size_t n) ++{ ++ PREPARE_MALLOC (); ++ malloc_counter++; ++ return __libc_malloc (n); ++} ++ ++static void *mod; ++static const char *modname = "tst-tls23-mod.so"; ++ ++static void ++open_mod (void) ++{ ++ mod = xdlopen (modname, RTLD_LAZY); ++ printf ("open %s\n", modname); ++} ++ ++static void ++close_mod (void) ++{ ++ xdlclose (mod); ++ mod = NULL; ++ printf ("close %s\n", modname); ++} ++ ++static void ++access_mod (const char *sym) ++{ ++ struct tls var = { -4, -4, -4, -4 }; ++ struct tls *(*f) (struct tls *) = xdlsym (mod, sym); ++ /* Check that our malloc is called. 
*/ ++ malloc_counter = 0; ++ struct tls *p = f (&var); ++ TEST_VERIFY (malloc_counter != 0); ++ printf ("access %s: %s() = %p\n", modname, sym, p); ++ TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0); ++ ++(p->a); ++} ++ ++static void * ++start (void *arg) ++{ ++ access_mod ("apply_tls"); ++ return arg; ++} ++ ++static int ++do_test (void) ++{ ++ if (!IS_SUPPORTED ()) ++ return EXIT_UNSUPPORTED; ++ ++ open_mod (); ++ pthread_t t = xpthread_create (NULL, start, NULL); ++ xpthread_join (t); ++ close_mod (); ++ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-tls23.h b/elf/tst-tls23.h +new file mode 100644 +index 0000000000..d0e734569c +--- /dev/null ++++ b/elf/tst-tls23.h +@@ -0,0 +1,40 @@ ++/* Test that __tls_get_addr preserves caller-saved registers. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++struct tls ++{ ++ int64_t a, b, c, d; ++}; ++ ++extern struct tls *apply_tls (struct tls *); ++ ++/* An architecture can define them to verify that caller-saved registers ++ aren't changed by __tls_get_addr. 
*/ ++#ifndef INIT_TLS_CALL ++# define INIT_TLS_CALL() ++#endif ++ ++#ifndef BEFORE_TLS_CALL ++# define BEFORE_TLS_CALL() ++#endif ++ ++#ifndef AFTER_TLS_CALL ++# define AFTER_TLS_CALL() ++#endif +diff --git a/elf/tst-version-hash-zero-linkmod.c b/elf/tst-version-hash-zero-linkmod.c +new file mode 100644 +index 0000000000..15e2506d01 +--- /dev/null ++++ b/elf/tst-version-hash-zero-linkmod.c +@@ -0,0 +1,22 @@ ++/* Stub module for linking tst-version-hash-zero-refmod.so. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++/* The version script assigns a different symbol version for the stub ++ module. Loading the module with the incorrect version is expected ++ to fail. 
*/ ++#include "tst-version-hash-zero-mod.c" +diff --git a/elf/tst-version-hash-zero-linkmod.map b/elf/tst-version-hash-zero-linkmod.map +new file mode 100644 +index 0000000000..2dba7c22d7 +--- /dev/null ++++ b/elf/tst-version-hash-zero-linkmod.map +@@ -0,0 +1,7 @@ ++Base { ++ local: *; ++}; ++ ++OTHER_VERSION { ++ global: global_variable; ++} Base; +diff --git a/elf/tst-version-hash-zero-mod.c b/elf/tst-version-hash-zero-mod.c +new file mode 100644 +index 0000000000..ac6b0dc4a5 +--- /dev/null ++++ b/elf/tst-version-hash-zero-mod.c +@@ -0,0 +1,20 @@ ++/* Test module with a zero version symbol hash. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++/* The symbol version is assigned by version script. */ ++int global_variable; +diff --git a/elf/tst-version-hash-zero-mod.map b/elf/tst-version-hash-zero-mod.map +new file mode 100644 +index 0000000000..41eaff7914 +--- /dev/null ++++ b/elf/tst-version-hash-zero-mod.map +@@ -0,0 +1,13 @@ ++Base { ++ local: *; ++}; ++ ++/* Define the version so that tst-version-hash-zero-refmod.so passes ++ the initial symbol version check. */ ++OTHER_VERSION { ++} Base; ++ ++/* This version string hashes to zero. 
*/ ++PPPPPPPPPPPP { ++ global: global_variable; ++} Base; +diff --git a/elf/tst-version-hash-zero-refmod.c b/elf/tst-version-hash-zero-refmod.c +new file mode 100644 +index 0000000000..cd8b3dcef5 +--- /dev/null ++++ b/elf/tst-version-hash-zero-refmod.c +@@ -0,0 +1,23 @@ ++/* Test module that triggers a relocation failure in tst-version-hash-zero. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++/* This is bound to global_variable@@OTHER_VERSION via ++ tst-version-hash-zero-linkmod.so, but at run time, only ++ global_variable@PPPPPPPPPPPP exists. */ ++extern int global_variable; ++int *pointer_variable = &global_variable; +diff --git a/elf/tst-version-hash-zero.c b/elf/tst-version-hash-zero.c +new file mode 100644 +index 0000000000..66a0db4f51 +--- /dev/null ++++ b/elf/tst-version-hash-zero.c +@@ -0,0 +1,56 @@ ++/* Symbols with version hash zero should not match any version (bug 29190). ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#include ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ void *handle = xdlopen ("tst-version-hash-zero-mod.so", RTLD_NOW); ++ ++ /* This used to crash because some struct r_found_version entries ++ with hash zero did not have valid version strings. */ ++ TEST_VERIFY (xdlvsym (handle, "global_variable", "PPPPPPPPPPPP") != NULL); ++ ++ /* Consistency check. */ ++ TEST_VERIFY (xdlsym (handle, "global_variable") ++ == xdlvsym (handle, "global_variable", "PPPPPPPPPPPP")); ++ ++ /* This symbol version is supposed to be missing. */ ++ TEST_VERIFY (dlvsym (handle, "global_variable", "OTHER_VERSION") == NULL); ++ ++ /* tst-version-hash-zero-refmod.so references ++ global_variable@@OTHER_VERSION and is expected to fail to load. ++ dlvsym sets the hidden flag during lookup. Relocation does not, ++ so this exercises a different failure case. 
*/ ++ TEST_VERIFY_EXIT (dlopen ("tst-version-hash-zero-refmod.so", RTLD_NOW) ++ == NULL); ++ const char *message = dlerror (); ++ if (strstr (message, ++ ": undefined symbol: global_variable, version OTHER_VERSION") ++ == NULL) ++ FAIL_EXIT1 ("unexpected dlopen failure: %s", message); ++ ++ xdlclose (handle); ++ return 0; ++} ++ ++#include diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c index 7dba5d8dff..558cfb11a3 100644 --- a/iconv/iconv_prog.c @@ -2114,6 +3599,65 @@ if ! cmp -s "$tmp/out" "$tmp/expected" ; then echo "error: iconv output difference" >&$logfd echo "*** expected ***" >&$logfd +diff --git a/include/ctype.h b/include/ctype.h +index 493a6f80ce..a15e5b6678 100644 +--- a/include/ctype.h ++++ b/include/ctype.h +@@ -24,33 +24,35 @@ libc_hidden_proto (toupper) + NL_CURRENT_INDIRECT. */ + + # include "../locale/localeinfo.h" +-# include + + # ifndef CTYPE_EXTERN_INLINE /* Used by ctype/ctype-info.c, which see. */ + # define CTYPE_EXTERN_INLINE extern inline + # endif + +-__libc_tsd_define (extern, const uint16_t *, CTYPE_B) +-__libc_tsd_define (extern, const int32_t *, CTYPE_TOUPPER) +-__libc_tsd_define (extern, const int32_t *, CTYPE_TOLOWER) ++extern __thread const uint16_t * __libc_tsd_CTYPE_B ++ attribute_hidden attribute_tls_model_ie; ++extern __thread const int32_t * __libc_tsd_CTYPE_TOUPPER ++ attribute_hidden attribute_tls_model_ie; ++extern __thread const int32_t * __libc_tsd_CTYPE_TOLOWER ++ attribute_hidden attribute_tls_model_ie; + + + CTYPE_EXTERN_INLINE const uint16_t ** __attribute__ ((const)) + __ctype_b_loc (void) + { +- return __libc_tsd_address (const uint16_t *, CTYPE_B); ++ return &__libc_tsd_CTYPE_B; + } + + CTYPE_EXTERN_INLINE const int32_t ** __attribute__ ((const)) + __ctype_toupper_loc (void) + { +- return __libc_tsd_address (const int32_t *, CTYPE_TOUPPER); ++ return &__libc_tsd_CTYPE_TOUPPER; + } + + CTYPE_EXTERN_INLINE const int32_t ** __attribute__ ((const)) + __ctype_tolower_loc (void) + { +- return __libc_tsd_address 
(const int32_t *, CTYPE_TOLOWER); ++ return &__libc_tsd_CTYPE_TOLOWER; + } + + # ifndef __NO_CTYPE +@@ -64,6 +66,11 @@ __ctype_tolower_loc (void) + # define __isdigit_l(c, l) ({ int __c = (c); __c >= '0' && __c <= '9'; }) + # endif /* Not __NO_CTYPE. */ + ++/* For use in initializers. */ ++extern const char _nl_C_LC_CTYPE_class[] attribute_hidden; ++extern const uint32_t _nl_C_LC_CTYPE_toupper[] attribute_hidden; ++extern const uint32_t _nl_C_LC_CTYPE_tolower[] attribute_hidden; ++ + # endif /* IS_IN (libc). */ + #endif /* Not _ISOMAC. */ + diff --git a/include/dlfcn.h b/include/dlfcn.h index f49ee1b0c9..a44420fa37 100644 --- a/include/dlfcn.h @@ -2128,6 +3672,207 @@ /* Internally used flag. */ #define __RTLD_DLOPEN 0x80000000 +diff --git a/include/link.h b/include/link.h +index 518bfd1670..41e5e54a7b 100644 +--- a/include/link.h ++++ b/include/link.h +@@ -365,6 +365,8 @@ struct auditstate + dynamic linker. */ + extern struct r_debug_extended _r_debug_extended attribute_hidden; + ++rtld_hidden_proto (_r_debug) ++ + #if __ELF_NATIVE_CLASS == 32 + # define symbind symbind32 + # define LA_SYMBIND "la_symbind32" +diff --git a/include/rpc/rpc.h b/include/rpc/rpc.h +index f5cee6caef..ba967833ad 100644 +--- a/include/rpc/rpc.h ++++ b/include/rpc/rpc.h +@@ -3,8 +3,6 @@ + + # ifndef _ISOMAC + +-#include +- + /* Now define the internal interfaces. 
*/ + extern unsigned long _create_xid (void); + +@@ -47,7 +45,8 @@ extern void __rpc_thread_key_cleanup (void) attribute_hidden; + + extern void __rpc_thread_destroy (void) attribute_hidden; + +-__libc_tsd_define (extern, struct rpc_thread_variables *, RPC_VARS) ++extern __thread struct rpc_thread_variables *__libc_tsd_RPC_VARS ++ attribute_hidden attribute_tls_model_ie; + + #define RPC_THREAD_VARIABLE(x) (__rpc_thread_variables()->x) + +diff --git a/locale/lc-ctype.c b/locale/lc-ctype.c +index 867f829be5..47be1c9a39 100644 +--- a/locale/lc-ctype.c ++++ b/locale/lc-ctype.c +@@ -64,12 +64,9 @@ _nl_postload_ctype (void) + in fact using the global locale. */ + if (_NL_CURRENT_LOCALE == &_nl_global_locale) + { +- __libc_tsd_set (const uint16_t *, CTYPE_B, +- (void *) _nl_global_locale.__ctype_b); +- __libc_tsd_set (const int32_t *, CTYPE_TOUPPER, +- (void *) _nl_global_locale.__ctype_toupper); +- __libc_tsd_set (const int32_t *, CTYPE_TOLOWER, +- (void *) _nl_global_locale.__ctype_tolower); ++ __libc_tsd_CTYPE_B = _nl_global_locale.__ctype_b; ++ __libc_tsd_CTYPE_TOUPPER = _nl_global_locale.__ctype_toupper; ++ __libc_tsd_CTYPE_TOLOWER = _nl_global_locale.__ctype_tolower; + } + + #include +diff --git a/locale/localeinfo.h b/locale/localeinfo.h +index ab1b5e5659..f503792d04 100644 +--- a/locale/localeinfo.h ++++ b/locale/localeinfo.h +@@ -236,10 +236,9 @@ extern struct __locale_struct _nl_global_locale attribute_hidden; + + /* This fetches the thread-local locale_t pointer, either one set with + uselocale or &_nl_global_locale. 
*/ +-#define _NL_CURRENT_LOCALE (__libc_tsd_get (locale_t, LOCALE)) +-#include +-__libc_tsd_define (extern, locale_t, LOCALE) +- ++#define _NL_CURRENT_LOCALE __libc_tsd_LOCALE ++extern __thread locale_t __libc_tsd_LOCALE ++ attribute_hidden attribute_tls_model_ie; + + /* For static linking it is desireable to avoid always linking in the code + and data for every category when we can tell at link time that they are +diff --git a/locale/uselocale.c b/locale/uselocale.c +index 606d043d57..5df87a237f 100644 +--- a/locale/uselocale.c ++++ b/locale/uselocale.c +@@ -34,7 +34,7 @@ __uselocale (locale_t newloc) + { + const locale_t locobj + = newloc == LC_GLOBAL_LOCALE ? &_nl_global_locale : newloc; +- __libc_tsd_set (locale_t, LOCALE, locobj); ++ __libc_tsd_LOCALE = locobj; + + #ifdef NL_CURRENT_INDIRECT + /* Now we must update all the per-category thread-local variables to +@@ -62,11 +62,9 @@ __uselocale (locale_t newloc) + #endif + + /* Update the special tsd cache of some locale data. */ +- __libc_tsd_set (const uint16_t *, CTYPE_B, (void *) locobj->__ctype_b); +- __libc_tsd_set (const int32_t *, CTYPE_TOLOWER, +- (void *) locobj->__ctype_tolower); +- __libc_tsd_set (const int32_t *, CTYPE_TOUPPER, +- (void *) locobj->__ctype_toupper); ++ __libc_tsd_CTYPE_B = locobj->__ctype_b; ++ __libc_tsd_CTYPE_TOLOWER = locobj->__ctype_tolower; ++ __libc_tsd_CTYPE_TOUPPER = locobj->__ctype_toupper; + } + + return oldloc == &_nl_global_locale ? LC_GLOBAL_LOCALE : oldloc; +diff --git a/locale/xlocale.c b/locale/xlocale.c +index 30c094d43a..5e25c0e4b8 100644 +--- a/locale/xlocale.c ++++ b/locale/xlocale.c +@@ -18,18 +18,13 @@ + + #include + #include "localeinfo.h" ++#include + + #define DEFINE_CATEGORY(category, category_name, items, a) \ + extern struct __locale_data _nl_C_##category; + #include "categories.def" + #undef DEFINE_CATEGORY + +-/* Defined in locale/C-ctype.c. 
*/ +-extern const char _nl_C_LC_CTYPE_class[] attribute_hidden; +-extern const char _nl_C_LC_CTYPE_toupper[] attribute_hidden; +-extern const char _nl_C_LC_CTYPE_tolower[] attribute_hidden; +- +- + const struct __locale_struct _nl_C_locobj attribute_hidden = + { + .__locales = +diff --git a/localedata/locales/bg_BG b/localedata/locales/bg_BG +index 159a6c3334..eda2a8d01b 100644 +--- a/localedata/locales/bg_BG ++++ b/localedata/locales/bg_BG +@@ -248,8 +248,8 @@ reorder-end + END LC_COLLATE + + LC_MONETARY +-int_curr_symbol "BGN " +-currency_symbol "лв." ++int_curr_symbol "EUR " ++currency_symbol "€" + mon_decimal_point "," + mon_thousands_sep " " + mon_grouping 3 +diff --git a/malloc/malloc.c b/malloc/malloc.c +index 27dfd1eb90..9423aba987 100644 +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -5106,7 +5106,7 @@ _int_memalign (mstate av, size_t alignment, size_t bytes) + INTERNAL_SIZE_T size; + + nb = checked_request2size (bytes); +- if (nb == 0) ++ if (nb == 0 || alignment > PTRDIFF_MAX) + { + __set_errno (ENOMEM); + return NULL; +@@ -5122,7 +5122,10 @@ _int_memalign (mstate av, size_t alignment, size_t bytes) + we don't find anything in those bins, the common malloc code will + scan starting at 2x. */ + +- /* Call malloc with worst case padding to hit alignment. */ ++ /* Call malloc with worst case padding to hit alignment. ALIGNMENT is a ++ power of 2, so it tops out at (PTRDIFF_MAX >> 1) + 1, leaving plenty of ++ space to add MINSIZE and whatever checked_request2size adds to BYTES to ++ get NB. Consequently, total below also does not overflow. 
*/ + m = (char *) (_int_malloc (av, nb + alignment + MINSIZE)); + + if (m == NULL) +diff --git a/malloc/tst-malloc-too-large.c b/malloc/tst-malloc-too-large.c +index a548a37b46..a1bda673a3 100644 +--- a/malloc/tst-malloc-too-large.c ++++ b/malloc/tst-malloc-too-large.c +@@ -152,7 +152,6 @@ test_large_allocations (size_t size) + } + + +-static long pagesize; + + /* This function tests the following aligned memory allocation functions + using several valid alignments and precedes each allocation test with a +@@ -171,8 +170,8 @@ test_large_aligned_allocations (size_t size) + + /* All aligned memory allocation functions expect an alignment that is a + power of 2. Given this, we test each of them with every valid +- alignment from 1 thru PAGESIZE. */ +- for (align = 1; align <= pagesize; align *= 2) ++ alignment for the type of ALIGN, i.e. until it wraps to 0. */ ++ for (align = 1; align > 0; align <<= 1) + { + test_setup (); + #if __GNUC_PREREQ (7, 0) +@@ -265,11 +264,6 @@ do_test (void) + DIAG_IGNORE_NEEDS_COMMENT (7, "-Walloc-size-larger-than="); + #endif + +- /* Aligned memory allocation functions need to be tested up to alignment +- size equivalent to page size, which should be a power of 2. */ +- pagesize = sysconf (_SC_PAGESIZE); +- TEST_VERIFY_EXIT (powerof2 (pagesize)); +- + /* Loop 1: Ensure that all allocations with SIZE close to SIZE_MAX, i.e. + in the range (SIZE_MAX - 2^14, SIZE_MAX], fail. + diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in index 01ba689aa8..4f194da19d 100644 --- a/math/auto-libm-test-in @@ -2351,8 +4096,65 @@ iattr->stacksize = to - (size_t) iattr->stackaddr; #endif /* We succeed and no need to look further. 
*/ +diff --git a/nptl/pthread_mutex_trylock.c b/nptl/pthread_mutex_trylock.c +index dbb8fcc754..392619021b 100644 +--- a/nptl/pthread_mutex_trylock.c ++++ b/nptl/pthread_mutex_trylock.c +@@ -48,7 +48,8 @@ ___pthread_mutex_trylock (pthread_mutex_t *mutex) + return 0; + } + +- if (lll_trylock (mutex->__data.__lock) == 0) ++ if (atomic_load_relaxed (&(mutex->__data.__lock)) == 0 ++ && lll_trylock (mutex->__data.__lock) == 0) + { + /* Record the ownership. */ + mutex->__data.__owner = id; +@@ -71,7 +72,10 @@ ___pthread_mutex_trylock (pthread_mutex_t *mutex) + /*FALL THROUGH*/ + case PTHREAD_MUTEX_ADAPTIVE_NP: + case PTHREAD_MUTEX_ERRORCHECK_NP: +- if (lll_trylock (mutex->__data.__lock) != 0) ++ /* Mutex type is already loaded, lock check overhead should ++ be minimal. */ ++ if (atomic_load_relaxed (&(mutex->__data.__lock)) != 0 ++ || lll_trylock (mutex->__data.__lock) != 0) + break; + + /* Record the ownership. */ +diff --git a/nss/getXXbyYY_r.c b/nss/getXXbyYY_r.c +index eae6c3480e..2b0735fb6a 100644 +--- a/nss/getXXbyYY_r.c ++++ b/nss/getXXbyYY_r.c +@@ -157,19 +157,15 @@ __merge_einval (LOOKUP_TYPE *a, + + #define CHECK_MERGE(err, status) \ + ({ \ +- do \ ++ if (err) \ + { \ +- if (err) \ +- { \ +- __set_errno (err); \ +- if (err == ERANGE) \ +- status = NSS_STATUS_TRYAGAIN; \ +- else \ +- status = NSS_STATUS_UNAVAIL; \ +- break; \ +- } \ ++ __set_errno (err); \ ++ if (err == ERANGE) \ ++ status = NSS_STATUS_TRYAGAIN; \ ++ else \ ++ status = NSS_STATUS_UNAVAIL; \ ++ break; \ + } \ +- while (0); \ + }) + + /* Type of the lookup function we need here. 
*/ diff --git a/posix/Makefile b/posix/Makefile -index a650abf598..0e209a7ed0 100644 +index a650abf598..a9f3e814e5 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -303,6 +303,7 @@ tests := \ @@ -2363,6 +4165,42 @@ tst-regcomp-truncated \ tst-regex \ tst-regex2 \ +@@ -325,6 +326,7 @@ tests := \ + tst-wait4 \ + tst-waitid \ + tst-wordexp-nocmd \ ++ tst-wordexp-reuse \ + tstgetopt \ + # tests + +@@ -453,6 +455,8 @@ generated += \ + tst-rxspencer-no-utf8.mtrace \ + tst-vfork3-mem.out \ + tst-vfork3.mtrace \ ++ tst-wordexp-reuse-mem.out \ ++ tst-wordexp-reuse.mtrace \ + # generated + endif + endif +@@ -488,6 +492,7 @@ tests-special += \ + $(objpfx)tst-pcre-mem.out \ + $(objpfx)tst-rxspencer-no-utf8-mem.out \ + $(objpfx)tst-vfork3-mem.out \ ++ $(objpfx)tst-wordexp-reuse.out \ + # tests-special + endif + endif +@@ -771,3 +776,10 @@ $(objpfx)posix-conf-vars-def.h: $(..)scripts/gen-posix-conf-vars.awk \ + $(make-target-directory) + $(AWK) -f $(filter-out Makefile, $^) > $@.tmp + mv -f $@.tmp $@ ++ ++tst-wordexp-reuse-ENV += MALLOC_TRACE=$(objpfx)tst-wordexp-reuse.mtrace \ ++ LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so ++ ++$(objpfx)tst-wordexp-reuse-mem.out: $(objpfx)tst-wordexp-reuse.out ++ $(common-objpfx)malloc/mtrace $(objpfx)tst-wordexp-reuse.mtrace > $@; \ ++ $(evaluate-test) diff --git a/posix/environ.c b/posix/environ.c index a0ed0d80ea..924effe3cd 100644 --- a/posix/environ.c @@ -2586,6 +4424,166 @@ +} + +#include +diff --git a/posix/tst-wordexp-reuse.c b/posix/tst-wordexp-reuse.c +new file mode 100644 +index 0000000000..3926b9f557 +--- /dev/null ++++ b/posix/tst-wordexp-reuse.c +@@ -0,0 +1,89 @@ ++/* Test for wordexp with WRDE_REUSE flag. ++ Copyright (C) 2026 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#include ++ ++static int ++do_test (void) ++{ ++ mtrace (); ++ ++ { ++ wordexp_t p = { 0 }; ++ TEST_COMPARE (wordexp ("one", &p, 0), 0); ++ TEST_COMPARE (p.we_wordc, 1); ++ TEST_COMPARE_STRING (p.we_wordv[0], "one"); ++ TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE), 0); ++ TEST_COMPARE (p.we_wordc, 1); ++ TEST_COMPARE_STRING (p.we_wordv[0], "two"); ++ wordfree (&p); ++ } ++ ++ { ++ wordexp_t p = { .we_offs = 2 }; ++ TEST_COMPARE (wordexp ("one", &p, 0), 0); ++ TEST_COMPARE (p.we_wordc, 1); ++ TEST_COMPARE_STRING (p.we_wordv[0], "one"); ++ TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE | WRDE_DOOFFS), 0); ++ TEST_COMPARE (p.we_wordc, 1); ++ TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "two"); ++ wordfree (&p); ++ } ++ ++ { ++ wordexp_t p = { 0 }; ++ TEST_COMPARE (wordexp ("one", &p, 0), 0); ++ TEST_COMPARE (p.we_wordc, 1); ++ TEST_COMPARE_STRING (p.we_wordv[0], "one"); ++ TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE | WRDE_APPEND), 0); ++ TEST_COMPARE (p.we_wordc, 1); ++ TEST_COMPARE_STRING (p.we_wordv[0], "two"); ++ wordfree (&p); ++ } ++ ++ { ++ wordexp_t p = { .we_offs = 2 }; ++ TEST_COMPARE (wordexp ("one", &p, WRDE_DOOFFS), 0); ++ TEST_COMPARE (p.we_wordc, 1); ++ TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "one"); ++ TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE ++ | 
WRDE_DOOFFS), 0); ++ TEST_COMPARE (p.we_wordc, 1); ++ TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "two"); ++ wordfree (&p); ++ } ++ ++ { ++ wordexp_t p = { .we_offs = 2 }; ++ TEST_COMPARE (wordexp ("one", &p, WRDE_DOOFFS), 0); ++ TEST_COMPARE (p.we_wordc, 1); ++ TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "one"); ++ TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE ++ | WRDE_DOOFFS | WRDE_APPEND), 0); ++ TEST_COMPARE (p.we_wordc, 1); ++ TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "two"); ++ wordfree (&p); ++ } ++ ++ return 0; ++} ++ ++#include +diff --git a/posix/wordexp.c b/posix/wordexp.c +index a69b732801..9df4bb7424 100644 +--- a/posix/wordexp.c ++++ b/posix/wordexp.c +@@ -2216,7 +2216,9 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags) + { + /* Minimal implementation of WRDE_REUSE for now */ + wordfree (pwordexp); ++ old_word.we_wordc = 0; + old_word.we_wordv = NULL; ++ pwordexp->we_wordc = 0; + } + + if ((flags & WRDE_APPEND) == 0) +diff --git a/resolv/nss_dns/dns-network.c b/resolv/nss_dns/dns-network.c +index 519f8422ca..e14e959d7c 100644 +--- a/resolv/nss_dns/dns-network.c ++++ b/resolv/nss_dns/dns-network.c +@@ -207,6 +207,10 @@ _nss_dns_getnetbyaddr_r (uint32_t net, int type, struct netent *result, + sprintf (qbuf, "%u.%u.%u.%u.in-addr.arpa", net_bytes[3], net_bytes[2], + net_bytes[1], net_bytes[0]); + break; ++ default: ++ /* Default network (net is originally zero). 
*/ ++ strcpy (qbuf, "0.0.0.0.in-addr.arpa"); ++ break; + } + + net_buffer.buf = orig_net_buffer = (querybuf *) alloca (1024); +diff --git a/resolv/tst-resolv-network.c b/resolv/tst-resolv-network.c +index d9f69649d0..181be80835 100644 +--- a/resolv/tst-resolv-network.c ++++ b/resolv/tst-resolv-network.c +@@ -46,6 +46,9 @@ handle_code (const struct resolv_response_context *ctx, + { + switch (code) + { ++ case 0: ++ send_ptr (b, qname, qclass, qtype, "0.in-addr.arpa"); ++ break; + case 1: + send_ptr (b, qname, qclass, qtype, "1.in-addr.arpa"); + break; +@@ -265,6 +268,9 @@ do_test (void) + "error: TRY_AGAIN\n"); + + /* Lookup by address, success cases. */ ++ check_reverse (0, ++ "name: 0.in-addr.arpa\n" ++ "net: 0x00000000\n"); + check_reverse (1, + "name: 1.in-addr.arpa\n" + "net: 0x00000001\n"); +diff --git a/stdio-common/printf-parsemb.c b/stdio-common/printf-parsemb.c +index aad697abbc..a7ba52a9fa 100644 +--- a/stdio-common/printf-parsemb.c ++++ b/stdio-common/printf-parsemb.c +@@ -17,6 +17,7 @@ + . 
*/ + + #include ++#include + #include + #include + #include diff --git a/stdlib/Makefile b/stdlib/Makefile index 1c4fa2382f..c9c8f702a2 100644 --- a/stdlib/Makefile @@ -2715,6 +4713,29 @@ exit (EXIT_SUCCESS); } } +diff --git a/string/strerror.c b/string/strerror.c +index 3c02640816..daabec82d3 100644 +--- a/string/strerror.c ++++ b/string/strerror.c +@@ -21,5 +21,5 @@ + char * + strerror (int errnum) + { +- return __strerror_l (errnum, __libc_tsd_get (locale_t, LOCALE)); ++ return __strerror_l (errnum, __libc_tsd_LOCALE); + } +diff --git a/sunrpc/rpc_thread.c b/sunrpc/rpc_thread.c +index a04b7ec47f..e20f0a6230 100644 +--- a/sunrpc/rpc_thread.c ++++ b/sunrpc/rpc_thread.c +@@ -3,7 +3,6 @@ + #include + + #include +-#include + #include + #include + diff --git a/support/capture_subprocess.h b/support/capture_subprocess.h index 91d75e5d6b..b37462d0d1 100644 --- a/support/capture_subprocess.h @@ -2737,6 +4758,19 @@ /* Deallocate the subprocess data captured by support_capture_subprocess. */ +diff --git a/support/resolv_test.c b/support/resolv_test.c +index ab37d3d58c..29e59da958 100644 +--- a/support/resolv_test.c ++++ b/support/resolv_test.c +@@ -326,7 +326,7 @@ resolv_response_add_name (struct resolv_response_builder *b, + crname_target = *ptr; + else + crname_target = NULL; +- TEST_VERIFY (crname_target != crname); ++ TEST_VERIFY_EXIT (crname_target != crname); + /* Not added to the tree. */ + free (crname); + } diff --git a/support/support_capture_subprocess.c b/support/support_capture_subprocess.c index c3ef478d17..b4e4bf9502 100644 --- a/support/support_capture_subprocess.c @@ -2963,6 +4997,90 @@ } void +diff --git a/support/test-container.c b/support/test-container.c +index 79d3189e2f..e35cd7316f 100644 +--- a/support/test-container.c ++++ b/support/test-container.c +@@ -705,6 +705,7 @@ check_for_unshare_hints (int require_pidns) + + val = -1; /* Sentinel. 
*/ + int cnt = fscanf (f, "%d", &val); ++ fclose (f); + if (cnt == 1 && val != files[i].bad_value) + continue; + +diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile +index 4b7f8a5c07..53f5bd4ce0 100644 +--- a/sysdeps/aarch64/Makefile ++++ b/sysdeps/aarch64/Makefile +@@ -2,11 +2,15 @@ long-double-fcts = yes + + ifeq (yes,$(aarch64-bti)) + # Mark linker output BTI compatible, it warns on non-BTI inputs. ++# Do not do this for conform tests because they may not be compiled ++# with the appropriate compiler flags. ++ifneq ($(subdir),conform) + sysdep-LDFLAGS += -Wl,-z,force-bti + # Make warnings fatal outside the test system. + LDFLAGS-lib.so += -Wl,--fatal-warnings + LDFLAGS-rtld += -Wl,-z,force-bti,--fatal-warnings +-endif ++endif # $(subdir) != conform ++endif # $(aarch64-bit) + + ifeq ($(subdir),elf) + sysdep-dl-routines += \ +@@ -75,7 +79,19 @@ sysdep_routines += \ + __alloc_gcs + + tests += \ +- tst-sme-jmp ++ tst-sme-jmp \ ++ tst-sme-signal \ ++ tst-sme-za-state \ ++ # tests ++tests-internal += \ ++ tst-sme-clone \ ++ tst-sme-clone3 \ ++ tst-sme-fork \ ++ tst-sme-vfork \ ++ # tests-internal ++ ++$(objpfx)tst-sme-clone3: $(objpfx)clone3.o $(objpfx)__arm_za_disable.o ++ + endif + + ifeq ($(subdir),malloc) +diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S +index 38efddbbae..40f98329d0 100644 +--- a/sysdeps/aarch64/__longjmp.S ++++ b/sysdeps/aarch64/__longjmp.S +@@ -51,24 +51,7 @@ ENTRY (__longjmp) + + #if IS_IN(libc) + /* Disable ZA state of SME in libc.a and libc.so, but not in ld.so. */ +-# if HAVE_AARCH64_PAC_RET +- PACIASP +- cfi_window_save +-# endif +- stp x29, x30, [sp, -16]! 
+- cfi_adjust_cfa_offset (16) +- cfi_rel_offset (x29, 0) +- cfi_rel_offset (x30, 8) +- mov x29, sp +- bl __libc_arm_za_disable +- ldp x29, x30, [sp], 16 +- cfi_adjust_cfa_offset (-16) +- cfi_restore (x29) +- cfi_restore (x30) +-# if HAVE_AARCH64_PAC_RET +- AUTIASP +- cfi_window_save +-# endif ++ CALL_LIBC_ARM_ZA_DISABLE + #endif + + ldp x19, x20, [x0, #JB_X19<<3] diff --git a/sysdeps/aarch64/fpu/acos_advsimd.c b/sysdeps/aarch64/fpu/acos_advsimd.c index 7709b5454f..453f780314 100644 --- a/sysdeps/aarch64/fpu/acos_advsimd.c @@ -5807,9 +7925,20 @@ strong_alias (SV_NAME_D1 (log1p), SV_NAME_D1 (logp1)) diff --git a/sysdeps/aarch64/fpu/pow_sve.c b/sysdeps/aarch64/fpu/pow_sve.c -index 42d551ca92..b8c1b39dca 100644 +index 42d551ca92..becf1a8410 100644 --- a/sysdeps/aarch64/fpu/pow_sve.c +++ b/sysdeps/aarch64/fpu/pow_sve.c +@@ -31,8 +31,8 @@ + The SVE algorithm drops the tail in the exp computation at the price of + a lower accuracy, slightly above 1ULP. + The SVE algorithm also drops the special treatement of small (< 2^-65) and +- large (> 2^63) finite values of |y|, as they only affect non-round to nearest +- modes. ++ large (> 2^63) finite values of |y|, as they only affect non-round to ++ nearest modes. + + Maximum measured error is 1.04 ULPs: + SV_NAME_D2 (pow) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12) @@ -44,19 +44,18 @@ /* Data is defined in v_pow_log_data.c. */ @@ -5881,7 +8010,63 @@ 2 * asuint64 (INFINITY) - 1); } -@@ -174,16 +198,17 @@ sv_call_specialcase (svfloat64_t x1, svuint64_t u1, svuint64_t u2, +@@ -132,65 +156,46 @@ sv_zeroinfnan (svbool_t pg, svuint64_t i) + a double. (int32_t)KI is the k used in the argument reduction and exponent + adjustment of scale, positive k here means the result may overflow and + negative k means the result may underflow. 
*/ +-static inline double +-specialcase (double tmp, uint64_t sbits, uint64_t ki) +-{ +- double scale; +- if ((ki & 0x80000000) == 0) +- { +- /* k > 0, the exponent of scale might have overflowed by <= 460. */ +- sbits -= 1009ull << 52; +- scale = asdouble (sbits); +- return 0x1p1009 * (scale + scale * tmp); +- } +- /* k < 0, need special care in the subnormal range. */ +- sbits += 1022ull << 52; +- /* Note: sbits is signed scale. */ +- scale = asdouble (sbits); +- double y = scale + scale * tmp; +- return 0x1p-1022 * y; +-} +- +-/* Scalar fallback for special cases of SVE pow's exp. */ + static inline svfloat64_t +-sv_call_specialcase (svfloat64_t x1, svuint64_t u1, svuint64_t u2, +- svfloat64_t y, svbool_t cmp) ++specialcase (svfloat64_t tmp, svuint64_t sbits, svuint64_t ki, svbool_t cmp) + { +- svbool_t p = svpfirst (cmp, svpfalse ()); +- while (svptest_any (cmp, p)) +- { +- double sx1 = svclastb (p, 0, x1); +- uint64_t su1 = svclastb (p, 0, u1); +- uint64_t su2 = svclastb (p, 0, u2); +- double elem = specialcase (sx1, su1, su2); +- svfloat64_t y2 = sv_f64 (elem); +- y = svsel (p, y2, y); +- p = svpnext_b64 (cmp, p); +- } +- return y; ++ svbool_t p_pos = svcmpge_n_f64 (cmp, svreinterpret_f64_u64 (ki), 0.0); ++ ++ /* Scale up or down depending on sign of k. */ ++ svint64_t offset ++ = svsel_s64 (p_pos, sv_s64 (1009ull << 52), sv_s64 (-1022ull << 52)); ++ svfloat64_t factor ++ = svsel_f64 (p_pos, sv_f64 (0x1p1009), sv_f64 (0x1p-1022)); ++ ++ svuint64_t offset_sbits ++ = svsub_u64_x (cmp, sbits, svreinterpret_u64_s64 (offset)); ++ svfloat64_t scale = svreinterpret_f64_u64 (offset_sbits); ++ svfloat64_t res = svmad_f64_x (cmp, scale, tmp, scale); ++ return svmul_f64_x (cmp, res, factor); + } + + /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about additional 15 bits precision. IX is the bit representation of x, but normalized in the subnormal range using the sign bit for the exponent. 
*/ static inline svfloat64_t @@ -5902,7 +8087,16 @@ svfloat64_t z = svreinterpret_f64 (iz); svfloat64_t kd = svcvt_f64_x (pg, k); -@@ -199,40 +224,85 @@ sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail) + /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ + /* SVE lookup requires 3 separate lookup tables, as opposed to scalar version +- that uses array of structures. We also do the lookup earlier in the code to +- make sure it finishes as early as possible. */ ++ that uses array of structures. We also do the lookup earlier in the code ++ to make sure it finishes as early as possible. */ + svfloat64_t invc = svld1_gather_index (pg, __v_pow_log_data.invc, i); + svfloat64_t logc = svld1_gather_index (pg, __v_pow_log_data.logc, i); + svfloat64_t logctail = svld1_gather_index (pg, __v_pow_log_data.logctail, i); +@@ -199,40 +204,85 @@ sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail) |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */ svfloat64_t r = svmad_x (pg, z, invc, -1.0); /* k*Ln2 + log(c) + r. */ @@ -6000,7 +8194,7 @@ { /* 3 types of special cases: tiny (uflow and spurious uflow), huge (oflow) and other cases of large values of x (scale * (1 + TMP) oflow). */ -@@ -240,73 +310,46 @@ sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail, +@@ -240,77 +290,50 @@ sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail, /* |x| is large (|x| >= 512) or tiny (|x| <= 0x1p-54). */ svbool_t uoflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), ThresExp); @@ -6026,32 +8220,10 @@ - oflow = svcmpge (pg, abstop, HugeExp); + svbool_t oflow = svcmpge (pg, abstop, HugeExp); oflow = svand_z (pg, uoflow, svbic_z (pg, oflow, uflow)); -+ - /* For large |x| values (512 < |x| < 1024) scale * (1 + TMP) can overflow +- /* For large |x| values (512 < |x| < 1024) scale * (1 + TMP) can overflow - or underflow. */ - special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow)); -+ or underflow. 
*/ -+ svbool_t special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow)); -+ -+ /* Update result with special and large cases. */ -+ z = sv_call_specialcase (tmp, sbits, ki, z, special); -+ -+ /* Handle underflow and overflow. */ -+ svbool_t x_is_neg = svcmplt (pg, x, 0); -+ svuint64_t sign_mask -+ = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS); -+ svfloat64_t res_uoflow -+ = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY)); -+ res_uoflow = svreinterpret_f64 ( -+ svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask)); -+ /* Avoid spurious underflow for tiny x. */ -+ svfloat64_t res_spurious_uflow -+ = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000)); -+ -+ z = svsel (oflow, res_uoflow, z); -+ z = svsel (uflow, res_spurious_uflow, z); -+ return z; - } +- } - /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ - /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ @@ -6099,14 +8271,52 @@ - svfloat64_t res_spurious_uflow - = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000)); - z = svsel (uflow, res_spurious_uflow, z); -- ++ /* Handle underflow and overlow in scale. ++ For large |x| values (512 < |x| < 1024), scale * (1 + TMP) can ++ overflow or underflow. */ ++ svbool_t special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow)); ++ if (__glibc_unlikely (svptest_any (pg, special))) ++ z = svsel (special, specialcase (tmp, sbits, ki, special), z); ++ ++ /* Handle underflow and overflow in exp. */ ++ svbool_t x_is_neg = svcmplt (pg, x, 0); ++ svuint64_t sign_mask ++ = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS); ++ svfloat64_t res_uoflow ++ = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY)); ++ res_uoflow = svreinterpret_f64 ( ++ svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask)); ++ /* Avoid spurious underflow for tiny x. 
*/ ++ svfloat64_t res_spurious_uflow ++ = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000)); ++ ++ z = svsel (oflow, res_uoflow, z); ++ z = svsel (uflow, res_spurious_uflow, z); ++ return z; ++ } + - return z; + return sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d); } static inline double -@@ -341,47 +384,39 @@ pow_sc (double x, double y) +-pow_sc (double x, double y) ++pow_specialcase (double x, double y) + { + uint64_t ix = asuint64 (x); + uint64_t iy = asuint64 (y); +@@ -339,49 +362,49 @@ pow_sc (double x, double y) + return x; + } ++/* Scalar fallback for special case routines with custom signature. */ ++static svfloat64_t NOINLINE ++sv_pow_specialcase (svfloat64_t x1, svfloat64_t x2, svfloat64_t y, ++ svbool_t cmp) ++{ ++ return sv_call2_f64 (pow_specialcase, x1, x2, y, cmp); ++} ++ svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg) { + const struct data *d = ptr_barrier (&data); @@ -6163,7 +8373,7 @@ svuint64_t vix_norm = svreinterpret_u64 (svmul_m (xsmall, x, 0x1p52)); vix_norm = svand_m (xsmall, vix_norm, 0x7fffffffffffffff); -@@ -391,20 +426,24 @@ svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg) +@@ -391,21 +414,25 @@ svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg) /* y_hi = log(ix, &y_lo). */ svfloat64_t vlo; @@ -6192,12 +8402,14 @@ /* Cases of zero/inf/nan x or y. 
*/ - if (__glibc_unlikely (svptest_any (pg, special))) +- vz = sv_call2_f64 (pow_sc, x, y, vz, special); + if (__glibc_unlikely (svptest_any (svptrue_b64 (), special))) - vz = sv_call2_f64 (pow_sc, x, y, vz, special); ++ vz = sv_pow_specialcase (x, y, vz, special); return vz; + } diff --git a/sysdeps/aarch64/fpu/powf_sve.c b/sysdeps/aarch64/fpu/powf_sve.c -index 29e9acb6fb..7046990aa1 100644 +index 29e9acb6fb..76f54b3522 100644 --- a/sysdeps/aarch64/fpu/powf_sve.c +++ b/sysdeps/aarch64/fpu/powf_sve.c @@ -26,7 +26,6 @@ @@ -6234,24 +8446,52 @@ 2u * 0x7f800000 - 1); } -@@ -150,9 +148,14 @@ powf_specialcase (float x, float y, float z) +@@ -118,11 +116,10 @@ zeroinfnan (uint32_t ix) + preamble of scalar powf except that we do not update ix and sign_bias. This + is done in the preamble of the SVE powf. */ + static inline float +-powf_specialcase (float x, float y, float z) ++powf_specialcase (float x, float y) + { + uint32_t ix = asuint (x); + uint32_t iy = asuint (y); +- /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */ + if (__glibc_unlikely (zeroinfnan (iy))) + { + if (2 * iy == 0) +@@ -144,27 +141,15 @@ powf_specialcase (float x, float y, float z) + x2 = -x2; + return iy & 0x80000000 ? 1 / x2 : x2; + } +- /* We need a return here in case x<0 and y is integer, but all other tests +- need to be run. */ +- return z; ++ /* Return x for convenience, but make sure result is never used. */ ++ return x; } /* Scalar fallback for special case routines with custom signature. */ -static inline svfloat32_t --sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp) +static svfloat32_t NOINLINE -+sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y) + sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp) { -+ /* Special cases of x or y: zero, inf and nan. 
*/ -+ svbool_t xspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x1)); -+ svbool_t yspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x2)); -+ svbool_t cmp = svorr_z (svptrue_b32 (), xspecial, yspecial); -+ - svbool_t p = svpfirst (cmp, svpfalse ()); - while (svptest_any (cmp, p)) - { -@@ -182,30 +185,30 @@ sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k, +- svbool_t p = svpfirst (cmp, svpfalse ()); +- while (svptest_any (cmp, p)) +- { +- float sx1 = svclastb (p, 0, x1); +- float sx2 = svclastb (p, 0, x2); +- float elem = svclastb (p, 0, y); +- elem = powf_specialcase (sx1, sx2, elem); +- svfloat32_t y2 = sv_f32 (elem); +- y = svsel (p, y2, y); +- p = svpnext_b32 (cmp, p); +- } +- return y; ++ return sv_call2_f32 (powf_specialcase, x1, x2, y, cmp); + } + + /* Compute core for half of the lanes in double precision. */ +@@ -182,30 +167,30 @@ sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k, /* Polynomial to approximate log1p(r)/ln2. */ svfloat64_t logx = A (0); @@ -6294,35 +8534,31 @@ return p; } -@@ -219,19 +222,16 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k, +@@ -219,19 +204,16 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k, { const svbool_t ptrue = svptrue_b64 (); - /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two in - order to perform core computation in double precision. */ + /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two -+ * in order to perform core computation in double precision. */ ++ in order to perform core computation in double precision. 
*/ const svbool_t pg_lo = svunpklo (pg); const svbool_t pg_hi = svunpkhi (pg); -- svfloat64_t y_lo = svcvt_f64_x ( -- ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y)))); -- svfloat64_t y_hi = svcvt_f64_x ( -- ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y)))); + svfloat64_t y_lo = svcvt_f64_x ( + ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y)))); + svfloat64_t y_hi = svcvt_f64_x ( + ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y)))); - svfloat32_t z = svreinterpret_f32 (iz); - svfloat64_t z_lo = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (z)))); - svfloat64_t z_hi = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (z)))); -+ svfloat64_t y_lo -+ = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y)))); -+ svfloat64_t y_hi -+ = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y)))); -+ svfloat64_t z_lo = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (iz))); -+ svfloat64_t z_hi = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (iz))); ++ svfloat64_t z_lo = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpklo (iz))); ++ svfloat64_t z_hi = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpkhi (iz))); svuint64_t i_lo = svunpklo (i); svuint64_t i_hi = svunpkhi (i); svint64_t k_lo = svunpklo (k); -@@ -258,9 +258,9 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k, +@@ -258,9 +240,9 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k, /* Implementation of SVE powf. Provides the same accuracy as AdvSIMD powf, since it relies on the same algorithm. The theoretical maximum error is under 2.60 ULPs. 
@@ -6335,7 +8571,7 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) { const struct data *d = ptr_barrier (&data); -@@ -269,21 +269,19 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) +@@ -269,21 +251,19 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) svuint32_t viy0 = svreinterpret_u32 (y); /* Negative x cases. */ @@ -6361,7 +8597,7 @@ /* Set to SignBias if x is negative and y is odd. */ sign_bias = svsel (yisodd_xisneg, sv_u32 (d->sign_bias), sv_u32 (0)); } -@@ -294,8 +292,8 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) +@@ -294,8 +274,8 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) svbool_t cmp = svorr_z (pg, xspecial, yspecial); /* Small cases of x: |x| < 0x1p-126. */ @@ -6372,7 +8608,7 @@ { /* Normalize subnormal x so exponent becomes negative. */ svuint32_t vix_norm = svreinterpret_u32 (svmul_x (xsmall, x, Norm)); -@@ -304,32 +302,35 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) +@@ -304,31 +284,34 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) vix = svsel (xsmall, vix_norm, vix); } /* Part of core computation carried in working precision. */ @@ -6396,7 +8632,7 @@ + (23 - V_POWF_EXP2_TABLE_BITS)); + + /* Compute core in extended precision and return intermediate ylogx results -+ * to handle cases of underflow and underflow in exp. */ ++ to handle cases of underflow and overflow in exp. 
*/ svfloat32_t ylogx; - svfloat32_t ret = sv_powf_core (pg, i, iz, k, y, sign_bias, &ylogx, d); + svfloat32_t ret @@ -6420,11 +8656,81 @@ + ret = svsel (yint_or_xpos, ret, sv_f32 (__builtin_nanf (""))); - if (__glibc_unlikely (svptest_any (pg, cmp))) -- return sv_call_powf_sc (x, y, ret, cmp); + if (__glibc_unlikely (svptest_any (cmp, cmp))) -+ return sv_call_powf_sc (x, y, ret); + return sv_call_powf_sc (x, y, ret, cmp); return ret; +diff --git a/sysdeps/aarch64/fpu/sinh_advsimd.c b/sysdeps/aarch64/fpu/sinh_advsimd.c +index 0d6a4856f8..b6b60262c6 100644 +--- a/sysdeps/aarch64/fpu/sinh_advsimd.c ++++ b/sysdeps/aarch64/fpu/sinh_advsimd.c +@@ -24,36 +24,26 @@ static const struct data + { + struct v_expm1_data d; + uint64x2_t halff; +-#if WANT_SIMD_EXCEPT +- uint64x2_t tiny_bound, thresh; +-#else + float64x2_t large_bound; +-#endif + } data = { + .d = V_EXPM1_DATA, + .halff = V2 (0x3fe0000000000000), +-#if WANT_SIMD_EXCEPT +- /* 2^-26, below which sinh(x) rounds to x. */ +- .tiny_bound = V2 (0x3e50000000000000), +- /* asuint(large_bound) - asuint(tiny_bound). */ +- .thresh = V2 (0x0230000000000000), +-#else + /* 2^9. expm1 helper overflows for large input. */ + .large_bound = V2 (0x1p+9), +-#endif + }; + + static float64x2_t NOINLINE VPCS_ATTR +-special_case (float64x2_t x) ++special_case (float64x2_t x, float64x2_t t, float64x2_t halfsign, ++ uint64x2_t special) + { +- return v_call_f64 (sinh, x, x, v_u64 (-1)); ++ return v_call_f64 (sinh, x, vmulq_f64 (t, halfsign), special); + } + + /* Approximation for vector double-precision sinh(x) using expm1. + sinh(x) = (exp(x) - exp(-x)) / 2. + The greatest observed error is 2.52 ULP: +- _ZGVnN2v_sinh(-0x1.a098a2177a2b9p-2) got -0x1.ac2f05bb66fccp-2 +- want -0x1.ac2f05bb66fc9p-2. */ ++ _ZGVnN2v_sinh(0x1.9f6ff2ab6fb19p-2) got 0x1.aaed83a3153ccp-2 ++ want 0x1.aaed83a3153c9p-2. 
*/ + float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x) + { + const struct data *d = ptr_barrier (&data); +@@ -63,21 +53,16 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x) + float64x2_t halfsign = vreinterpretq_f64_u64 ( + vbslq_u64 (v_u64 (0x8000000000000000), ix, d->halff)); + +-#if WANT_SIMD_EXCEPT +- uint64x2_t special = vcgeq_u64 ( +- vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh); +-#else + uint64x2_t special = vcageq_f64 (x, d->large_bound); +-#endif +- +- /* Fall back to scalar variant for all lanes if any of them are special. */ +- if (__glibc_unlikely (v_any_u64 (special))) +- return special_case (x); + + /* Up to the point that expm1 overflows, we can use it to calculate sinh + using a slight rearrangement of the definition of sinh. This allows us to + retain acceptable accuracy for very small inputs. */ + float64x2_t t = expm1_inline (ax, &d->d); + t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0)))); ++ ++ if (__glibc_unlikely (v_any_u64 (special))) ++ return special_case (x, t, halfsign, special); ++ + return vmulq_f64 (t, halfsign); } diff --git a/sysdeps/aarch64/fpu/sinh_sve.c b/sysdeps/aarch64/fpu/sinh_sve.c index 963453f812..072ba8fca9 100644 @@ -6839,6 +9145,234 @@ } - #endif +diff --git a/sysdeps/aarch64/fpu/sv_math.h b/sysdeps/aarch64/fpu/sv_math.h +index 3d576df4cc..65d7f0ff20 100644 +--- a/sysdeps/aarch64/fpu/sv_math.h ++++ b/sysdeps/aarch64/fpu/sv_math.h +@@ -24,11 +24,29 @@ + + #include "vecmath_config.h" + ++#if !defined(__ARM_FEATURE_SVE_BITS) || __ARM_FEATURE_SVE_BITS == 0 ++/* If not specified by -msve-vector-bits, assume maximum vector length. */ ++# define SVE_VECTOR_BYTES 256 ++#else ++# define SVE_VECTOR_BYTES (__ARM_FEATURE_SVE_BITS / 8) ++#endif ++#define SVE_NUM_FLTS (SVE_VECTOR_BYTES / sizeof (float)) ++#define SVE_NUM_DBLS (SVE_VECTOR_BYTES / sizeof (double)) ++/* Predicate is stored as one bit per byte of VL so requires VL / 64 bytes. 
*/ ++#define SVE_NUM_PG_BYTES (SVE_VECTOR_BYTES / sizeof (uint64_t)) ++ + #define SV_NAME_F1(fun) _ZGVsMxv_##fun##f + #define SV_NAME_D1(fun) _ZGVsMxv_##fun + #define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f + #define SV_NAME_D2(fun) _ZGVsMxvv_##fun + ++static inline void ++svstr_p (uint8_t *dst, svbool_t p) ++{ ++ /* Predicate STR does not currently have an intrinsic. */ ++ __asm__("str %0, [%x1]\n" : : "Upa"(p), "r"(dst) : "memory"); ++} ++ + /* Double precision. */ + static inline svint64_t + sv_s64 (int64_t x) +@@ -51,33 +69,35 @@ sv_f64 (double x) + static inline svfloat64_t + sv_call_f64 (double (*f) (double), svfloat64_t x, svfloat64_t y, svbool_t cmp) + { +- svbool_t p = svpfirst (cmp, svpfalse ()); +- while (svptest_any (cmp, p)) ++ double tmp[SVE_NUM_DBLS]; ++ uint8_t pg_bits[SVE_NUM_PG_BYTES]; ++ svstr_p (pg_bits, cmp); ++ svst1 (svptrue_b64 (), tmp, svsel (cmp, x, y)); ++ ++ for (int i = 0; i < svcntd (); i++) + { +- double elem = svclastb_n_f64 (p, 0, x); +- elem = (*f) (elem); +- svfloat64_t y2 = svdup_n_f64 (elem); +- y = svsel_f64 (p, y2, y); +- p = svpnext_b64 (cmp, p); ++ if (pg_bits[i] & 1) ++ tmp[i] = f (tmp[i]); + } +- return y; ++ return svld1 (svptrue_b64 (), tmp); + } + + static inline svfloat64_t + sv_call2_f64 (double (*f) (double, double), svfloat64_t x1, svfloat64_t x2, + svfloat64_t y, svbool_t cmp) + { +- svbool_t p = svpfirst (cmp, svpfalse ()); +- while (svptest_any (cmp, p)) ++ double tmp1[SVE_NUM_DBLS], tmp2[SVE_NUM_DBLS]; ++ uint8_t pg_bits[SVE_NUM_PG_BYTES]; ++ svstr_p (pg_bits, cmp); ++ svst1 (svptrue_b64 (), tmp1, svsel (cmp, x1, y)); ++ svst1 (cmp, tmp2, x2); ++ ++ for (int i = 0; i < svcntd (); i++) + { +- double elem1 = svclastb_n_f64 (p, 0, x1); +- double elem2 = svclastb_n_f64 (p, 0, x2); +- double ret = (*f) (elem1, elem2); +- svfloat64_t y2 = svdup_n_f64 (ret); +- y = svsel_f64 (p, y2, y); +- p = svpnext_b64 (cmp, p); ++ if (pg_bits[i] & 1) ++ tmp1[i] = f (tmp1[i], tmp2[i]); + } +- return y; ++ return svld1 (svptrue_b64 (), 
tmp1); + } + + static inline svuint64_t +@@ -109,33 +129,40 @@ sv_f32 (float x) + static inline svfloat32_t + sv_call_f32 (float (*f) (float), svfloat32_t x, svfloat32_t y, svbool_t cmp) + { +- svbool_t p = svpfirst (cmp, svpfalse ()); +- while (svptest_any (cmp, p)) ++ float tmp[SVE_NUM_FLTS]; ++ uint8_t pg_bits[SVE_NUM_PG_BYTES]; ++ svstr_p (pg_bits, cmp); ++ svst1 (svptrue_b32 (), tmp, svsel (cmp, x, y)); ++ ++ for (int i = 0; i < svcntd (); i++) + { +- float elem = svclastb_n_f32 (p, 0, x); +- elem = f (elem); +- svfloat32_t y2 = svdup_n_f32 (elem); +- y = svsel_f32 (p, y2, y); +- p = svpnext_b32 (cmp, p); ++ uint8_t p = pg_bits[i]; ++ if (p & 1) ++ tmp[i * 2] = f (tmp[i * 2]); ++ if (p & (1 << 4)) ++ tmp[i * 2 + 1] = f (tmp[i * 2 + 1]); + } +- return y; ++ return svld1 (svptrue_b32 (), tmp); + } + + static inline svfloat32_t + sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2, + svfloat32_t y, svbool_t cmp) + { +- svbool_t p = svpfirst (cmp, svpfalse ()); +- while (svptest_any (cmp, p)) ++ float tmp1[SVE_NUM_FLTS], tmp2[SVE_NUM_FLTS]; ++ uint8_t pg_bits[SVE_NUM_PG_BYTES]; ++ svstr_p (pg_bits, cmp); ++ svst1 (svptrue_b32 (), tmp1, svsel (cmp, x1, y)); ++ svst1 (cmp, tmp2, x2); ++ ++ for (int i = 0; i < svcntd (); i++) + { +- float elem1 = svclastb_n_f32 (p, 0, x1); +- float elem2 = svclastb_n_f32 (p, 0, x2); +- float ret = f (elem1, elem2); +- svfloat32_t y2 = svdup_n_f32 (ret); +- y = svsel_f32 (p, y2, y); +- p = svpnext_b32 (cmp, p); ++ uint8_t p = pg_bits[i]; ++ if (p & 1) ++ tmp1[i * 2] = f (tmp1[i * 2], tmp2[i * 2]); ++ if (p & (1 << 4)) ++ tmp1[i * 2 + 1] = f (tmp1[i * 2 + 1], tmp2[i * 2 + 1]); + } +- return y; ++ return svld1 (svptrue_b32 (), tmp1); + } +- + #endif +diff --git a/sysdeps/aarch64/fpu/tan_advsimd.c b/sysdeps/aarch64/fpu/tan_advsimd.c +index 825c9754b3..d391a003d8 100644 +--- a/sysdeps/aarch64/fpu/tan_advsimd.c ++++ b/sysdeps/aarch64/fpu/tan_advsimd.c +@@ -25,9 +25,7 @@ static const struct data + float64x2_t poly[9]; + 
double half_pi[2]; + float64x2_t two_over_pi, shift; +-#if !WANT_SIMD_EXCEPT + float64x2_t range_val; +-#endif + } data = { + /* Coefficients generated using FPMinimax. */ + .poly = { V2 (0x1.5555555555556p-2), V2 (0x1.1111111110a63p-3), +@@ -38,20 +36,17 @@ static const struct data + .half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 }, + .two_over_pi = V2 (0x1.45f306dc9c883p-1), + .shift = V2 (0x1.8p52), +-#if !WANT_SIMD_EXCEPT + .range_val = V2 (0x1p23), +-#endif + }; + + #define RangeVal 0x4160000000000000 /* asuint64(0x1p23). */ + #define TinyBound 0x3e50000000000000 /* asuint64(2^-26). */ +-#define Thresh 0x310000000000000 /* RangeVal - TinyBound. */ + + /* Special cases (fall back to scalar calls). */ + static float64x2_t VPCS_ATTR NOINLINE +-special_case (float64x2_t x) ++special_case (float64x2_t x, float64x2_t n, float64x2_t d, uint64x2_t special) + { +- return v_call_f64 (tan, x, x, v_u64 (-1)); ++ return v_call_f64 (tan, x, vdivq_f64 (n, d), special); + } + + /* Vector approximation for double-precision tan. +@@ -65,14 +60,6 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) + very large inputs. Fall back to scalar routine for all lanes if any are + too large, or Inf/NaN. If fenv exceptions are expected, also fall back for + tiny input to avoid underflow. */ +-#if WANT_SIMD_EXCEPT +- uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x)); +- /* iax - tiny_bound > range_val - tiny_bound. */ +- uint64x2_t special +- = vcgtq_u64 (vsubq_u64 (iax, v_u64 (TinyBound)), v_u64 (Thresh)); +- if (__glibc_unlikely (v_any_u64 (special))) +- return special_case (x); +-#endif + + /* q = nearest integer to 2 * x / pi. */ + float64x2_t q +@@ -81,9 +68,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) + + /* Use q to reduce x to r in [-pi/4, pi/4], by: + r = x - q * pi/2, in extended precision. 
*/ +- float64x2_t r = x; + float64x2_t half_pi = vld1q_f64 (dat->half_pi); +- r = vfmsq_laneq_f64 (r, q, half_pi, 0); ++ float64x2_t r = vfmsq_laneq_f64 (x, q, half_pi, 0); + r = vfmsq_laneq_f64 (r, q, half_pi, 1); + /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle + formula. */ +@@ -114,12 +100,13 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) + + uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1)); + +-#if !WANT_SIMD_EXCEPT + uint64x2_t special = vcageq_f64 (x, dat->range_val); ++ float64x2_t swap = vbslq_f64 (no_recip, n, vnegq_f64 (d)); ++ d = vbslq_f64 (no_recip, d, n); ++ n = swap; ++ + if (__glibc_unlikely (v_any_u64 (special))) +- return special_case (x); +-#endif ++ return special_case (x, n, d, special); + +- return vdivq_f64 (vbslq_f64 (no_recip, n, vnegq_f64 (d)), +- vbslq_f64 (no_recip, d, n)); ++ return vdivq_f64 (n, d); + } diff --git a/sysdeps/aarch64/fpu/tanh_sve.c b/sysdeps/aarch64/fpu/tanh_sve.c index 789cc6854f..5869419010 100644 --- a/sysdeps/aarch64/fpu/tanh_sve.c @@ -7021,6 +9555,54 @@ - return svdiv_x (pg, q, qp2); + return svreinterpret_f64 (svorr_x (pg, sign_bit, svreinterpret_u64 (y))); } +diff --git a/sysdeps/aarch64/fpu/tanpi_sve.c b/sysdeps/aarch64/fpu/tanpi_sve.c +index 57c643ae29..bfe6828e1f 100644 +--- a/sysdeps/aarch64/fpu/tanpi_sve.c ++++ b/sysdeps/aarch64/fpu/tanpi_sve.c +@@ -1,6 +1,6 @@ + /* Double-precision (SVE) tanpi function + +- Copyright (C) 2024 Free Software Foundation, Inc. ++ Copyright (C) 2024-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or +@@ -58,10 +58,10 @@ svfloat64_t SV_NAME_D1 (tanpi) (svfloat64_t x, const svbool_t pg) + svfloat64_t r2 = svmul_x (pg, r, r); + svfloat64_t r4 = svmul_x (pg, r2, r2); + +- svfloat64_t c_1_3 = svld1rq (pg, &d->c1); +- svfloat64_t c_5_7 = svld1rq (pg, &d->c5); +- svfloat64_t c_9_11 = svld1rq (pg, &d->c9); +- svfloat64_t c_13_14 = svld1rq (pg, &d->c13); ++ svfloat64_t c_1_3 = svld1rq (svptrue_b64 (), &d->c1); ++ svfloat64_t c_5_7 = svld1rq (svptrue_b64 (), &d->c5); ++ svfloat64_t c_9_11 = svld1rq (svptrue_b64 (), &d->c9); ++ svfloat64_t c_13_14 = svld1rq (svptrue_b64 (), &d->c13); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r2, c_1_3, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r2, c_1_3, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), r2, c_5_7, 0); +diff --git a/sysdeps/aarch64/fpu/tanpif_sve.c b/sysdeps/aarch64/fpu/tanpif_sve.c +index 0285f56f34..6894379564 100644 +--- a/sysdeps/aarch64/fpu/tanpif_sve.c ++++ b/sysdeps/aarch64/fpu/tanpif_sve.c +@@ -1,6 +1,6 @@ + /* Single-precision (SVE) tanpi function + +- Copyright (C) 2024 Free Software Foundation, Inc. ++ Copyright (C) 2024-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -37,7 +37,7 @@ const static struct v_tanpif_data + svfloat32_t SV_NAME_F1 (tanpi) (svfloat32_t x, const svbool_t pg) + { + const struct v_tanpif_data *d = ptr_barrier (&tanpif_data); +- svfloat32_t odd_coeffs = svld1rq (pg, &d->c1); ++ svfloat32_t odd_coeffs = svld1rq (svptrue_b32 (), &d->c1); + svfloat32_t n = svrintn_x (pg, x); + + /* inf produces nan that propagates. 
*/ diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile index 772b16a358..1c3c392513 100644 --- a/sysdeps/aarch64/multiarch/Makefile @@ -7196,6 +9778,792 @@ + +END (__memset_sve_zva64) +#endif +diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure +index 19657b627b..e1b772c586 100644 +--- a/sysdeps/aarch64/preconfigure ++++ b/sysdeps/aarch64/preconfigure +@@ -3,5 +3,6 @@ aarch64*) + base_machine=aarch64 + machine=aarch64 + mtls_descriptor=desc ++ mtls_traditional=trad + ;; + esac +diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S +index b630ca099a..e175ec4d36 100644 +--- a/sysdeps/aarch64/setjmp.S ++++ b/sysdeps/aarch64/setjmp.S +@@ -37,6 +37,12 @@ ENTRY (__sigsetjmp) + PTR_ARG (0) + + 1: ++ ++#if IS_IN(libc) ++ /* Disable ZA state of SME in libc.a and libc.so, but not in ld.so. */ ++ CALL_LIBC_ARM_ZA_DISABLE ++#endif ++ + stp x19, x20, [x0, #JB_X19<<3] + stp x21, x22, [x0, #JB_X21<<3] + stp x23, x24, [x0, #JB_X23<<3] +diff --git a/sysdeps/aarch64/tst-sme-clone.c b/sysdeps/aarch64/tst-sme-clone.c +new file mode 100644 +index 0000000000..b6ad54fa37 +--- /dev/null ++++ b/sysdeps/aarch64/tst-sme-clone.c +@@ -0,0 +1,54 @@ ++/* Test that ZA state of SME is cleared in both parent and child ++ when clone() syscall is used. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include "tst-sme-skeleton.c" ++ ++#include ++#include ++ ++static int ++fun (void * const arg) ++{ ++ printf ("in child: %s\n", (const char *)arg); ++ /* Check that ZA state of SME was disabled in child. */ ++ check_sme_za_state ("after clone in child", /* Clear. */ true); ++ return 0; ++} ++ ++static char __attribute__((aligned(16))) ++stack[1024 * 1024]; ++ ++static void ++run (struct blk *ptr) ++{ ++ char *syscall_name = (char *)"clone"; ++ printf ("in parent: before %s\n", syscall_name); ++ ++ /* Enabled ZA state so that effect of disabling be observable. */ ++ enable_sme_za_state (ptr); ++ check_sme_za_state ("before clone", /* Clear. */ false); ++ ++ pid_t pid = xclone (fun, syscall_name, stack, sizeof (stack), ++ CLONE_NEWUSER | CLONE_NEWNS | SIGCHLD); ++ ++ /* Check that ZA state of SME was disabled in parent. */ ++ check_sme_za_state ("after clone in parent", /* Clear. */ true); ++ ++ TEST_VERIFY (xwaitpid (pid, NULL, 0) == pid); ++} +diff --git a/sysdeps/aarch64/tst-sme-clone3.c b/sysdeps/aarch64/tst-sme-clone3.c +new file mode 100644 +index 0000000000..f420d5984d +--- /dev/null ++++ b/sysdeps/aarch64/tst-sme-clone3.c +@@ -0,0 +1,84 @@ ++/* Test that ZA state of SME is cleared in both parent and child ++ when clone3() syscall is used. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include "tst-sme-skeleton.c" ++ ++#include ++ ++#include ++#include ++#include ++ ++/* Since clone3 is not a public symbol, we link this test explicitly ++ with clone3.o and have to provide this declaration. */ ++int __clone3 (struct clone_args *cl_args, size_t size, ++ int (*func)(void *arg), void *arg); ++ ++static int ++fun (void * const arg) ++{ ++ printf ("in child: %s\n", (const char *)arg); ++ /* Check that ZA state of SME was disabled in child. */ ++ check_sme_za_state ("after clone3 in child", /* Clear. */ true); ++ return 0; ++} ++ ++static char __attribute__((aligned(16))) ++stack[1024 * 1024]; ++ ++/* Required by __arm_za_disable.o and provided by the startup code ++ as a hidden symbol. */ ++uint64_t _dl_hwcap2; ++ ++static void ++run (struct blk *ptr) ++{ ++ _dl_hwcap2 = getauxval (AT_HWCAP2); ++ ++ char *syscall_name = (char *)"clone3"; ++ struct clone_args args = { ++ .flags = CLONE_VM | CLONE_VFORK, ++ .exit_signal = SIGCHLD, ++ .stack = (uintptr_t) stack, ++ .stack_size = sizeof (stack), ++ }; ++ printf ("in parent: before %s\n", syscall_name); ++ ++ /* Enabled ZA state so that effect of disabling be observable. */ ++ enable_sme_za_state (ptr); ++ check_sme_za_state ("before clone3", /* Clear. */ false); ++ ++ pid_t pid = __clone3 (&args, sizeof (args), fun, syscall_name); ++ ++ /* Check that ZA state of SME was disabled in parent. */ ++ check_sme_za_state ("after clone3 in parent", /* Clear. */ true); ++ ++ printf ("%s child pid: %d\n", syscall_name, pid); ++ ++ xwaitpid (pid, NULL, 0); ++ printf ("in parent: after %s\n", syscall_name); ++} ++ ++/* Workaround to simplify linking with clone3.o. 
*/ ++void __syscall_error(int code) ++{ ++ int err = -code; ++ fprintf (stderr, "syscall error %d (%s)\n", err, strerror (err)); ++ exit (err); ++} +diff --git a/sysdeps/aarch64/tst-sme-fork.c b/sysdeps/aarch64/tst-sme-fork.c +new file mode 100644 +index 0000000000..b003b08884 +--- /dev/null ++++ b/sysdeps/aarch64/tst-sme-fork.c +@@ -0,0 +1,43 @@ ++/* Test that ZA state of SME is cleared in both parent and child ++ when fork() function is used. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include "tst-sme-skeleton.c" ++ ++static void ++run (struct blk *blk) ++{ ++ /* Enabled ZA state so that effect of disabling be observable. */ ++ enable_sme_za_state (blk); ++ check_sme_za_state ("before fork", /* Clear. */ false); ++ fflush (stdout); ++ ++ pid_t pid = xfork (); ++ ++ if (pid == 0) ++ { ++ /* Check that ZA state of SME was disabled in child. */ ++ check_sme_za_state ("after fork in child", /* Clear. */ true); ++ exit (0); ++ } ++ ++ /* Check that ZA state of SME was disabled in parent. */ ++ check_sme_za_state ("after fork in parent", /* Clear. 
*/ true); ++ ++ TEST_VERIFY (xwaitpid (pid, NULL, 0) == pid); ++} +diff --git a/sysdeps/aarch64/tst-sme-helper.h b/sysdeps/aarch64/tst-sme-helper.h +new file mode 100644 +index 0000000000..ab9c503e45 +--- /dev/null ++++ b/sysdeps/aarch64/tst-sme-helper.h +@@ -0,0 +1,94 @@ ++/* Utility functions for SME tests. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++struct blk { ++ void *za_save_buffer; ++ uint16_t num_za_save_slices; ++ char __reserved[6]; ++}; ++ ++/* Read SVCR to get SM (bit0) and ZA (bit1) state. */ ++static unsigned long ++get_svcr (void) ++{ ++ register unsigned long x0 asm ("x0"); ++ asm volatile ( ++ ".inst 0xd53b4240 /* mrs x0, svcr */\n" ++ : "=r" (x0)); ++ return x0; ++} ++ ++/* Returns tpidr2. */ ++static void * ++get_tpidr2 (void) ++{ ++ register unsigned long x0 asm ("x0"); ++ asm volatile ( ++ ".inst 0xd53bd0a0 /* mrs x0, tpidr2_el0 */\n" ++ : "=r"(x0) :: "memory"); ++ return (void *) x0; ++} ++ ++/* Obtains current streaming SVE vector register size. */ ++static unsigned long ++get_svl (void) ++{ ++ register unsigned long x0 asm ("x0"); ++ asm volatile ( ++ ".inst 0x04bf5820 /* rdsvl x0, 1 */\n" ++ : "=r" (x0)); ++ return x0; ++} ++ ++/* PSTATE.ZA = 1, set ZA state to active. 
*/ ++static void ++start_za (void) ++{ ++ asm volatile ( ++ ".inst 0xd503457f /* smstart za */"); ++} ++ ++/* Load data into ZA byte by byte from p. */ ++static void __attribute__ ((noinline)) ++load_za (const void *buf, unsigned long svl) ++{ ++ register unsigned long x15 asm ("x15") = 0; ++ register unsigned long x16 asm ("x16") = (unsigned long)buf; ++ register unsigned long x17 asm ("x17") = svl; ++ ++ asm volatile ( ++ ".inst 0xd503437f /* smstart sm */\n" ++ ".L_ldr_loop:\n" ++ ".inst 0xe1006200 /* ldr za[w15, 0], [x16] */\n" ++ "add w15, w15, 1\n" ++ ".inst 0x04305030 /* addvl x16, x16, 1 */\n" ++ "cmp w15, w17\n" ++ "bne .L_ldr_loop\n" ++ ".inst 0xd503427f /* smstop sm */\n" ++ : "+r"(x15), "+r"(x16), "+r"(x17)); ++} ++ ++/* Set tpidr2 to BLK. */ ++static void ++set_tpidr2 (struct blk *blk) ++{ ++ register unsigned long x0 asm ("x0") = (unsigned long)blk; ++ asm volatile ( ++ ".inst 0xd51bd0a0 /* msr tpidr2_el0, x0 */\n" ++ :: "r"(x0) : "memory"); ++} +diff --git a/sysdeps/aarch64/tst-sme-jmp.c b/sysdeps/aarch64/tst-sme-jmp.c +index 62c419f6c1..b2d21c6e1a 100644 +--- a/sysdeps/aarch64/tst-sme-jmp.c ++++ b/sysdeps/aarch64/tst-sme-jmp.c +@@ -27,87 +27,15 @@ + #include + #include + +-struct blk { +- void *za_save_buffer; +- uint16_t num_za_save_slices; +- char __reserved[6]; +-}; ++#include "tst-sme-helper.h" + ++/* Streaming SVE vector register size. */ + static unsigned long svl; ++ + static uint8_t *za_orig; + static uint8_t *za_dump; + static uint8_t *za_save; + +-static unsigned long +-get_svl (void) +-{ +- register unsigned long x0 asm ("x0"); +- asm volatile ( +- ".inst 0x04bf5820 /* rdsvl x0, 1 */\n" +- : "=r" (x0)); +- return x0; +-} +- +-/* PSTATE.ZA = 1, set ZA state to active. */ +-static void +-start_za (void) +-{ +- asm volatile ( +- ".inst 0xd503457f /* smstart za */"); +-} +- +-/* Read SVCR to get SM (bit0) and ZA (bit1) state. 
*/ +-static unsigned long +-get_svcr (void) +-{ +- register unsigned long x0 asm ("x0"); +- asm volatile ( +- ".inst 0xd53b4240 /* mrs x0, svcr */\n" +- : "=r" (x0)); +- return x0; +-} +- +-/* Load data into ZA byte by byte from p. */ +-static void __attribute__ ((noinline)) +-load_za (const void *p) +-{ +- register unsigned long x15 asm ("x15") = 0; +- register unsigned long x16 asm ("x16") = (unsigned long)p; +- register unsigned long x17 asm ("x17") = svl; +- +- asm volatile ( +- ".inst 0xd503437f /* smstart sm */\n" +- ".L_ldr_loop:\n" +- ".inst 0xe1006200 /* ldr za[w15, 0], [x16] */\n" +- "add w15, w15, 1\n" +- ".inst 0x04305030 /* addvl x16, x16, 1 */\n" +- "cmp w15, w17\n" +- "bne .L_ldr_loop\n" +- ".inst 0xd503427f /* smstop sm */\n" +- : "+r"(x15), "+r"(x16), "+r"(x17)); +-} +- +-/* Set tpidr2 to BLK. */ +-static void +-set_tpidr2 (struct blk *blk) +-{ +- register unsigned long x0 asm ("x0") = (unsigned long)blk; +- asm volatile ( +- ".inst 0xd51bd0a0 /* msr tpidr2_el0, x0 */\n" +- :: "r"(x0) : "memory"); +-} +- +-/* Returns tpidr2. 
*/ +-static void * +-get_tpidr2 (void) +-{ +- register unsigned long x0 asm ("x0"); +- asm volatile ( +- ".inst 0xd53bd0a0 /* mrs x0, tpidr2_el0 */\n" +- : "=r"(x0) :: "memory"); +- return (void *) x0; +-} +- + static void + print_data(const char *msg, void *p) + { +@@ -157,7 +85,7 @@ longjmp_test (void) + FAIL_EXIT1 ("svcr != 0: %lu", svcr); + set_tpidr2 (&blk); + start_za (); +- load_za (za_orig); ++ load_za (za_orig, svl); + + print_data ("za save space", za_save); + p = get_tpidr2 (); +@@ -168,8 +96,8 @@ longjmp_test (void) + { + p = get_tpidr2 (); + printf ("before longjmp: tp2 = %p\n", p); +- if (p != &blk) +- FAIL_EXIT1 ("tpidr2 is clobbered"); ++ if (p != NULL) ++ FAIL_EXIT1 ("tpidr2 has not been reset to null"); + do_longjmp (env); + FAIL_EXIT1 ("longjmp returned"); + } +@@ -206,7 +134,7 @@ setcontext_test (void) + FAIL_EXIT1 ("svcr != 0: %lu", svcr); + set_tpidr2 (&blk); + start_za (); +- load_za (za_orig); ++ load_za (za_orig, svl); + + print_data ("za save space", za_save); + p = get_tpidr2 (); +diff --git a/sysdeps/aarch64/tst-sme-signal.c b/sysdeps/aarch64/tst-sme-signal.c +new file mode 100644 +index 0000000000..b4b07bcc44 +--- /dev/null ++++ b/sysdeps/aarch64/tst-sme-signal.c +@@ -0,0 +1,115 @@ ++/* Test handling of SME state in a signal handler. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include "tst-sme-skeleton.c" ++ ++#include ++ ++static struct _aarch64_ctx * ++extension (void *p) ++{ ++ return p; ++} ++ ++#ifndef TPIDR2_MAGIC ++#define TPIDR2_MAGIC 0x54504902 ++#endif ++ ++#ifndef ZA_MAGIC ++#define ZA_MAGIC 0x54366345 ++#endif ++ ++#ifndef ZT_MAGIC ++#define ZT_MAGIC 0x5a544e01 ++#endif ++ ++#ifndef EXTRA_MAGIC ++#define EXTRA_MAGIC 0x45585401 ++#endif ++ ++/* We use a pipe to make sure that the final check of the SME state ++ happens after signal handler finished. */ ++static int pipefd[2]; ++ ++#define WRITE(msg) xwrite (1, msg, sizeof (msg)); ++ ++static void ++handler (int signo, siginfo_t *si, void *ctx) ++{ ++ TEST_VERIFY (signo == SIGUSR1); ++ WRITE ("in the handler\n"); ++ check_sme_za_state ("during signal", true /* State is clear. */); ++ ucontext_t *uc = ctx; ++ void *p = uc->uc_mcontext.__reserved; ++ unsigned int found = 0; ++ uint32_t m; ++ while ((m = extension (p)->magic)) ++ { ++ if (m == TPIDR2_MAGIC) ++ { ++ WRITE ("found TPIDR2_MAGIC\n"); ++ found += 1; ++ } ++ if (m == ZA_MAGIC) ++ { ++ WRITE ("found ZA_MAGIC\n"); ++ found += 1; ++ } ++ if (m == ZT_MAGIC) ++ { ++ WRITE ("found ZT_MAGIC\n"); ++ found += 1; ++ } ++ if (m == EXTRA_MAGIC) ++ { ++ WRITE ("found EXTRA_MAGIC\n"); ++ struct { struct _aarch64_ctx h; uint64_t data; } *e = p; ++ p = (char *)e->data; ++ continue; ++ } ++ p = (char *)p + extension (p)->size; ++ } ++ TEST_COMPARE (found, 3); ++ ++ /* Signal that the wait is over (see below). 
*/ ++ char message = '\0'; ++ xwrite (pipefd[1], &message, 1); ++} ++ ++static void ++run (struct blk *blk) ++{ ++ xpipe (pipefd); ++ ++ struct sigaction sigact; ++ sigemptyset (&sigact.sa_mask); ++ sigact.sa_flags = 0; ++ sigact.sa_flags |= SA_SIGINFO; ++ sigact.sa_sigaction = handler; ++ xsigaction (SIGUSR1, &sigact, NULL); ++ ++ enable_sme_za_state (blk); ++ check_sme_za_state ("before signal", false /* State is not clear. */); ++ xraise (SIGUSR1); ++ ++ /* Wait for signal handler to complete. */ ++ char response; ++ xread (pipefd[0], &response, 1); ++ ++ check_sme_za_state ("after signal", false /* State is not clear. */); ++} +diff --git a/sysdeps/aarch64/tst-sme-skeleton.c b/sysdeps/aarch64/tst-sme-skeleton.c +new file mode 100644 +index 0000000000..ba84dda1cb +--- /dev/null ++++ b/sysdeps/aarch64/tst-sme-skeleton.c +@@ -0,0 +1,101 @@ ++/* Template for SME tests. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "tst-sme-helper.h" ++ ++/* Streaming SVE vector register size. 
*/ ++static unsigned long svl; ++ ++static uint8_t *state; ++ ++static void ++enable_sme_za_state (struct blk *blk) ++{ ++ start_za (); ++ set_tpidr2 (blk); ++ load_za (blk, svl); ++} ++ ++/* Check if SME state is disabled (when CLEAR is true) or ++ enabled (when CLEAR is false). */ ++static void ++check_sme_za_state (const char msg[], bool clear) ++{ ++ unsigned long svcr = get_svcr (); ++ void *tpidr2 = get_tpidr2 (); ++ printf ("[%s]\n", msg); ++ printf ("svcr = %016lx\n", svcr); ++ printf ("tpidr2 = %016lx\n", (unsigned long)tpidr2); ++ if (clear) ++ { ++ TEST_VERIFY (svcr == 0); ++ TEST_VERIFY (tpidr2 == NULL); ++ } ++ else ++ { ++ TEST_VERIFY (svcr != 0); ++ TEST_VERIFY (tpidr2 != NULL); ++ } ++} ++ ++/* Should be defined in actual test that includes this ++ skeleton file. */ ++static void ++run (struct blk *ptr); ++ ++static int ++do_test (void) ++{ ++ unsigned long hwcap2 = getauxval (AT_HWCAP2); ++ if ((hwcap2 & HWCAP2_SME) == 0) ++ return EXIT_UNSUPPORTED; ++ ++ /* Get current streaming SVE vector length in bytes. */ ++ svl = get_svl (); ++ printf ("svl: %lu\n", svl); ++ ++ TEST_VERIFY_EXIT (!(svl < 16 || svl % 16 != 0 || svl >= (1 << 16))); ++ ++ /* Initialise buffer for ZA state of SME. */ ++ state = xmalloc (svl * svl); ++ memset (state, 1, svl * svl); ++ struct blk blk = { ++ .za_save_buffer = state, ++ .num_za_save_slices = svl, ++ .__reserved = {0}, ++ }; ++ ++ run (&blk); ++ ++ free (state); ++ return 0; ++} ++ ++#include +diff --git a/sysdeps/aarch64/tst-sme-vfork.c b/sysdeps/aarch64/tst-sme-vfork.c +new file mode 100644 +index 0000000000..3feea065e5 +--- /dev/null ++++ b/sysdeps/aarch64/tst-sme-vfork.c +@@ -0,0 +1,43 @@ ++/* Test that ZA state of SME is cleared in both parent and child ++ when vfork() function is used. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include "tst-sme-skeleton.c" ++ ++static void ++run (struct blk *blk) ++{ ++ /* Enabled ZA state so that effect of disabling be observable. */ ++ enable_sme_za_state (blk); ++ check_sme_za_state ("before vfork", /* Clear. */ false); ++ fflush (stdout); ++ ++ pid_t pid = vfork (); ++ ++ if (pid == 0) ++ { ++ /* Check that ZA state of SME was disabled in child. */ ++ check_sme_za_state ("after vfork in child", /* Clear. */ true); ++ _exit (0); ++ } ++ ++ /* Check that ZA state of SME was disabled in parent. */ ++ check_sme_za_state ("after vfork in parent", /* Clear. */ true); ++ ++ TEST_VERIFY (xwaitpid (pid, NULL, 0) == pid); ++} +diff --git a/sysdeps/aarch64/tst-sme-za-state.c b/sysdeps/aarch64/tst-sme-za-state.c +new file mode 100644 +index 0000000000..00118ef506 +--- /dev/null ++++ b/sysdeps/aarch64/tst-sme-za-state.c +@@ -0,0 +1,52 @@ ++/* Test for SME ZA state being cleared on setjmp and longjmp. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include "tst-sme-skeleton.c" ++ ++#include ++ ++static void ++run (struct blk *ptr) ++{ ++ jmp_buf buf; ++ int ret; ++ ++ check_sme_za_state ("initial state", /* Clear. */ true); ++ ++ /* Enabled ZA state so that effect of disabling be observable. */ ++ enable_sme_za_state (ptr); ++ check_sme_za_state ("before setjmp", /* Clear. */ false); ++ ++ if ((ret = setjmp (buf)) == 0) ++ { ++ check_sme_za_state ("after setjmp", /* Clear. */ true); ++ ++ /* Enabled ZA state so that effect of disabling be observable. */ ++ enable_sme_za_state (ptr); ++ check_sme_za_state ("before longjmp", /* Clear. */ false); ++ ++ longjmp (buf, 42); ++ ++ /* Unreachable. */ ++ TEST_VERIFY (false); ++ __builtin_unreachable (); ++ } ++ ++ TEST_COMPARE (ret, 42); ++ check_sme_za_state ("after longjmp", /* Clear. */ true); ++} diff --git a/sysdeps/arm/find_exidx.c b/sysdeps/arm/find_exidx.c index 60021a072c..468e016214 100644 --- a/sysdeps/arm/find_exidx.c @@ -7218,9 +10586,18 @@ *pcount = data.dlfo_eh_count; return (_Unwind_Ptr) data.dlfo_eh_frame; diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h -index e871f27ff2..ddb34a1588 100644 +index e871f27ff2..5cb685e470 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h +@@ -351,7 +351,7 @@ struct rtld_global + void (*free) (void *); + } _ns_unique_sym_table; + /* Keep track of changes to each namespace' list. */ +- struct r_debug_extended _ns_debug; ++ struct r_debug_extended _ns_debug_unused; + } _dl_ns[DL_NNS]; + /* One higher than index of last used namespace. 
*/ + EXTERN size_t _dl_nns; @@ -695,10 +695,23 @@ extern const ElfW(Phdr) *_dl_phdr; extern size_t _dl_phnum; #endif @@ -7246,6 +10623,666 @@ /* Variable pointing to the end of the stack (or close to it). This value must be constant over the runtime of the application. Some programs +@@ -1041,15 +1054,29 @@ extern void _dl_debug_state (void); + rtld_hidden_proto (_dl_debug_state) + + /* Initialize `struct r_debug_extended' for the namespace NS. LDBASE +- is the run-time load address of the dynamic linker, to be put in the +- `r_ldbase' member. Return the address of the structure. */ ++ is the run-time load address of the dynamic linker, to be put in ++ the `r_ldbase' member. ++ ++ Return the address of the r_debug structure for the namespace. ++ This is not merely a convenience or optimization, but it is ++ necessary for the LIBC_PROBE Systemtap/debugger probes to work ++ reliably: direct variable access can create probes that tools ++ cannot consume. */ + extern struct r_debug *_dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) + attribute_hidden; + ++/* This is called after relocation processing to handle a potential ++ copy relocation for _r_debug. */ ++void _dl_debug_post_relocate (struct link_map *main_map) attribute_hidden; ++ + /* Update the `r_map' member and return the address of `struct r_debug' + of the namespace NS. */ + extern struct r_debug *_dl_debug_update (Lmid_t ns) attribute_hidden; + ++/* Update R->r_state to STATE and notify the debugger by calling ++ _dl_debug_state. */ ++void _dl_debug_change_state (struct r_debug *r, int state) attribute_hidden; ++ + /* Initialize the basic data structure for the search paths. SOURCE + is either "LD_LIBRARY_PATH" or "--library-path". 
+ GLIBC_HWCAPS_PREPEND adds additional glibc-hwcaps subdirectories to +diff --git a/sysdeps/generic/libc-tsd.h b/sysdeps/generic/libc-tsd.h +deleted file mode 100644 +index b95e4094f6..0000000000 +--- a/sysdeps/generic/libc-tsd.h ++++ /dev/null +@@ -1,60 +0,0 @@ +-/* libc-internal interface for thread-specific data. Stub or TLS version. +- Copyright (C) 1998-2025 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#ifndef _GENERIC_LIBC_TSD_H +-#define _GENERIC_LIBC_TSD_H 1 +- +-/* This file defines the following macros for accessing a small fixed +- set of thread-specific `void *' data used only internally by libc. +- +- __libc_tsd_define(CLASS, TYPE, KEY) -- Define or declare a datum with TYPE +- for KEY. CLASS can be `static' for +- keys used in only one source file, +- empty for global definitions, or +- `extern' for global declarations. +- __libc_tsd_address(TYPE, KEY) -- Return the `TYPE *' pointing to +- the current thread's datum for KEY. +- __libc_tsd_get(TYPE, KEY) -- Return the `TYPE' datum for KEY. +- __libc_tsd_set(TYPE, KEY, VALUE) -- Set the datum for KEY to VALUE. 
+- +- The set of available KEY's will usually be provided as an enum, +- and contains (at least): +- _LIBC_TSD_KEY_MALLOC +- _LIBC_TSD_KEY_DL_ERROR +- _LIBC_TSD_KEY_RPC_VARS +- All uses must be the literal _LIBC_TSD_* name in the __libc_tsd_* macros. +- Some implementations may not provide any enum at all and instead +- using string pasting in the macros. */ +- +-#include +- +-/* When full support for __thread variables is available, this interface is +- just a trivial wrapper for it. Without TLS, this is the generic/stub +- implementation for wholly single-threaded systems. +- +- We don't define an enum for the possible key values, because the KEYs +- translate directly into variables by macro magic. */ +- +-#define __libc_tsd_define(CLASS, TYPE, KEY) \ +- CLASS __thread TYPE __libc_tsd_##KEY attribute_tls_model_ie; +- +-#define __libc_tsd_address(TYPE, KEY) (&__libc_tsd_##KEY) +-#define __libc_tsd_get(TYPE, KEY) (__libc_tsd_##KEY) +-#define __libc_tsd_set(TYPE, KEY, VALUE) (__libc_tsd_##KEY = (VALUE)) +- +-#endif /* libc-tsd.h */ +diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile +index a2e8c0b128..c0c017b899 100644 +--- a/sysdeps/i386/Makefile ++++ b/sysdeps/i386/Makefile +@@ -30,7 +30,9 @@ stack-align-test-flags += -malign-double + endif + + ifeq ($(subdir),elf) +-sysdep-dl-routines += tlsdesc dl-tlsdesc ++sysdep-dl-routines += \ ++ dl-tls-get-addr \ ++# sysdep-dl-routines + + tests += tst-audit3 + modules-names += tst-auditmod3a tst-auditmod3b +@@ -58,6 +60,15 @@ $(objpfx)tst-ld-sse-use.out: ../sysdeps/i386/tst-ld-sse-use.sh $(objpfx)ld.so + @echo "Checking ld.so for SSE register use. This will take a few seconds..." 
+ $(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \ + $(evaluate-test) ++ ++tests-special += $(objpfx)check-gnu-tls.out ++ ++$(objpfx)check-gnu-tls.out: $(common-objpfx)libc.so ++ LC_ALL=C $(READELF) -V -W $< \ ++ | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ ++ | grep GLIBC_ABI_GNU_TLS > $@; \ ++ $(evaluate-test) ++generated += check-gnu-tls.out + else + CFLAGS-.os += $(if $(filter rtld-%.os,$(@F)), $(rtld-CFLAGS)) + endif +diff --git a/sysdeps/i386/Versions b/sysdeps/i386/Versions +index 36e23b466a..9c84c8ef04 100644 +--- a/sysdeps/i386/Versions ++++ b/sysdeps/i386/Versions +@@ -28,6 +28,11 @@ libc { + GLIBC_2.13 { + __fentry__; + } ++ GLIBC_ABI_GNU_TLS { ++ # This symbol is used only for empty version map and will be removed ++ # by scripts/versions.awk. ++ __placeholder_only_for_empty_version_map; ++ } + } + libm { + GLIBC_2.1 { +diff --git a/sysdeps/i386/dl-tls-get-addr.c b/sysdeps/i386/dl-tls-get-addr.c +new file mode 100644 +index 0000000000..c97e5c57be +--- /dev/null ++++ b/sysdeps/i386/dl-tls-get-addr.c +@@ -0,0 +1,68 @@ ++/* Ifunc selector for ___tls_get_addr. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#ifdef SHARED ++# define ___tls_get_addr __redirect____tls_get_addr ++# include ++# undef ___tls_get_addr ++# undef __tls_get_addr ++ ++# define SYMBOL_NAME ___tls_get_addr ++# include ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (fnsave) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (fxsave) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (xsave) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (xsavec) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++ const struct cpu_features* cpu_features = __get_cpu_features (); ++ ++ if (cpu_features->xsave_state_size != 0) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) ++ return OPTIMIZE (xsavec); ++ else ++ return OPTIMIZE (xsave); ++ } ++ else if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) ++ return OPTIMIZE (fxsave); ++ return OPTIMIZE (fnsave); ++} ++ ++libc_ifunc_redirected (__redirect____tls_get_addr, ___tls_get_addr, ++ IFUNC_SELECTOR ()); ++ ++/* The special thing about the x86 TLS ABI is that we have two ++ variants of the __tls_get_addr function with different calling ++ conventions. The GNU version, which we are mostly concerned here, ++ takes the parameter in a register. The name is changed by adding ++ an additional underscore at the beginning. The Sun version uses ++ the normal calling convention. */ ++ ++rtld_hidden_proto (___tls_get_addr) ++rtld_hidden_def (___tls_get_addr) ++ ++void * ++__tls_get_addr (tls_index *ti) ++{ ++ return ___tls_get_addr (ti); ++} ++#endif +diff --git a/sysdeps/i386/dl-tls.h b/sysdeps/i386/dl-tls.h +index f453931d78..ef605c5b0d 100644 +--- a/sysdeps/i386/dl-tls.h ++++ b/sysdeps/i386/dl-tls.h +@@ -37,34 +37,14 @@ typedef struct dl_tls_index + /* This is the prototype for the GNU version. 
*/ + extern void *___tls_get_addr (tls_index *ti) + __attribute__ ((__regparm__ (1))); +-extern void *___tls_get_addr_internal (tls_index *ti) +- __attribute__ ((__regparm__ (1))) attribute_hidden; +- + # if IS_IN (rtld) +-/* The special thing about the x86 TLS ABI is that we have two +- variants of the __tls_get_addr function with different calling +- conventions. The GNU version, which we are mostly concerned here, +- takes the parameter in a register. The name is changed by adding +- an additional underscore at the beginning. The Sun version uses +- the normal calling convention. */ +-void * +-__tls_get_addr (tls_index *ti) +-{ +- return ___tls_get_addr_internal (ti); +-} +- +- + /* Prepare using the definition of __tls_get_addr in the generic + version of this file. */ +-# define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr +-strong_alias (___tls_get_addr, ___tls_get_addr_internal) +-rtld_hidden_proto (___tls_get_addr) +-rtld_hidden_def (___tls_get_addr) +-#else +- ++# define __tls_get_addr \ ++ __attribute__ ((__regparm__ (1))) ___tls_get_addr_internal ++# else + /* Users should get the better interface. */ +-# define __tls_get_addr ___tls_get_addr +- ++# define __tls_get_addr ___tls_get_addr + # endif + #endif + +diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h +index 6aec06d15c..be9ecd659b 100644 +--- a/sysdeps/i386/dl-tlsdesc-dynamic.h ++++ b/sysdeps/i386/dl-tlsdesc-dynamic.h +@@ -16,34 +16,6 @@ + License along with the GNU C Library; if not, see + . */ + +-#undef REGISTER_SAVE_AREA +- +-#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 +-# error STATE_SAVE_ALIGNMENT must be multiple of 16 +-#endif +- +-#if DL_RUNTIME_RESOLVE_REALIGN_STACK +-# ifdef USE_FNSAVE +-# error USE_FNSAVE shouldn't be defined +-# endif +-# ifdef USE_FXSAVE +-/* Use fxsave to save all registers. */ +-# define REGISTER_SAVE_AREA 512 +-# endif +-#else +-# ifdef USE_FNSAVE +-/* Use fnsave to save x87 FPU stack registers. 
*/ +-# define REGISTER_SAVE_AREA 108 +-# else +-# ifndef USE_FXSAVE +-# error USE_FXSAVE must be defined +-# endif +-/* Use fxsave to save all registers. Add 12 bytes to align the stack +- to 16 bytes. */ +-# define REGISTER_SAVE_AREA (512 + 12) +-# endif +-#endif +- + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,@function +@@ -104,85 +76,7 @@ _dl_tlsdesc_dynamic: + ret + .p2align 4,,7 + 2: +- cfi_adjust_cfa_offset (32) +-#if DL_RUNTIME_RESOLVE_REALIGN_STACK +- movl %ebx, -28(%esp) +- movl %esp, %ebx +- cfi_def_cfa_register(%ebx) +- and $-STATE_SAVE_ALIGNMENT, %esp +-#endif +-#ifdef REGISTER_SAVE_AREA +- subl $REGISTER_SAVE_AREA, %esp +-# if !DL_RUNTIME_RESOLVE_REALIGN_STACK +- cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) +-# endif +-#else +-# if !DL_RUNTIME_RESOLVE_REALIGN_STACK +-# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true +-# endif +- /* Allocate stack space of the required size to save the state. */ +- LOAD_PIC_REG (cx) +- subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp +-#endif +-#ifdef USE_FNSAVE +- fnsave (%esp) +-#elif defined USE_FXSAVE +- fxsave (%esp) +-#else +- /* Save the argument for ___tls_get_addr in EAX. */ +- movl %eax, %ecx +- movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax +- xorl %edx, %edx +- /* Clear the XSAVE Header. 
*/ +-# ifdef USE_XSAVE +- movl %edx, (512)(%esp) +- movl %edx, (512 + 4 * 1)(%esp) +- movl %edx, (512 + 4 * 2)(%esp) +- movl %edx, (512 + 4 * 3)(%esp) +-# endif +- movl %edx, (512 + 4 * 4)(%esp) +- movl %edx, (512 + 4 * 5)(%esp) +- movl %edx, (512 + 4 * 6)(%esp) +- movl %edx, (512 + 4 * 7)(%esp) +- movl %edx, (512 + 4 * 8)(%esp) +- movl %edx, (512 + 4 * 9)(%esp) +- movl %edx, (512 + 4 * 10)(%esp) +- movl %edx, (512 + 4 * 11)(%esp) +- movl %edx, (512 + 4 * 12)(%esp) +- movl %edx, (512 + 4 * 13)(%esp) +- movl %edx, (512 + 4 * 14)(%esp) +- movl %edx, (512 + 4 * 15)(%esp) +-# ifdef USE_XSAVE +- xsave (%esp) +-# else +- xsavec (%esp) +-# endif +- /* Restore the argument for ___tls_get_addr in EAX. */ +- movl %ecx, %eax +-#endif +- call HIDDEN_JUMPTARGET (___tls_get_addr) +- /* Get register content back. */ +-#ifdef USE_FNSAVE +- frstor (%esp) +-#elif defined USE_FXSAVE +- fxrstor (%esp) +-#else +- /* Save and retore ___tls_get_addr return value stored in EAX. */ +- movl %eax, %ecx +- movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax +- xorl %edx, %edx +- xrstor (%esp) +- movl %ecx, %eax +-#endif +-#if DL_RUNTIME_RESOLVE_REALIGN_STACK +- mov %ebx, %esp +- cfi_def_cfa_register(%esp) +- movl -28(%esp), %ebx +- cfi_restore(%ebx) +-#else +- addl $REGISTER_SAVE_AREA, %esp +- cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) +-#endif ++#include "tls-get-addr-wrapper.h" + jmp 1b + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic +diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S +index c080993a60..c914ca4220 100644 +--- a/sysdeps/i386/dl-tlsdesc.S ++++ b/sysdeps/i386/dl-tlsdesc.S +@@ -22,23 +22,6 @@ + #include + #include "tlsdesc.h" + +-#ifndef DL_STACK_ALIGNMENT +-/* Due to GCC bug: +- +- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 +- +- __tls_get_addr may be called with 4-byte stack alignment. Although +- this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume +- that stack will be always aligned at 16 bytes. 
*/ +-# define DL_STACK_ALIGNMENT 4 +-#endif +- +-/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align +- stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr. */ +-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ +- (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ +- || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) +- + .text + + /* This function is used to compute the TP offset for symbols in +diff --git a/sysdeps/i386/tls-get-addr-wrapper.h b/sysdeps/i386/tls-get-addr-wrapper.h +new file mode 100644 +index 0000000000..0708e5ad1d +--- /dev/null ++++ b/sysdeps/i386/tls-get-addr-wrapper.h +@@ -0,0 +1,127 @@ ++/* Wrapper of i386 ___tls_get_addr to save and restore vector registers. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#undef REGISTER_SAVE_AREA ++ ++#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 ++# error STATE_SAVE_ALIGNMENT must be multiple of 16 ++#endif ++ ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++# ifdef USE_FNSAVE ++# error USE_FNSAVE shouldn't be defined ++# endif ++# ifdef USE_FXSAVE ++/* Use fxsave to save all registers. */ ++# define REGISTER_SAVE_AREA 512 ++# endif ++#else ++# ifdef USE_FNSAVE ++/* Use fnsave to save x87 FPU stack registers. 
*/ ++# define REGISTER_SAVE_AREA 108 ++# else ++# ifndef USE_FXSAVE ++# error USE_FXSAVE must be defined ++# endif ++/* Use fxsave to save all registers. Add 12 bytes to align the stack ++ to 16 bytes. */ ++# define REGISTER_SAVE_AREA (512 + 12) ++# endif ++#endif ++ ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++ movl %ebx, 28(%esp) ++ movl %esp, %ebx ++ cfi_def_cfa_register(%ebx) ++ and $-STATE_SAVE_ALIGNMENT, %esp ++#endif ++#ifdef REGISTER_SAVE_AREA ++ subl $REGISTER_SAVE_AREA, %esp ++# if !DL_RUNTIME_RESOLVE_REALIGN_STACK ++ cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) ++# endif ++#else ++# if !DL_RUNTIME_RESOLVE_REALIGN_STACK ++# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true ++# endif ++ /* Allocate stack space of the required size to save the state. */ ++ LOAD_PIC_REG (cx) ++ subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET \ ++ +XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp ++#endif ++#ifdef USE_FNSAVE ++ fnsave (%esp) ++#elif defined USE_FXSAVE ++ fxsave (%esp) ++#else ++ /* Save the argument for ___tls_get_addr in EAX. */ ++ movl %eax, %ecx ++ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax ++ xorl %edx, %edx ++ /* Clear the XSAVE Header. */ ++# ifdef USE_XSAVE ++ movl %edx, (512)(%esp) ++ movl %edx, (512 + 4 * 1)(%esp) ++ movl %edx, (512 + 4 * 2)(%esp) ++ movl %edx, (512 + 4 * 3)(%esp) ++# endif ++ movl %edx, (512 + 4 * 4)(%esp) ++ movl %edx, (512 + 4 * 5)(%esp) ++ movl %edx, (512 + 4 * 6)(%esp) ++ movl %edx, (512 + 4 * 7)(%esp) ++ movl %edx, (512 + 4 * 8)(%esp) ++ movl %edx, (512 + 4 * 9)(%esp) ++ movl %edx, (512 + 4 * 10)(%esp) ++ movl %edx, (512 + 4 * 11)(%esp) ++ movl %edx, (512 + 4 * 12)(%esp) ++ movl %edx, (512 + 4 * 13)(%esp) ++ movl %edx, (512 + 4 * 14)(%esp) ++ movl %edx, (512 + 4 * 15)(%esp) ++# ifdef USE_XSAVE ++ xsave (%esp) ++# else ++ xsavec (%esp) ++# endif ++ /* Restore the argument for ___tls_get_addr in EAX. */ ++ movl %ecx, %eax ++#endif ++ call ___tls_get_addr_internal ++ /* Get register content back. 
*/ ++#ifdef USE_FNSAVE ++ frstor (%esp) ++#elif defined USE_FXSAVE ++ fxrstor (%esp) ++#else ++ /* Save and retore ___tls_get_addr return value stored in EAX. */ ++ movl %eax, %ecx ++ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax ++ xorl %edx, %edx ++ xrstor (%esp) ++ movl %ecx, %eax ++#endif ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++ mov %ebx, %esp ++ cfi_def_cfa_register(%esp) ++ movl 28(%esp), %ebx ++ cfi_restore(%ebx) ++#else ++ addl $REGISTER_SAVE_AREA, %esp ++ cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) ++#endif ++ ++#undef STATE_SAVE_ALIGNMENT +diff --git a/sysdeps/i386/tls_get_addr.S b/sysdeps/i386/tls_get_addr.S +new file mode 100644 +index 0000000000..7d143d8a23 +--- /dev/null ++++ b/sysdeps/i386/tls_get_addr.S +@@ -0,0 +1,57 @@ ++/* Thread-local storage handling in the ELF dynamic linker. i386 version. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include ++ ++ .text ++#ifdef SHARED ++# define USE_FNSAVE ++# define MINIMUM_ALIGNMENT 4 ++# define STATE_SAVE_ALIGNMENT 4 ++# define ___tls_get_addr _____tls_get_addr_fnsave ++# include "tls_get_addr.h" ++# undef ___tls_get_addr ++# undef MINIMUM_ALIGNMENT ++# undef USE_FNSAVE ++ ++# define MINIMUM_ALIGNMENT 16 ++ ++# define USE_FXSAVE ++# define STATE_SAVE_ALIGNMENT 16 ++# define ___tls_get_addr _____tls_get_addr_fxsave ++# include "tls_get_addr.h" ++# undef ___tls_get_addr ++# undef USE_FXSAVE ++ ++# define USE_XSAVE ++# define STATE_SAVE_ALIGNMENT 64 ++# define ___tls_get_addr _____tls_get_addr_xsave ++# include "tls_get_addr.h" ++# undef ___tls_get_addr ++# undef USE_XSAVE ++ ++# define USE_XSAVEC ++# define STATE_SAVE_ALIGNMENT 64 ++# define ___tls_get_addr _____tls_get_addr_xsavec ++# include "tls_get_addr.h" ++# undef ___tls_get_addr ++# undef USE_XSAVEC ++#endif /* SHARED */ +diff --git a/sysdeps/i386/tls_get_addr.h b/sysdeps/i386/tls_get_addr.h +new file mode 100644 +index 0000000000..1825798724 +--- /dev/null ++++ b/sysdeps/i386/tls_get_addr.h +@@ -0,0 +1,42 @@ ++/* Thread-local storage handling in the ELF dynamic linker. i386 version. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++ .hidden ___tls_get_addr ++ .global ___tls_get_addr ++ .type ___tls_get_addr,@function ++ ++ /* This function is a wrapper of ___tls_get_addr_internal to ++ preserve caller-saved vector registers. */ ++ ++ cfi_startproc ++ .align 16 ++___tls_get_addr: ++ /* Like all TLS resolvers, preserve call-clobbered registers. ++ We need two scratch regs anyway. */ ++ subl $32, %esp ++ cfi_adjust_cfa_offset (32) ++ movl %ecx, 20(%esp) ++ movl %edx, 24(%esp) ++#include "tls-get-addr-wrapper.h" ++ movl 20(%esp), %ecx ++ movl 24(%esp), %edx ++ addl $32, %esp ++ cfi_adjust_cfa_offset (-32) ++ ret ++ cfi_endproc ++ .size ___tls_get_addr, .-___tls_get_addr diff --git a/sysdeps/ieee754/dbl-64/e_atanh.c b/sysdeps/ieee754/dbl-64/e_atanh.c index 1e09e46f0f..d1c71b2aa4 100644 --- a/sysdeps/ieee754/dbl-64/e_atanh.c @@ -7388,6 +11425,30 @@ { if (sgn) return -st[j].rh - st[j].rl; +diff --git a/sysdeps/loongarch/preconfigure b/sysdeps/loongarch/preconfigure +index 0d1e9ed8df..6726ab8302 100644 +--- a/sysdeps/loongarch/preconfigure ++++ b/sysdeps/loongarch/preconfigure +@@ -44,6 +44,7 @@ loongarch*) + + base_machine=loongarch + mtls_descriptor=desc ++ mtls_traditional=trad + ;; + esac + +diff --git a/sysdeps/loongarch/preconfigure.ac b/sysdeps/loongarch/preconfigure.ac +index df07dbf41f..56402261df 100644 +--- a/sysdeps/loongarch/preconfigure.ac ++++ b/sysdeps/loongarch/preconfigure.ac +@@ -42,6 +42,7 @@ loongarch*) + + base_machine=loongarch + mtls_descriptor=desc ++ mtls_traditional=trad + ;; + esac + diff --git a/sysdeps/mach/hurd/dl-execstack.c b/sysdeps/mach/hurd/dl-execstack.c index 0617d3a161..dc4719bd38 100644 --- a/sysdeps/mach/hurd/dl-execstack.c @@ -7445,6 +11506,22 @@ /* Cleanup buffers */ +diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile +index 5e6cb07ce6..5cdb64f29b 100644 +--- a/sysdeps/powerpc/Makefile ++++ b/sysdeps/powerpc/Makefile +@@ -28,6 +28,11 @@ tst-cache-ppc-static-dlopen-ENV = LD_LIBRARY_PATH=$(objpfx):$(common-objpfx):$(c + 
$(objpfx)tst-cache-ppc-static-dlopen.out: $(objpfx)mod-cache-ppc.so + + $(objpfx)tst-cache-ppc: $(objpfx)mod-cache-ppc.so ++ ++# The test checks if the __tls_get_addr does not clobber caller-saved ++# register, so disable the powerpc specific optimization to force a ++# __tls_get_addr call. ++LDFLAGS-tst-tls23-mod.so = -Wl,--no-tls-get-addr-optimize + endif + + ifneq (no,$(multi-arch)) diff --git a/sysdeps/powerpc/powerpc64/le/power10/memchr.S b/sysdeps/powerpc/powerpc64/le/power10/memchr.S deleted file mode 100644 index 96ad5a2e1d..0000000000 @@ -7767,37 +11844,20 @@ -weak_alias (__memchr, memchr) -libc_hidden_builtin_def (memchr) diff --git a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S -deleted file mode 100644 -index fffa1ee0a9..0000000000 +index fffa1ee0a9..0d4a53317c 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S -+++ /dev/null -@@ -1,233 +0,0 @@ --/* Optimized strcmp implementation for PowerPC64/POWER10. ++++ b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S +@@ -1,5 +1,5 @@ + /* Optimized strcmp implementation for PowerPC64/POWER10. - Copyright (C) 2021-2025 Free Software Foundation, Inc. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, see -- . 
*/ --#include -- --#ifndef STRCMP --# define STRCMP strcmp --#endif -- --/* Implements the function -- int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]). */ -- ++ Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -24,16 +24,6 @@ + /* Implements the function + int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]). */ + -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. */ @@ -7808,44 +11868,36 @@ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) -- --#define COMPARE_16(vreg1,vreg2,offset) \ -- lxv vreg1+32,offset(r3); \ -- lxv vreg2+32,offset(r4); \ -- vcmpnezb. v7,vreg1,vreg2; \ -- bne cr6,L(different); \ -- --#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \ + + #define COMPARE_16(vreg1,vreg2,offset) \ + lxv vreg1+32,offset(r3); \ +@@ -42,8 +32,8 @@ + bne cr6,L(different); \ + + #define COMPARE_32(vreg1,vreg2,offset,label1,label2) \ - LXVP(vreg1+32,offset,r3); \ - LXVP(vreg2+32,offset,r4); \ -- vcmpnezb. v7,vreg1+1,vreg2+1; \ -- bne cr6,L(label1); \ -- vcmpnezb. v7,vreg1,vreg2; \ -- bne cr6,L(label2); \ -- --#define TAIL(vreg1,vreg2) \ -- vctzlsbb r6,v7; \ -- vextubrx r5,r6,vreg1; \ -- vextubrx r4,r6,vreg2; \ -- subf r3,r4,r5; \ -- blr; \ -- --#define CHECK_N_BYTES(reg1,reg2,len_reg) \ -- sldi r0,len_reg,56; \ -- lxvl 32+v4,reg1,r0; \ -- lxvl 32+v5,reg2,r0; \ -- add reg1,reg1,len_reg; \ -- add reg2,reg2,len_reg; \ ++ lxvp vreg1+32,offset(r3); \ ++ lxvp vreg2+32,offset(r4); \ + vcmpnezb. v7,vreg1+1,vreg2+1; \ + bne cr6,L(label1); \ + vcmpnezb. v7,vreg1,vreg2; \ +@@ -62,120 +52,77 @@ + lxvl 32+v5,reg2,r0; \ + add reg1,reg1,len_reg; \ + add reg2,reg2,len_reg; \ - vcmpnezb v7,v4,v5; \ -- vctzlsbb r6,v7; \ -- cmpld cr7,r6,len_reg; \ -- blt cr7,L(different); \ -- ++ vcmpnezb. 
v7,v4,v5; \ + vctzlsbb r6,v7; \ + cmpld cr7,r6,len_reg; \ + blt cr7,L(different); \ + - /* TODO: change this to .machine power10 when the minimum required - binutils allows it. */ -- + - .machine power9 --ENTRY_TOCLESS (STRCMP, 4) ++ .machine power10 + ENTRY_TOCLESS (STRCMP, 4) - andi. r7,r3,4095 - andi. r8,r4,4095 - cmpldi cr0,r7,4096-16 @@ -7862,7 +11914,28 @@ - cmpld cr7,r7,r5 - beq cr7,L(same_aligned) - blt cr7,L(nalign1_min) -- ++ li r11,16 ++ /* eq bit of cr1 used as swap status flag to indicate if ++ source pointers were swapped. */ ++ crclr 4*cr1+eq ++ andi. r7,r3,15 ++ sub r7,r11,r7 /* r7(nalign1) = 16 - (str1 & 15). */ ++ andi. r9,r4,15 ++ sub r5,r11,r9 /* r5(nalign2) = 16 - (str2 & 15). */ ++ cmpld cr7,r7,r5 ++ beq cr7,L(same_aligned) ++ blt cr7,L(nalign1_min) ++ /* Swap r3 and r4, and r7 and r5 such that r3 and r7 hold the ++ pointer which is closer to the next 16B boundary so that only ++ one CHECK_N_BYTES is needed before entering the loop below. */ ++ mr r8,r4 ++ mr r4,r3 ++ mr r3,r8 ++ mr r12,r7 ++ mr r7,r5 ++ mr r5,r12 ++ crset 4*cr1+eq /* Set bit on swapping source pointers. */ + - /* nalign2 is minimum and s2 pointer is aligned. */ - CHECK_N_BYTES(r3,r4,r5) - /* Are we on the 64B hunk which crosses a page? */ @@ -7876,8 +11949,9 @@ - b L(compare_64B_unaligned) - - /* nalign1 is minimum and s1 pointer is aligned. */ --L(nalign1_min): -- CHECK_N_BYTES(r3,r4,r7) ++ .p2align 5 + L(nalign1_min): + CHECK_N_BYTES(r3,r4,r7) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r4,63 /* Determine offset into 64B hunk. */ - andi. r8,r4,15 /* The offset into the 16B hunk. */ @@ -7886,8 +11960,8 @@ - rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. 
*/ - beq L(compare_64_pagecross) - mtctr r7 -- -- .p2align 5 + + .p2align 5 -L(compare_64B_unaligned): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) @@ -7896,13 +11970,30 @@ - addi r3,r3,64 - addi r4,r4,64 - bdnz L(compare_64B_unaligned) -- ++L(s1_aligned): ++ /* r9 and r5 is number of bytes to be read after and before ++ page boundary correspondingly. */ ++ sub r5,r5,r7 ++ subfic r9,r5,16 ++ /* Now let r7 hold the count of quadwords which can be ++ checked without crossing a page boundary. quadword offset is ++ (str2>>4)&0xFF. */ ++ rlwinm r7,r4,28,0xFF ++ /* Below check is required only for first iteration. For second ++ iteration and beyond, the new loop counter is always 255. */ ++ cmpldi r7,255 ++ beq L(L3) ++ /* Get the initial loop count by 255-((str2>>4)&0xFF). */ ++ subfic r11,r7,255 + - /* Cross the page boundary of s2, carefully. Only for first - iteration we have to get the count of 64B blocks to be checked. - From second iteration and beyond, loop counter is always 63. */ -L(compare_64_pagecross): - li r11, 63 -- mtctr r11 ++ .p2align 5 ++L(L1): + mtctr r11 - cmpldi r10,16 - ble L(cross_4) - cmpldi r10,32 @@ -7937,8 +12028,12 @@ - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) -- addi r3,r3,16 -- addi r4,r4,16 ++ ++ .p2align 5 ++L(L2): ++ COMPARE_16(v4,v5,0) /* Load 16B blocks using lxv. */ + addi r3,r3,16 + addi r4,r4,16 - b L(compare_64B_unaligned) -L(cross_4): - COMPARE_16(v4,v5,0) @@ -7946,103 +12041,92 @@ - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 -- CHECK_N_BYTES(r3,r4,r9) ++ bdnz L(L2) ++ /* Cross the page boundary of s2, carefully. */ ++ ++ .p2align 5 ++L(L3): ++ CHECK_N_BYTES(r3,r4,r5) + CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - b L(compare_64B_unaligned) -- --L(same_aligned): -- CHECK_N_BYTES(r3,r4,r7) -- /* Align s1 to 32B and adjust s2 address. -- Use lxvp only if both s1 and s2 are 32B aligned. 
*/ -- COMPARE_16(v4,v5,0) -- COMPARE_16(v4,v5,16) -- COMPARE_16(v4,v5,32) -- COMPARE_16(v4,v5,48) -- addi r3,r3,64 -- addi r4,r4,64 -- COMPARE_16(v4,v5,0) -- COMPARE_16(v4,v5,16) -- -- clrldi r6,r3,59 -- subfic r5,r6,32 -- add r3,r3,r5 -- add r4,r4,r5 -- andi. r5,r4,0x1F -- beq cr0,L(32B_aligned_loop) -- -- .p2align 5 --L(16B_aligned_loop): -- COMPARE_16(v4,v5,0) -- COMPARE_16(v4,v5,16) -- COMPARE_16(v4,v5,32) -- COMPARE_16(v4,v5,48) -- addi r3,r3,64 -- addi r4,r4,64 -- b L(16B_aligned_loop) -- -- /* Calculate and return the difference. */ --L(different): ++ li r11,255 /* Load the new loop counter. */ ++ b L(L1) + ++ .p2align 5 + L(same_aligned): + CHECK_N_BYTES(r3,r4,r7) + /* Align s1 to 32B and adjust s2 address. +@@ -208,26 +155,31 @@ L(16B_aligned_loop): + + /* Calculate and return the difference. */ + L(different): - TAIL(v4,v5) -- -- .p2align 5 --L(32B_aligned_loop): -- COMPARE_32(v14,v16,0,tail1,tail2) ++ vctzlsbb r6,v7 ++ vextubrx r5,r6,v4 ++ vextubrx r4,r6,v5 ++ bt 4*cr1+eq,L(swapped) ++ subf r3,r4,r5 ++ blr ++ ++ /* If src pointers were swapped, then swap the ++ indices and calculate the return value. 
*/ ++L(swapped): ++ subf r3,r5,r4 ++ blr + + .p2align 5 + L(32B_aligned_loop): + COMPARE_32(v14,v16,0,tail1,tail2) - COMPARE_32(v18,v20,32,tail3,tail4) - COMPARE_32(v22,v24,64,tail5,tail6) - COMPARE_32(v26,v28,96,tail7,tail8) -- addi r3,r3,128 -- addi r4,r4,128 -- b L(32B_aligned_loop) -- --L(tail1): TAIL(v15,v17) --L(tail2): TAIL(v14,v16) ++ COMPARE_32(v14,v16,32,tail1,tail2) ++ COMPARE_32(v14,v16,64,tail1,tail2) ++ COMPARE_32(v14,v16,96,tail1,tail2) + addi r3,r3,128 + addi r4,r4,128 + b L(32B_aligned_loop) + + L(tail1): TAIL(v15,v17) + L(tail2): TAIL(v14,v16) -L(tail3): TAIL(v19,v21) -L(tail4): TAIL(v18,v20) -L(tail5): TAIL(v23,v25) -L(tail6): TAIL(v22,v24) -L(tail7): TAIL(v27,v29) -L(tail8): TAIL(v26,v28) -- --END (STRCMP) --libc_hidden_builtin_def (strcmp) + + END (STRCMP) + libc_hidden_builtin_def (strcmp) +diff --git a/sysdeps/powerpc/powerpc64/le/power10/strlen.S b/sysdeps/powerpc/powerpc64/le/power10/strlen.S +index 4985a9291b..a4c5498740 100644 +--- a/sysdeps/powerpc/powerpc64/le/power10/strlen.S ++++ b/sysdeps/powerpc/powerpc64/le/power10/strlen.S +@@ -31,7 +31,7 @@ + # define FUNCNAME RAWMEMCHR + # endif + # define MCOUNT_NARGS 2 +-# define VREG_ZERO v20 ++# define VREG_ZERO v17 + # define OFF_START_LOOP 256 + # define RAWMEMCHR_SUBTRACT_VECTORS \ + vsububm v4,v4,v18; \ diff --git a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S -deleted file mode 100644 -index 10700dd400..0000000000 +index 10700dd400..6e09fcb7f2 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S -+++ /dev/null -@@ -1,271 +0,0 @@ --/* Optimized strncmp implementation for PowerPC64/POWER10. ++++ b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S +@@ -1,5 +1,5 @@ + /* Optimized strncmp implementation for PowerPC64/POWER10. - Copyright (C) 2024-2025 Free Software Foundation, Inc. -- This file is part of the GNU C Library. 
-- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, see -- . */ -- --#include -- --/* Implements the function -- -- int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n) -- -- The implementation uses unaligned doubleword access to avoid specialized -- code paths depending of data alignment for first 32 bytes and uses -- vectorised loops after that. */ -- --#ifndef STRNCMP --# define STRNCMP strncmp --#endif -- ++ Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -30,17 +30,6 @@ + # define STRNCMP strncmp + #endif + -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. */ @@ -8054,236 +12138,58 @@ - | ((ra)<<(32-16)) \ - | dq) - --#define COMPARE_16(vreg1,vreg2,offset) \ -- lxv vreg1+32,offset(r3); \ -- lxv vreg2+32,offset(r4); \ -- vcmpnezb. 
v7,vreg1,vreg2; \ -- bne cr6,L(different); \ -- cmpldi cr7,r5,16; \ -- ble cr7,L(ret0); \ -- addi r5,r5,-16; -- --#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \ + #define COMPARE_16(vreg1,vreg2,offset) \ + lxv vreg1+32,offset(r3); \ + lxv vreg2+32,offset(r4); \ +@@ -51,8 +40,8 @@ + addi r5,r5,-16; + + #define COMPARE_32(vreg1,vreg2,offset,label1,label2) \ - LXVP(vreg1+32,offset,r3); \ - LXVP(vreg2+32,offset,r4); \ -- vcmpnezb. v7,vreg1+1,vreg2+1; \ -- bne cr6,L(label1); \ -- vcmpnezb. v7,vreg1,vreg2; \ -- bne cr6,L(label2); \ -- cmpldi cr7,r5,32; \ -- ble cr7,L(ret0); \ -- addi r5,r5,-32; -- --#define TAIL_FIRST_16B(vreg1,vreg2) \ -- vctzlsbb r6,v7; \ -- cmpld cr7,r5,r6; \ -- ble cr7,L(ret0); \ -- vextubrx r5,r6,vreg1; \ -- vextubrx r4,r6,vreg2; \ -- subf r3,r4,r5; \ -- blr; -- --#define TAIL_SECOND_16B(vreg1,vreg2) \ -- vctzlsbb r6,v7; \ -- addi r0,r6,16; \ -- cmpld cr7,r5,r0; \ -- ble cr7,L(ret0); \ -- vextubrx r5,r6,vreg1; \ -- vextubrx r4,r6,vreg2; \ -- subf r3,r4,r5; \ -- blr; -- --#define CHECK_N_BYTES(reg1,reg2,len_reg) \ -- sldi r6,len_reg,56; \ -- lxvl 32+v4,reg1,r6; \ -- lxvl 32+v5,reg2,r6; \ -- add reg1,reg1,len_reg; \ -- add reg2,reg2,len_reg; \ -- vcmpnezb v7,v4,v5; \ -- vctzlsbb r6,v7; \ -- cmpld cr7,r6,len_reg; \ -- blt cr7,L(different); \ -- cmpld cr7,r5,len_reg; \ -- ble cr7,L(ret0); \ -- sub r5,r5,len_reg; \ -- ++ lxvp vreg1+32,offset(r3); \ ++ lxvp vreg2+32,offset(r4); \ + vcmpnezb. v7,vreg1+1,vreg2+1; \ + bne cr6,L(label1); \ + vcmpnezb. v7,vreg1,vreg2; \ +@@ -94,9 +83,7 @@ + ble cr7,L(ret0); \ + sub r5,r5,len_reg; \ + - /* TODO: change this to .machine power10 when the minimum required - binutils allows it. */ - .machine power9 --ENTRY_TOCLESS (STRNCMP, 4) -- /* Check if size is 0. */ -- cmpdi cr0,r5,0 -- beq cr0,L(ret0) -- andi. r7,r3,4095 -- andi. r8,r4,4095 -- cmpldi cr0,r7,4096-16 -- cmpldi cr1,r8,4096-16 -- bgt cr0,L(crosses) -- bgt cr1,L(crosses) -- COMPARE_16(v4,v5,0) -- addi r3,r3,16 -- addi r4,r4,16 -- --L(crosses): -- andi. 
r7,r3,15 -- subfic r7,r7,16 /* r7(nalign1) = 16 - (str1 & 15). */ -- andi. r9,r4,15 -- subfic r8,r9,16 /* r8(nalign2) = 16 - (str2 & 15). */ -- cmpld cr7,r7,r8 -- beq cr7,L(same_aligned) -- blt cr7,L(nalign1_min) -- -- /* nalign2 is minimum and s2 pointer is aligned. */ -- CHECK_N_BYTES(r3,r4,r8) -- /* Are we on the 64B hunk which crosses a page? */ -- andi. r10,r3,63 /* Determine offset into 64B hunk. */ -- andi. r8,r3,15 /* The offset into the 16B hunk. */ -- neg r7,r3 -- andi. r9,r7,15 /* Number of bytes after a 16B cross. */ -- rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */ -- beq L(compare_64_pagecross) -- mtctr r7 -- b L(compare_64B_unaligned) -- -- /* nalign1 is minimum and s1 pointer is aligned. */ --L(nalign1_min): -- CHECK_N_BYTES(r3,r4,r7) -- /* Are we on the 64B hunk which crosses a page? */ -- andi. r10,r4,63 /* Determine offset into 64B hunk. */ -- andi. r8,r4,15 /* The offset into the 16B hunk. */ -- neg r7,r4 -- andi. r9,r7,15 /* Number of bytes after a 16B cross. */ -- rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */ -- beq L(compare_64_pagecross) -- mtctr r7 -- -- .p2align 5 --L(compare_64B_unaligned): -- COMPARE_16(v4,v5,0) -- COMPARE_16(v4,v5,16) -- COMPARE_16(v4,v5,32) -- COMPARE_16(v4,v5,48) -- addi r3,r3,64 -- addi r4,r4,64 -- bdnz L(compare_64B_unaligned) -- -- /* Cross the page boundary of s2, carefully. Only for first -- iteration we have to get the count of 64B blocks to be checked. -- From second iteration and beyond, loop counter is always 63. 
*/ --L(compare_64_pagecross): -- li r11, 63 -- mtctr r11 -- cmpldi r10,16 -- ble L(cross_4) -- cmpldi r10,32 -- ble L(cross_3) -- cmpldi r10,48 -- ble L(cross_2) --L(cross_1): -- CHECK_N_BYTES(r3,r4,r9) -- CHECK_N_BYTES(r3,r4,r8) -- COMPARE_16(v4,v5,0) -- COMPARE_16(v4,v5,16) -- COMPARE_16(v4,v5,32) -- addi r3,r3,48 -- addi r4,r4,48 -- b L(compare_64B_unaligned) --L(cross_2): -- COMPARE_16(v4,v5,0) -- addi r3,r3,16 -- addi r4,r4,16 -- CHECK_N_BYTES(r3,r4,r9) -- CHECK_N_BYTES(r3,r4,r8) -- COMPARE_16(v4,v5,0) -- COMPARE_16(v4,v5,16) -- addi r3,r3,32 -- addi r4,r4,32 -- b L(compare_64B_unaligned) --L(cross_3): -- COMPARE_16(v4,v5,0) -- COMPARE_16(v4,v5,16) -- addi r3,r3,32 -- addi r4,r4,32 -- CHECK_N_BYTES(r3,r4,r9) -- CHECK_N_BYTES(r3,r4,r8) -- COMPARE_16(v4,v5,0) -- addi r3,r3,16 -- addi r4,r4,16 -- b L(compare_64B_unaligned) --L(cross_4): -- COMPARE_16(v4,v5,0) -- COMPARE_16(v4,v5,16) -- COMPARE_16(v4,v5,32) -- addi r3,r3,48 -- addi r4,r4,48 -- CHECK_N_BYTES(r3,r4,r9) -- CHECK_N_BYTES(r3,r4,r8) -- b L(compare_64B_unaligned) -- --L(same_aligned): -- CHECK_N_BYTES(r3,r4,r7) -- /* Align s1 to 32B and adjust s2 address. -- Use lxvp only if both s1 and s2 are 32B aligned. */ -- COMPARE_16(v4,v5,0) -- COMPARE_16(v4,v5,16) -- COMPARE_16(v4,v5,32) -- COMPARE_16(v4,v5,48) -- addi r3,r3,64 -- addi r4,r4,64 -- COMPARE_16(v4,v5,0) -- COMPARE_16(v4,v5,16) -- addi r5,r5,32 -- -- clrldi r6,r3,59 -- subfic r7,r6,32 -- add r3,r3,r7 -- add r4,r4,r7 -- subf r5,r7,r5 -- andi. r7,r4,0x1F -- beq cr0,L(32B_aligned_loop) -- -- .p2align 5 --L(16B_aligned_loop): -- COMPARE_16(v4,v5,0) -- COMPARE_16(v4,v5,16) -- COMPARE_16(v4,v5,32) -- COMPARE_16(v4,v5,48) -- addi r3,r3,64 -- addi r4,r4,64 -- b L(16B_aligned_loop) -- -- /* Calculate and return the difference. */ --L(different): -- TAIL_FIRST_16B(v4,v5) -- -- .p2align 5 --L(32B_aligned_loop): -- COMPARE_32(v14,v16,0,tail1,tail2) ++ .machine power10 + ENTRY_TOCLESS (STRNCMP, 4) + /* Check if size is 0. 
*/ + cmpdi cr0,r5,0 +@@ -246,21 +233,15 @@ L(different): + .p2align 5 + L(32B_aligned_loop): + COMPARE_32(v14,v16,0,tail1,tail2) - COMPARE_32(v18,v20,32,tail3,tail4) - COMPARE_32(v22,v24,64,tail5,tail6) - COMPARE_32(v26,v28,96,tail7,tail8) -- addi r3,r3,128 -- addi r4,r4,128 -- b L(32B_aligned_loop) -- --L(tail1): TAIL_FIRST_16B(v15,v17) --L(tail2): TAIL_SECOND_16B(v14,v16) ++ COMPARE_32(v14,v16,32,tail1,tail2) ++ COMPARE_32(v14,v16,64,tail1,tail2) ++ COMPARE_32(v14,v16,96,tail1,tail2) + addi r3,r3,128 + addi r4,r4,128 + b L(32B_aligned_loop) + + L(tail1): TAIL_FIRST_16B(v15,v17) + L(tail2): TAIL_SECOND_16B(v14,v16) -L(tail3): TAIL_FIRST_16B(v19,v21) -L(tail4): TAIL_SECOND_16B(v18,v20) -L(tail5): TAIL_FIRST_16B(v23,v25) -L(tail6): TAIL_SECOND_16B(v22,v24) -L(tail7): TAIL_FIRST_16B(v27,v29) -L(tail8): TAIL_SECOND_16B(v26,v28) -- -- .p2align 5 --L(ret0): -- li r3,0 -- blr -- --END(STRNCMP) --libc_hidden_builtin_def(strncmp) + + .p2align 5 + L(ret0): diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile -index dc7c5b14ee..142e6c24c7 100644 +index dc7c5b14ee..c40feea250 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -31,12 +31,11 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ @@ -8298,27 +12204,17 @@ - strlen-power10 +sysdep_routines += memcmp-power10 memcpy-power10 memmove-power10 memset-power10 \ + rawmemchr-power9 rawmemchr-power10 \ -+ strcmp-power9 strncmp-power9 \ ++ strcmp-power9 strcmp-power10 strncmp-power9 strncmp-power10 \ + strcpy-power9 strcat-power10 stpcpy-power9 \ + strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10 endif CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c -index 0a31a5853c..de288a0d80 100644 
+index 0a31a5853c..65debb5ee5 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c -@@ -164,9 +164,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - /* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c. */ - IFUNC_IMPL (i, name, strncmp, - #ifdef __LITTLE_ENDIAN__ -- IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_1 -- && hwcap & PPC_FEATURE_HAS_VSX, -- __strncmp_power10) - IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00 - && hwcap & PPC_FEATURE_HAS_ALTIVEC, - __strncmp_power9) -@@ -229,12 +226,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, +@@ -229,12 +229,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/memchr.c. */ IFUNC_IMPL (i, name, memchr, @@ -8331,17 +12227,6 @@ IFUNC_IMPL_ADD (array, i, memchr, hwcap2 & PPC_FEATURE2_ARCH_2_07 && hwcap & PPC_FEATURE_HAS_ALTIVEC, -@@ -386,10 +377,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - /* Support sysdeps/powerpc/powerpc64/multiarch/strcmp.c. 
*/ - IFUNC_IMPL (i, name, strcmp, - #ifdef __LITTLE_ENDIAN__ -- IFUNC_IMPL_ADD (array, i, strcmp, -- (hwcap2 & PPC_FEATURE2_ARCH_3_1) -- && (hwcap & PPC_FEATURE_HAS_VSX), -- __strcmp_power10) - IFUNC_IMPL_ADD (array, i, strcmp, - hwcap2 & PPC_FEATURE2_ARCH_3_00 - && hwcap & PPC_FEATURE_HAS_ALTIVEC, diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S b/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S deleted file mode 100644 index c9d2f4efd1..0000000000 @@ -8411,111 +12296,26 @@ weak_alias (__memchr, memchr) libc_hidden_builtin_def (memchr) diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S -deleted file mode 100644 -index 7b45fcd63a..0000000000 +index 7b45fcd63a..a4ee7fb53c 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S -+++ /dev/null -@@ -1,26 +0,0 @@ --/* Optimized strcmp implementation for POWER10/PPC64. ++++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S +@@ -1,5 +1,5 @@ + /* Optimized strcmp implementation for POWER10/PPC64. - Copyright (C) 2021-2025 Free Software Foundation, Inc. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, see -- . 
*/ -- --#if defined __LITTLE_ENDIAN__ && IS_IN (libc) --#define STRCMP __strcmp_power10 -- --#undef libc_hidden_builtin_def --#define libc_hidden_builtin_def(name) -- --#include --#endif /* __LITTLE_ENDIAN__ && IS_IN (libc) */ -diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c -index 3c636e3bbc..7c77c084a7 100644 ---- a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c -+++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c -@@ -29,16 +29,12 @@ extern __typeof (strcmp) __strcmp_power7 attribute_hidden; - extern __typeof (strcmp) __strcmp_power8 attribute_hidden; - # ifdef __LITTLE_ENDIAN__ - extern __typeof (strcmp) __strcmp_power9 attribute_hidden; --extern __typeof (strcmp) __strcmp_power10 attribute_hidden; - # endif - - # undef strcmp ++ Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. - libc_ifunc_redirected (__redirect_strcmp, strcmp, - # ifdef __LITTLE_ENDIAN__ -- (hwcap2 & PPC_FEATURE2_ARCH_3_1 -- && hwcap & PPC_FEATURE_HAS_VSX) -- ? __strcmp_power10 : - (hwcap2 & PPC_FEATURE2_ARCH_3_00 - && hwcap & PPC_FEATURE_HAS_ALTIVEC) - ? __strcmp_power9 : + The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S -deleted file mode 100644 -index 43879085e2..0000000000 +index 43879085e2..bb25bc75b8 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S -+++ /dev/null -@@ -1,25 +0,0 @@ ++++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S +@@ -1,4 +1,4 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. 
-- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, see -- . */ -- --#if defined __LITTLE_ENDIAN__ && IS_IN (libc) --#define STRNCMP __strncmp_power10 -- --#undef libc_hidden_builtin_def --#define libc_hidden_builtin_def(name) -- --#include --#endif -diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c -index 0a664a620d..4cfe27fa45 100644 ---- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c -+++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c -@@ -29,7 +29,6 @@ extern __typeof (strncmp) __strncmp_ppc attribute_hidden; - extern __typeof (strncmp) __strncmp_power8 attribute_hidden; - # ifdef __LITTLE_ENDIAN__ - extern __typeof (strncmp) __strncmp_power9 attribute_hidden; --extern __typeof (strncmp) __strncmp_power10 attribute_hidden; - # endif - # undef strncmp ++/* Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. -@@ -37,9 +36,6 @@ extern __typeof (strncmp) __strncmp_power10 attribute_hidden; - ifunc symbol properly. */ - libc_ifunc_redirected (__redirect_strncmp, strncmp, - # ifdef __LITTLE_ENDIAN__ -- (hwcap2 & PPC_FEATURE2_ARCH_3_1 -- && hwcap & PPC_FEATURE_HAS_VSX) -- ? __strncmp_power10 : - (hwcap2 & PPC_FEATURE2_ARCH_3_00 - && hwcap & PPC_FEATURE_HAS_ALTIVEC) - ? 
__strncmp_power9 : + The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile index a123e28a57..7fcbc72bc0 100644 --- a/sysdeps/pthread/Makefile @@ -8964,6 +12764,34 @@ ifeq ($(subdir),stdlib) gen-as-const-headers += ucontext_i.sym +diff --git a/sysdeps/unix/sysv/linux/aarch64/clone.S b/sysdeps/unix/sysv/linux/aarch64/clone.S +index 97e1afa57f..bab0ce7719 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/clone.S ++++ b/sysdeps/unix/sysv/linux/aarch64/clone.S +@@ -51,6 +51,9 @@ ENTRY(__clone) + and x1, x1, -16 + cbz x1, .Lsyscall_error + ++ /* Clear ZA state of SME. */ ++ CALL_LIBC_ARM_ZA_DISABLE ++ + /* Do the system call. */ + /* X0:flags, x1:newsp, x2:parenttidptr, x3:newtls, x4:childtid. */ + mov x0, x2 /* flags */ +diff --git a/sysdeps/unix/sysv/linux/aarch64/clone3.S b/sysdeps/unix/sysv/linux/aarch64/clone3.S +index 443e117bf9..f1f22c9865 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/clone3.S ++++ b/sysdeps/unix/sysv/linux/aarch64/clone3.S +@@ -50,6 +50,9 @@ ENTRY(__clone3) + cbz x10, .Lsyscall_error /* No NULL cl_args pointer. */ + cbz x2, .Lsyscall_error /* No NULL function pointer. */ + ++ /* Clear ZA state of SME. */ ++ CALL_LIBC_ARM_ZA_DISABLE ++ + /* Do the system call, the kernel expects: + x8: system call number + x0: cl_args diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c index 6d63c8a9ec..1acc82d077 100644 --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c @@ -8976,6 +12804,95 @@ #define DCZID_DZP_MASK (1 << 4) #define DCZID_BS_MASK (0xf) +diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S +index 695fc5b9b5..dedf3798a4 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S ++++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S +@@ -49,25 +49,8 @@ ENTRY (__setcontext) + cbz x0, 1f + b C_SYMBOL_NAME (__syscall_error) + 1: +- /* Disable ZA of SME. 
*/ +-#if HAVE_AARCH64_PAC_RET +- PACIASP +- cfi_window_save +-#endif +- stp x29, x30, [sp, -16]! +- cfi_adjust_cfa_offset (16) +- cfi_rel_offset (x29, 0) +- cfi_rel_offset (x30, 8) +- mov x29, sp +- bl __libc_arm_za_disable +- ldp x29, x30, [sp], 16 +- cfi_adjust_cfa_offset (-16) +- cfi_restore (x29) +- cfi_restore (x30) +-#if HAVE_AARCH64_PAC_RET +- AUTIASP +- cfi_window_save +-#endif ++ /* Clear ZA state of SME. */ ++ CALL_LIBC_ARM_ZA_DISABLE + /* Restore the general purpose registers. */ + mov x0, x9 + cfi_def_cfa (x0, 0) +diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h +index b813805931..048fe11ad7 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h ++++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h +@@ -150,6 +150,19 @@ + mov x8, SYS_ify (syscall_name); \ + svc 0 + ++/* Clear ZA state of SME (ASM version). */ ++/* The __libc_arm_za_disable function has special calling convention ++ that allows to call it without stack manipulation and preserving ++ most of the registers. */ ++ .macro CALL_LIBC_ARM_ZA_DISABLE ++ cfi_remember_state ++ mov x13, x30 ++ cfi_register(x30, x13) ++ bl __libc_arm_za_disable ++ mov x30, x13 ++ cfi_restore_state ++ .endm ++ + #else /* not __ASSEMBLER__ */ + + # ifdef __LP64__ +@@ -235,6 +248,32 @@ + #undef HAVE_INTERNAL_BRK_ADDR_SYMBOL + #define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1 + ++/* Clear ZA state of SME (C version). */ ++/* The __libc_arm_za_disable function has special calling convention ++ that allows to call it without stack manipulation and preserving ++ most of the registers. 
*/ ++#define CALL_LIBC_ARM_ZA_DISABLE() \ ++({ \ ++ unsigned long int __tmp; \ ++ asm volatile ( \ ++ " .cfi_remember_state\n" \ ++ " mov %0, x30\n" \ ++ " .cfi_register x30, %0\n" \ ++ " bl __libc_arm_za_disable\n" \ ++ " mov x30, %0\n" \ ++ " .cfi_restore_state\n" \ ++ : "=r" (__tmp) \ ++ : \ ++ : "x14", "x15", "x16", "x17", "x18", "memory" ); \ ++}) ++ ++/* Do clear ZA state of SME before making normal clone syscall. */ ++#define INLINE_CLONE_SYSCALL(a0, a1, a2, a3, a4) \ ++({ \ ++ CALL_LIBC_ARM_ZA_DISABLE (); \ ++ INLINE_SYSCALL_CALL (clone, a0, a1, a2, a3, a4); \ ++}) ++ + #endif /* __ASSEMBLER__ */ + + #endif /* linux/aarch64/sysdep.h */ diff --git a/sysdeps/unix/sysv/linux/aarch64/tst-aarch64-pkey.c b/sysdeps/unix/sysv/linux/aarch64/tst-aarch64-pkey.c index 3ff33ef72a..c884efc3b4 100644 --- a/sysdeps/unix/sysv/linux/aarch64/tst-aarch64-pkey.c @@ -9614,6 +13531,20 @@ +} + +#include +diff --git a/sysdeps/unix/sysv/linux/aarch64/vfork.S b/sysdeps/unix/sysv/linux/aarch64/vfork.S +index d5943a7485..2600bc9be3 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/vfork.S ++++ b/sysdeps/unix/sysv/linux/aarch64/vfork.S +@@ -27,6 +27,9 @@ + + ENTRY (__vfork) + ++ /* Clear ZA state of SME. 
*/ ++ CALL_LIBC_ARM_ZA_DISABLE ++ + mov x0, #0x4111 /* CLONE_VM | CLONE_VFORK | SIGCHLD */ + mov x1, sp + DO_CALL (clone, 2) diff --git a/sysdeps/unix/sysv/linux/bits/sched.h b/sysdeps/unix/sysv/linux/bits/sched.h index 3656e98eda..39b0b3d19c 100644 --- a/sysdeps/unix/sysv/linux/bits/sched.h @@ -9678,10 +13609,25 @@ int ret = INTERNAL_SYSCALL_CALL (rseq, RSEQ_SELF (), size, 0, RSEQ_SIG); if (!INTERNAL_SYSCALL_ERROR_P (ret)) diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile -index 5311b594af..01b0192ddf 100644 +index 5311b594af..c814060e08 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile -@@ -21,6 +21,9 @@ tests += \ +@@ -4,7 +4,13 @@ endif + + ifeq ($(subdir),elf) + sysdep_routines += get-cpuid-feature-leaf +-sysdep-dl-routines += dl-get-cpu-features ++sysdep-dl-routines += \ ++ dl-get-cpu-features \ ++ dl-tlsdesc \ ++ tls_get_addr \ ++ tlsdesc \ ++# sysdep-dl-routines ++ + sysdep_headers += \ + bits/platform/features.h \ + bits/platform/x86.h \ +@@ -21,6 +27,9 @@ tests += \ tst-cpu-features-supports-static \ tst-get-cpu-features \ tst-get-cpu-features-static \ @@ -9691,7 +13637,7 @@ tst-hwcap-tunables \ # tests tests-static += \ -@@ -91,6 +94,25 @@ CFLAGS-tst-gnu2-tls2.c += -msse +@@ -91,6 +100,42 @@ CFLAGS-tst-gnu2-tls2.c += -msse CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell @@ -9714,9 +13660,40 @@ + $(objpfx)tst-gnu2-tls2mod0.so \ + $(objpfx)tst-gnu2-tls2mod1.so \ + $(objpfx)tst-gnu2-tls2mod2.so ++ ++CFLAGS-tst-tls23.c += -msse2 ++CFLAGS-tst-tls23-mod.c += -msse2 -mtune=haswell ++ ++LDFLAGS-tst-tls23 += -rdynamic ++tst-tls23-mod.so-no-z-defs = yes ++ ++$(objpfx)tst-tls23-mod.so: $(libsupport) ++ ++tests-special += $(objpfx)check-gnu2-tls.out ++ ++$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so ++ LC_ALL=C $(READELF) -V -W $< \ ++ | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ ++ | grep GLIBC_ABI_GNU2_TLS > $@; \ ++ 
$(evaluate-test) ++generated += check-gnu2-tls.out endif ifeq ($(subdir),math) +diff --git a/sysdeps/x86/Versions b/sysdeps/x86/Versions +index 4b10c4b5d7..e8dcfccbe4 100644 +--- a/sysdeps/x86/Versions ++++ b/sysdeps/x86/Versions +@@ -7,4 +7,9 @@ libc { + GLIBC_2.33 { + __x86_get_cpuid_feature_leaf; + } ++ GLIBC_ABI_GNU2_TLS { ++ # This symbol is used only for empty version map and will be removed ++ # by scripts/versions.awk. ++ __placeholder_only_for_empty_version_map; ++ } + } diff --git a/sysdeps/x86/bits/floatn.h b/sysdeps/x86/bits/floatn.h index d197cb10dd..4674165bd7 100644 --- a/sysdeps/x86/bits/floatn.h @@ -9749,7 +13726,7 @@ # endif diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c -index 27abaca8b7..e50f1d6932 100644 +index 27abaca8b7..6b1651a6f0 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -24,6 +24,7 @@ @@ -9910,17 +13887,19 @@ INTEL_ATOM_GRANDRIDGE, INTEL_ATOM_TREMONT, -@@ -575,7 +540,9 @@ enum +@@ -575,7 +540,11 @@ enum INTEL_BIGCORE_METEORLAKE, INTEL_BIGCORE_LUNARLAKE, INTEL_BIGCORE_ARROWLAKE, + INTEL_BIGCORE_PANTHERLAKE, INTEL_BIGCORE_GRANITERAPIDS, + INTEL_BIGCORE_DIAMONDRAPIDS, ++ INTEL_BIGCORE_WILDCATLAKE, ++ INTEL_BIGCORE_NOVALAKE, /* Mixed (bigcore + atom SOC). 
*/ INTEL_MIXED_LAKEFIELD, -@@ -589,7 +556,7 @@ enum +@@ -589,7 +558,7 @@ enum INTEL_UNKNOWN, }; @@ -9929,7 +13908,7 @@ intel_get_fam6_microarch (unsigned int model, __attribute__ ((unused)) unsigned int stepping) { -@@ -620,6 +587,8 @@ intel_get_fam6_microarch (unsigned int model, +@@ -620,6 +589,8 @@ intel_get_fam6_microarch (unsigned int model, return INTEL_ATOM_GOLDMONT_PLUS; case 0xAF: return INTEL_ATOM_SIERRAFOREST; @@ -9938,7 +13917,7 @@ case 0xB6: return INTEL_ATOM_GRANDRIDGE; case 0x86: -@@ -727,8 +696,12 @@ intel_get_fam6_microarch (unsigned int model, +@@ -727,8 +698,14 @@ intel_get_fam6_microarch (unsigned int model, return INTEL_BIGCORE_METEORLAKE; case 0xbd: return INTEL_BIGCORE_LUNARLAKE; @@ -9948,10 +13927,12 @@ return INTEL_BIGCORE_ARROWLAKE; + case 0xCC: + return INTEL_BIGCORE_PANTHERLAKE; ++ case 0xD5: ++ return INTEL_BIGCORE_WILDCATLAKE; case 0xAD: case 0xAE: return INTEL_BIGCORE_GRANITERAPIDS; -@@ -792,133 +765,20 @@ init_cpu_features (struct cpu_features *cpu_features) +@@ -792,133 +769,20 @@ init_cpu_features (struct cpu_features *cpu_features) cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] &= ~bit_arch_Avoid_Non_Temporal_Memset; @@ -10090,7 +14071,7 @@ case INTEL_BIGCORE_SKYLAKE_AVX512: /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX. */ if (stepping <= 5) -@@ -927,38 +787,163 @@ init_cpu_features (struct cpu_features *cpu_features) +@@ -927,38 +791,176 @@ init_cpu_features (struct cpu_features *cpu_features) case INTEL_BIGCORE_KABYLAKE: /* NB: Although the errata documents that for model == 0x8e @@ -10146,17 +14127,28 @@ + break; } } ++ else if (family == 18) ++ switch (model) ++ { ++ case 0x01: ++ case 0x03: ++ microarch = INTEL_BIGCORE_NOVALAKE; ++ break; ++ ++ default: ++ break; ++ } + else if (family == 19) + switch (model) + { + case 0x01: + microarch = INTEL_BIGCORE_DIAMONDRAPIDS; + break; - ++ + default: + break; + } -+ + + switch (microarch) + { + /* Atom / KNL tuning. 
*/ @@ -10262,6 +14254,8 @@ + case INTEL_BIGCORE_LUNARLAKE: + case INTEL_BIGCORE_ARROWLAKE: + case INTEL_BIGCORE_PANTHERLAKE: ++ case INTEL_BIGCORE_WILDCATLAKE: ++ case INTEL_BIGCORE_NOVALAKE: + case INTEL_BIGCORE_SAPPHIRERAPIDS: + case INTEL_BIGCORE_EMERALDRAPIDS: + case INTEL_BIGCORE_GRANITERAPIDS: @@ -10275,7 +14269,7 @@ /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER if AVX512ER is available. Don't use AVX512 to avoid lower CPU -@@ -1159,6 +1144,9 @@ no_cpuid: +@@ -1159,6 +1161,9 @@ no_cpuid: TUNABLE_CALLBACK (set_prefer_map_32bit_exec)); #endif @@ -10285,7 +14279,7 @@ bool disable_xsave_features = false; if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE)) -@@ -1212,6 +1200,7 @@ no_cpuid: +@@ -1212,6 +1217,7 @@ no_cpuid: CPU_FEATURE_UNSET (cpu_features, FMA4); } @@ -10347,7 +14341,7 @@ /* Unused for x86. */ # define INIT_ARCH() diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h -index 541393f1dc..c3c73e75dd 100644 +index 541393f1dc..b8e963b654 100644 --- a/sysdeps/x86/sysdep.h +++ b/sysdeps/x86/sysdep.h @@ -102,6 +102,9 @@ @@ -10370,6 +14364,36 @@ /* States to be included in xsave_state_size. */ # define FULL_STATE_SAVE_MASK STATE_SAVE_MASK #endif +@@ -177,6 +183,29 @@ + + #define atom_text_section .section ".text.atom", "ax" + ++#ifndef DL_STACK_ALIGNMENT ++/* Due to GCC bug: ++ ++ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 ++ ++ __tls_get_addr may be called with 8-byte/4-byte stack alignment. ++ Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't ++ assume that stack will be always aligned at 16 bytes. */ ++# ifdef __x86_64__ ++# define DL_STACK_ALIGNMENT 8 ++# define MINIMUM_ALIGNMENT 16 ++# else ++# define DL_STACK_ALIGNMENT 4 ++# endif ++#endif ++ ++/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align stack for ++ STATE_SAVE or align stack to MINIMUM_ALIGNMENT bytes before calling ++ _dl_fixup/__tls_get_addr. 
*/ ++#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ ++ (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ ++ || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) ++ + #endif /* __ASSEMBLER__ */ + + #endif /* _X86_SYSDEP_H */ diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c new file mode 100644 index 0000000000..f0024c143d @@ -10391,11 +14415,90 @@ +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c @@ -0,0 +1 @@ +#include +diff --git a/sysdeps/x86/tst-tls23.c b/sysdeps/x86/tst-tls23.c +new file mode 100644 +index 0000000000..6130d91cf8 +--- /dev/null ++++ b/sysdeps/x86/tst-tls23.c +@@ -0,0 +1,22 @@ ++#ifndef __x86_64__ ++#include ++ ++#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2) ++#endif ++ ++/* Set XMM0...XMM7 to all 1s. */ ++#define PREPARE_MALLOC() \ ++{ \ ++ asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" ); \ ++ asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" ); \ ++ asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" ); \ ++ asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" ); \ ++ asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" ); \ ++ asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" ); \ ++ asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" ); \ ++ asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" ); \ ++} ++ ++#include ++ ++v2di v1, v2, v3; +diff --git a/sysdeps/x86/tst-tls23.h b/sysdeps/x86/tst-tls23.h +new file mode 100644 +index 0000000000..21cee4ca07 +--- /dev/null ++++ b/sysdeps/x86/tst-tls23.h +@@ -0,0 +1,35 @@ ++/* Test that __tls_get_addr preserves XMM registers. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++typedef long long v2di __attribute__((vector_size(16))); ++extern v2di v1, v2, v3; ++ ++#define BEFORE_TLS_CALL() \ ++ v1 = __extension__(v2di){0, 0}; \ ++ v2 = __extension__(v2di){0, 0}; ++ ++#define AFTER_TLS_CALL() \ ++ v3 = __extension__(v2di){0, 0}; \ ++ asm volatile ("" : "+x" (v3)); \ ++ union { v2di x; long long a[2]; } u; \ ++ u.x = v3; \ ++ TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0); ++ ++#include diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile -index 9d31685e02..5723ec1847 100644 +index 9d31685e02..a1dc28a700 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile -@@ -142,7 +142,6 @@ CFLAGS-tst-avxmod.c += $(AVX-CFLAGS) +@@ -41,9 +41,6 @@ ifeq ($(subdir),elf) + CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ + -mno-mmx) + +-sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr +- +-tests += ifuncmain8 + modules-names += ifuncmod8 + + $(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so +@@ -142,7 +139,6 @@ CFLAGS-tst-avxmod.c += $(AVX-CFLAGS) AVX512-CFLAGS = -mavx512f CFLAGS-tst-audit10-aux.c += $(AVX512-CFLAGS) CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS) @@ -10403,6 +14506,38 @@ CFLAGS-tst-avx512-aux.c += $(AVX512-CFLAGS) CFLAGS-tst-avx512mod.c += $(AVX512-CFLAGS) +@@ -212,6 +208,15 @@ LDFLAGS-tst-plt-rewrite2 = -Wl,-z,now + LDFLAGS-tst-plt-rewritemod2.so = -Wl,-z,now,-z,undefs + tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2 + $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so ++ ++tests-special += $(objpfx)check-dt-x86-64-plt.out ++ ++$(objpfx)check-dt-x86-64-plt.out: 
$(common-objpfx)libc.so ++ LC_ALL=C $(READELF) -V -W $< \ ++ | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ ++ | grep GLIBC_ABI_DT_X86_64_PLT > $@; \ ++ $(evaluate-test) ++generated += check-dt-x86-64-plt.out + endif + + test-internal-extras += tst-gnu2-tls2mod1 +diff --git a/sysdeps/x86_64/Versions b/sysdeps/x86_64/Versions +index e94758b236..6a989ad3b3 100644 +--- a/sysdeps/x86_64/Versions ++++ b/sysdeps/x86_64/Versions +@@ -5,6 +5,11 @@ libc { + GLIBC_2.13 { + __fentry__; + } ++ GLIBC_ABI_DT_X86_64_PLT { ++ # This symbol is used only for empty version map and will be removed ++ # by scripts/versions.awk. ++ __placeholder_only_for_empty_version_map; ++ } + } + libm { + GLIBC_2.1 { diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h index 9965ddd2c0..4f496de8c8 100644 --- a/sysdeps/x86_64/dl-tlsdesc-dynamic.h @@ -10416,6 +14551,70 @@ #endif /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9, r10 and r11. */ +diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S +index d1bb125560..9a55fc52bb 100644 +--- a/sysdeps/x86_64/dl-tlsdesc.S ++++ b/sysdeps/x86_64/dl-tlsdesc.S +@@ -22,7 +22,6 @@ + #include + #include + #include "tlsdesc.h" +-#include "dl-trampoline-save.h" + + /* Area on stack to save and restore registers used for parameter + passing when calling _dl_tlsdesc_dynamic. */ +diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/sysdeps/x86_64/dl-trampoline-save.h +deleted file mode 100644 +index 761128d980..0000000000 +--- a/sysdeps/x86_64/dl-trampoline-save.h ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* x86-64 PLT trampoline register save macros. +- Copyright (C) 2024-2025 Free Software Foundation, Inc. +- This file is part of the GNU C Library. 
+- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#ifndef DL_STACK_ALIGNMENT +-/* Due to GCC bug: +- +- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 +- +- __tls_get_addr may be called with 8-byte stack alignment. Although +- this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume +- that stack will be always aligned at 16 bytes. */ +-# define DL_STACK_ALIGNMENT 8 +-#endif +- +-/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align +- stack to 16 bytes before calling _dl_fixup. */ +-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ +- (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ +- || 16 > DL_STACK_ALIGNMENT) +diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S +index a055722e64..ac85f96794 100644 +--- a/sysdeps/x86_64/dl-trampoline.S ++++ b/sysdeps/x86_64/dl-trampoline.S +@@ -22,7 +22,6 @@ + #include + #include + #include +-#include "dl-trampoline-save.h" + + /* Area on stack to save and restore registers used for parameter + passing when calling _dl_fixup. 
*/ diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index e823d2fcc6..3403422443 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile @@ -10650,6 +14849,19 @@ 1, __wcpncpy_generic)) +diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h +index f95cca6ae5..50af138230 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h ++++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h +@@ -35,7 +35,7 @@ IFUNC_SELECTOR (void) + + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, +- AVX_Fast_Unaligned_Load, !)) ++ AVX_Fast_Unaligned_Load,)) + { + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)) + { diff --git a/sysdeps/x86_64/tst-auditmod10b.c b/sysdeps/x86_64/tst-auditmod10b.c index 6eb21b6f06..0b994ef0f0 100644 --- a/sysdeps/x86_64/tst-auditmod10b.c @@ -10796,3 +15008,15 @@ return 0; } +diff --git a/time/strftime_l.c b/time/strftime_l.c +index f51d926b46..5cb5f5d213 100644 +--- a/time/strftime_l.c ++++ b/time/strftime_l.c +@@ -40,6 +40,7 @@ + #endif + + #include ++#include + #include /* Some systems define `time_t' here. 
*/ + + #ifdef TIME_WITH_SYS_TIME diff -Nru glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff --- glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff 1970-01-01 00:00:00.000000000 +0000 +++ glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff 2026-03-01 20:10:06.000000000 +0000 @@ -0,0 +1,62 @@ +--- a/sysdeps/x86/Makefile ++++ b/sysdeps/x86/Makefile +@@ -127,15 +127,6 @@ LDFLAGS-tst-tls23 += -rdynamic + tst-tls23-mod.so-no-z-defs = yes + + $(objpfx)tst-tls23-mod.so: $(libsupport) +- +-tests-special += $(objpfx)check-gnu2-tls.out +- +-$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so +- LC_ALL=C $(READELF) -V -W $< \ +- | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ +- | grep GLIBC_ABI_GNU2_TLS > $@; \ +- $(evaluate-test) +-generated += check-gnu2-tls.out + endif + + ifeq ($(subdir),math) +--- a/sysdeps/x86/Versions ++++ b/sysdeps/x86/Versions +@@ -7,9 +7,4 @@ libc { + GLIBC_2.33 { + __x86_get_cpuid_feature_leaf; + } +- GLIBC_ABI_GNU2_TLS { +- # This symbol is used only for empty version map and will be removed +- # by scripts/versions.awk. 
+- __placeholder_only_for_empty_version_map; +- } + } +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -217,6 +217,15 @@ $(objpfx)check-dt-x86-64-plt.out: $(common-objpfx)libc.so + | grep GLIBC_ABI_DT_X86_64_PLT > $@; \ + $(evaluate-test) + generated += check-dt-x86-64-plt.out ++ ++tests-special += $(objpfx)check-gnu2-tls.out ++ ++$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so ++ LC_ALL=C $(READELF) -V -W $< \ ++ | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ ++ | grep GLIBC_ABI_GNU2_TLS > $@; \ ++ $(evaluate-test) ++generated += check-gnu2-tls.out + endif + + test-internal-extras += tst-gnu2-tls2mod1 +--- a/sysdeps/x86_64/Versions ++++ b/sysdeps/x86_64/Versions +@@ -5,6 +5,11 @@ libc { + GLIBC_2.13 { + __fentry__; + } ++ GLIBC_ABI_GNU2_TLS { ++ # This symbol is used only for empty version map and will be removed ++ # by scripts/versions.awk. ++ __placeholder_only_for_empty_version_map; ++ } + GLIBC_ABI_DT_X86_64_PLT { + # This symbol is used only for empty version map and will be removed + # by scripts/versions.awk. diff -Nru glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff --- glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff 1970-01-01 00:00:00.000000000 +0000 +++ glibc-2.41/debian/patches/i386/local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff 2026-03-01 20:10:06.000000000 +0000 @@ -0,0 +1,32 @@ +--- a/sysdeps/i386/Makefile ++++ b/sysdeps/i386/Makefile +@@ -60,15 +60,6 @@ $(objpfx)tst-ld-sse-use.out: ../sysdeps/i386/tst-ld-sse-use.sh $(objpfx)ld.so + @echo "Checking ld.so for SSE register use. This will take a few seconds..." 
+ $(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \ + $(evaluate-test) +- +-tests-special += $(objpfx)check-gnu-tls.out +- +-$(objpfx)check-gnu-tls.out: $(common-objpfx)libc.so +- LC_ALL=C $(READELF) -V -W $< \ +- | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ +- | grep GLIBC_ABI_GNU_TLS > $@; \ +- $(evaluate-test) +-generated += check-gnu-tls.out + else + CFLAGS-.os += $(if $(filter rtld-%.os,$(@F)), $(rtld-CFLAGS)) + endif +--- a/sysdeps/i386/Versions ++++ b/sysdeps/i386/Versions +@@ -28,11 +28,6 @@ libc { + GLIBC_2.13 { + __fentry__; + } +- GLIBC_ABI_GNU_TLS { +- # This symbol is used only for empty version map and will be removed +- # by scripts/versions.awk. +- __placeholder_only_for_empty_version_map; +- } + } + libm { + GLIBC_2.1 { diff -Nru glibc-2.41/debian/patches/series glibc-2.41/debian/patches/series --- glibc-2.41/debian/patches/series 2025-12-28 16:32:28.000000000 +0000 +++ glibc-2.41/debian/patches/series 2026-03-01 20:10:06.000000000 +0000 @@ -1,5 +1,12 @@ git-updates.diff +# Revert addition of symbol versions used as ABI flags, as the dpkg-shlibdeps +# version in trixie is not able to handle them (see #1122107) +i386/local-revert-i386-add-GLIBC_ABI_GNU_TLS-version.diff +i386/local-revert-i386-add-GLIBC_ABI_GNU2_TLS-version.diff +amd64/local-revert-x86-64-add-GLIBC_ABI_DT_X86_64_PLT-version.diff +amd64/local-revert-x86-64-add-GLIBC_ABI_GNU2_TLS-version.diff + locale/check-unknown-symbols.diff locale/locale-print-LANGUAGE.diff locale/LC_IDENTIFICATION-optional-fields.diff