Version in base suite: 5.1.8-0+deb12u1

Base version: ffmpeg_5.1.8-0+deb12u1
Target version: ffmpeg_5.1.9-0+deb12u1
Base file: /srv/ftp-master.debian.org/ftp/pool/main/f/ffmpeg/ffmpeg_5.1.8-0+deb12u1.dsc
Target file: /srv/ftp-master.debian.org/policy/pool/main/f/ffmpeg/ffmpeg_5.1.9-0+deb12u1.dsc

 .forgejo/pre-commit/config.yaml         |   23 +
 .forgejo/workflows/lint.yml             |   29 +
 .forgejo/workflows/test.yml             |   80 ++++
 COPYING.LGPLv2.1                        |   18 
 Changelog                               |  158 ++++++++
 RELEASE                                 |    2 
 VERSION                                 |    2 
 configure                               |    2 
 debian/changelog                        |    6 
 doc/Doxyfile                            |    2 
 doc/build_system.txt                    |    1 
 doc/dev_community/resolution_process.md |    1 
 doc/fate_config.sh.template             |    5 
 doc/nut.texi                            |    1 
 doc/undefined.txt                       |    1 
 ffbuild/libversion.sh                   |    2 
 fftools/ffmpeg.h                        |    3 
 fftools/ffmpeg_opt.c                    |   10 
 libavcodec/aarch64/aacpsdsp_neon.S      |  218 +++++------
 libavcodec/aarch64/fft_neon.S           |   24 -
 libavcodec/aarch64/h264cmc_neon.S       |  414 +++++++++++-----------
 libavcodec/aarch64/h264dsp_neon.S       |  602 ++++++++++++++++----------------
 libavcodec/aarch64/h264qpel_neon.S      |  562 ++++++++++++++---------------
 libavcodec/aarch64/hevcdsp_idct_neon.S  |  294 +++++++--------
 libavcodec/aarch64/hpeldsp_neon.S       |  376 +++++++++----------
 libavcodec/aarch64/me_cmp_neon.S        |    2 
 libavcodec/aarch64/neon.S               |  228 ++++++------
 libavcodec/aarch64/opusdsp_neon.S       |  114 +++---
 libavcodec/aarch64/sbrdsp_neon.S        |  294 +++++++--------
 libavcodec/aarch64/simple_idct_neon.S   |  398 ++++++++++-----------
 libavcodec/aarch64/vp8dsp_neon.S        |  304 ++++++++--------
 libavcodec/adpcm.c                      |    2 
 libavcodec/alsdec.c                     |   17 
 libavcodec/arm/int_neon.S               |    1 
 libavcodec/av1dec.c                     |   16 
 libavcodec/bmp.c                        |    2 
 libavcodec/cfhd.c                       |    9 
 libavcodec/cfhd.h                       |    9 
 libavcodec/cfhdenc.c                    |   12 
 libavcodec/cljrdec.c                    |    1 
 libavcodec/dca_xll.c                    |   20 -
 libavcodec/dfpwmdec.c                   |    5 
 libavcodec/dv_profile.c                 |    1 
 libavcodec/dvdsub_parser.c              |    2 
 libavcodec/escape130.c                  |    2 
 libavcodec/exr.c                        |   21 -
 libavcodec/ffv1_template.c              |    1 
 libavcodec/ffv1enc_template.c           |    1 
 libavcodec/flashsv.c                    |    3 
 libavcodec/golomb.h                     |    4 
 libavcodec/h264_direct.c                |   34 +
 libavcodec/h264_mc_template.c           |    1 
 libavcodec/h264_parser.c                |   11 
 libavcodec/h264_refs.c                  |    6 
 libavcodec/h264_slice.c                 |    9 
 libavcodec/hevc_cabac.c                 |    1 
 libavcodec/imgconvert.c                 |    1 
 libavcodec/imm5.c                       |    2 
 libavcodec/interplayacm.c               |    3 
 libavcodec/jpeg2000dec.c                |    7 
 libavcodec/lcldec.c                     |    4 
 libavcodec/magicyuv.c                   |    3 
 libavcodec/mdec.c                       |    3 
 libavcodec/mjpegdec.c                   |   10 
 libavcodec/mpegaudiodsp_template.c      |    1 
 libavcodec/mpegaudioenc_template.c      |    1 
 libavcodec/mpegvideo_enc.c              |   17 
 libavcodec/msmpeg4.c                    |    1 
 libavcodec/notchlc.c                    |    6 
 libavcodec/omx.c                        |    5 
 libavcodec/qdm2.c                       |    2 
 libavcodec/ralf.c                       |    1 
 libavcodec/rasc.c                       |   10 
 libavcodec/snow_dwt.c                   |    2 
 libavcodec/svq1dec.c                    |    5 
 libavcodec/tdsc.c                       |   10 
 libavcodec/vp3.c                        |    2 
 libavcodec/vp9.c                        |   18 
 libavcodec/wmaenc.c                     |    4 
 libavcodec/x86/fmtconvert.asm           |    1 
 libavcodec/x86/mpegvideoencdsp.asm      |    1 
 libavcodec/xxan.c                       |    2 
 libavcodec/zmbv.c                       |   14 
 libavfilter/aarch64/vf_nlmeans_neon.S   |   78 ++--
 libavfilter/af_amerge.c                 |   18 
 libavfilter/af_lv2.c                    |    6 
 libavfilter/af_pan.c                    |    4 
 libavfilter/afir_template.c             |    2 
 libavfilter/convolution.h               |    1 
 libavfilter/qp_table.c                  |    1 
 libavfilter/scale_eval.c                |   13 
 libavfilter/scene_sad.c                 |    1 
 libavfilter/vf_codecview.c              |   17 
 libavfilter/vf_convolution.c            |   36 -
 libavfilter/vf_find_rect.c              |   55 +-
 libavfilter/vf_kerndeint.c              |    6 
 libavfilter/vf_libopencv.c              |    2 
 libavfilter/vf_neighbor_opencl.c        |    3 
 libavfilter/vf_overlay_cuda.cu          |    1 
 libavfilter/vf_scale.c                  |    4 
 libavfilter/vf_stack.c                  |   38 +-
 libavfilter/vf_v360.c                   |    3 
 libavfilter/vf_zscale.c                 |   13 
 libavformat/avidec.c                    |   10 
 libavformat/cafdec.c                    |    6 
 libavformat/concat.c                    |   12 
 libavformat/dash.c                      |    2 
 libavformat/dashdec.c                   |   84 ++--
 libavformat/demux.c                     |   11 
 libavformat/dhav.c                      |    4 
 libavformat/dss.c                       |    5 
 libavformat/dtshddec.c                  |    7 
 libavformat/fifo_test.c                 |    1 
 libavformat/flac_picture.c              |    6 
 libavformat/g726.c                      |    1 
 libavformat/hls.c                       |   11 
 libavformat/hls_sample_encryption.c     |    1 
 libavformat/hls_sample_encryption.h     |    3 
 libavformat/hlsplaylist.c               |    1 
 libavformat/http.c                      |   11 
 libavformat/icodec.c                    |    2 
 libavformat/iff.c                       |    4 
 libavformat/img2dec.c                   |   15 
 libavformat/img2enc.c                   |    5 
 libavformat/lrcdec.c                    |    2 
 libavformat/matroskadec.c               |    4 
 libavformat/mlvdec.c                    |    8 
 libavformat/mov.c                       |   27 +
 libavformat/mpegts.c                    |    9 
 libavformat/mpegtsenc.c                 |    7 
 libavformat/mpjpegdec.c                 |    2 
 libavformat/os_support.h                |    1 
 libavformat/pcm.c                       |   10 
 libavformat/rdt.c                       |    1 
 libavformat/rsd.c                       |    7 
 libavformat/rtmpproto.c                 |   58 +--
 libavformat/rtpdec_jpeg.c               |    6 
 libavformat/rtpdec_latm.c               |    6 
 libavformat/rtpdec_mpeg4.c              |    2 
 libavformat/rtpdec_qdm2.c               |   10 
 libavformat/rtsp.c                      |   19 -
 libavformat/rtspdec.c                   |    8 
 libavformat/scd.c                       |    3 
 libavformat/segafilm.c                  |    2 
 libavformat/vividas.c                   |    8 
 libavformat/wavdec.c                    |    3 
 libavformat/wtvdec.c                    |    3 
 libavformat/xwma.c                      |    2 
 libavformat/yuv4mpegenc.c               |    3 
 libavutil/aarch64/float_dsp_neon.S      |  200 +++++-----
 libavutil/aes.c                         |    1 
 libavutil/bswap.h                       |    2 
 libavutil/eval.c                        |   15 
 libavutil/hwcontext_cuda_internal.h     |    1 
 libavutil/hwcontext_qsv.h               |    1 
 libavutil/samplefmt.h                   |    3 
 libavutil/tests/blowfish.c              |    1 
 libavutil/timecode.c                    |   11 
 libswresample/aarch64/resample.S        |   88 ++--
 libswresample/rematrix.c                |    5 
 libswresample/resample_template.c       |   17 
 libswresample/soxr_resample.c           |    1 
 libswresample/swresample.c              |   32 +
 libswresample/swresample_frame.c        |    1 
 libswresample/swresample_internal.h     |    1 
 libswresample/version.c                 |    1 
 libswscale/aarch64/hscale.S             |  292 +++++++--------
 libswscale/aarch64/output.S             |   64 +--
 libswscale/aarch64/yuv2rgb_neon.S       |  234 ++++++------
 libswscale/gamma.c                      |    1 
 libswscale/output.c                     |   20 -
 libswscale/rgb2rgb_template.c           |    4 
 libswscale/swscale_unscaled.c           |    6 
 libswscale/utils.c                      |   59 ++-
 libswscale/vscale.c                     |    2 
 libswscale/x86/yuv2rgb_template.c       |   25 +
 tests/extended.ffconcat                 |    1 
 tests/fate/ffprobe.mak                  |    1 
 tests/fate/lossless-audio.mak           |    1 
 tests/ref/fate/zmbv-8bit                |    1 
 tests/simple1.ffconcat                  |    1 
 tests/simple2.ffconcat                  |    1 
 tools/check_arm_indent.sh               |   58 +++
 tools/indent_arm_assembly.pl            |  243 ++++++++++++
 184 files changed, 3822 insertions(+), 2791 deletions(-)

dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmp3pcrtk3s/ffmpeg_5.1.8-0+deb12u1.dsc: no acceptable signature found
dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmp3pcrtk3s/ffmpeg_5.1.9-0+deb12u1.dsc: no acceptable signature found
diff -Nru ffmpeg-5.1.8/.forgejo/pre-commit/config.yaml ffmpeg-5.1.9/.forgejo/pre-commit/config.yaml
--- ffmpeg-5.1.8/.forgejo/pre-commit/config.yaml	1970-01-01 00:00:00.000000000 +0000
+++ ffmpeg-5.1.9/.forgejo/pre-commit/config.yaml	2026-05-05 14:22:01.000000000 +0000
@@ -0,0 +1,23 @@
+exclude: ^tests/ref/
+
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v5.0.0
+  hooks:
+    - id: check-case-conflict
+    - id: check-executables-have-shebangs
+    - id: check-illegal-windows-names
+    - id: check-shebang-scripts-are-executable
+    - id: check-yaml
+    - id: end-of-file-fixer
+    - id: fix-byte-order-marker
+    - id: mixed-line-ending
+    - id: trailing-whitespace
+- repo: local
+  hooks:
+    - id: aarch64-asm-indent
+      name: fix aarch64 assembly indentation
+      files: ^.*/aarch64/.*\.S$
+      language: script
+      entry: ./tools/check_arm_indent.sh --apply
+      pass_filenames: false
diff -Nru ffmpeg-5.1.8/.forgejo/workflows/lint.yml ffmpeg-5.1.9/.forgejo/workflows/lint.yml
--- ffmpeg-5.1.8/.forgejo/workflows/lint.yml	1970-01-01 00:00:00.000000000 +0000
+++ ffmpeg-5.1.9/.forgejo/workflows/lint.yml	2026-05-05 15:50:52.000000000 +0000
@@ -0,0 +1,29 @@
+name: Lint
+
+on:
+  push:
+    branches:
+      - release/5.1
+  pull_request:
+
+jobs:
+  lint:
+    name: Pre-Commit
+    runs-on: utilities
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install pre-commit CI
+        id: install
+        run: |
+            python3 -m venv ~/pre-commit
+            ~/pre-commit/bin/pip install --upgrade pip setuptools
+            ~/pre-commit/bin/pip install pre-commit
+            echo "envhash=$({ python3 --version && cat .forgejo/pre-commit/config.yaml; } | sha256sum | cut -d' ' -f1)" >> $FORGEJO_OUTPUT
+      - name: Cache
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/pre-commit
+          key: pre-commit-${{ steps.install.outputs.envhash }}
+      - name: Run pre-commit CI
+        run: ~/pre-commit/bin/pre-commit run -c .forgejo/pre-commit/config.yaml --show-diff-on-failure --color=always --all-files
diff -Nru ffmpeg-5.1.8/.forgejo/workflows/test.yml ffmpeg-5.1.9/.forgejo/workflows/test.yml
--- ffmpeg-5.1.8/.forgejo/workflows/test.yml	1970-01-01 00:00:00.000000000 +0000
+++ ffmpeg-5.1.9/.forgejo/workflows/test.yml	2026-05-05 15:50:52.000000000 +0000
@@ -0,0 +1,80 @@
+name: Test
+
+on:
+  push:
+    branches:
+      - release/5.1
+  pull_request:
+
+jobs:
+  run_fate:
+    name: Fate (${{ matrix.runner }}, ${{ matrix.shared }}, ${{ matrix.bits }} bit)
+    strategy:
+      fail-fast: false
+      matrix:
+        runner: [linux-aarch64]
+        shared: ['static']
+        bits: ['64']
+        include:
+          - runner: linux-amd64
+            shared: 'static'
+            bits: '32'
+          - runner: linux-amd64
+            shared: 'shared'
+            bits: '64'
+    runs-on: ${{ matrix.runner }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Configure
+        run: |
+          ./configure --enable-gpl --enable-nonfree --enable-memory-poisoning --assert-level=2 \
+              $([ "${{ matrix.bits }}" != "32" ] || echo --arch=x86_32 --extra-cflags=-m32 --extra-cxxflags=-m32 --extra-ldflags=-m32) \
+              $([ "${{ matrix.shared }}" != "shared" ] || echo --enable-shared --disable-static) \
+              || CFGRES=$? && CFGRES=$?
+          cat ffbuild/config.log
+          exit $CFGRES
+      - name: Build
+        run: make -j$(nproc)
+      - name: Restore Cached Fate-Suite
+        id: cache
+        uses: actions/cache/restore@v4
+        with:
+          path: fate-suite
+          key: fate-suite
+          restore-keys: |
+            fate-suite-
+      - name: Sync Fate-Suite
+        id: fate
+        run: |
+          make fate-rsync SAMPLES=$PWD/fate-suite
+          echo "hash=$(find fate-suite -type f -printf "%P %s %T@\n" | sort | sha256sum | cut -d' ' -f1)" >> $FORGEJO_OUTPUT
+      - name: Cache Fate-Suite
+        uses: actions/cache/save@v4
+        if: ${{ format('fate-suite-{0}', steps.fate.outputs.hash) != steps.cache.outputs.cache-matched-key }}
+        with:
+          path: fate-suite
+          key: fate-suite-${{ steps.fate.outputs.hash }}
+      - name: Run Fate
+        run: LD_LIBRARY_PATH="$(printf "%s:" "$PWD"/lib*)$PWD" make fate fate-build SAMPLES=$PWD/fate-suite -j$(nproc)
+  compile_only:
+    name: Fate (Win64, Build-Only)
+    strategy:
+      fail-fast: false
+      matrix:
+        image: ["ghcr.io/btbn/ffmpeg-builds/win64-gpl-5.1:latest"]
+    runs-on: linux-amd64
+    container: ${{ matrix.image }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Configure
+        run: |
+          ./configure --pkg-config-flags="--static" $FFBUILD_TARGET_FLAGS $FF_CONFIGURE \
+              --cc="$CC" --cxx="$CXX" --ar="$AR" --ranlib="$RANLIB" --nm="$NM" \
+              --extra-cflags="$FF_CFLAGS" --extra-cxxflags="$FF_CXXFLAGS" \
+              --extra-libs="$FF_LIBS" --extra-ldflags="$FF_LDFLAGS" --extra-ldexeflags="$FF_LDEXEFLAGS"
+      - name: Build
+        run: make -j$(nproc)
+      - name: Run Fate
+        run: make -j$(nproc) fate-build
diff -Nru ffmpeg-5.1.8/COPYING.LGPLv2.1 ffmpeg-5.1.9/COPYING.LGPLv2.1
--- ffmpeg-5.1.8/COPYING.LGPLv2.1	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/COPYING.LGPLv2.1	2026-05-05 14:22:01.000000000 +0000
@@ -55,7 +55,7 @@
 that what they have is not the original version, so that the original
 author's reputation will not be affected by problems that might be
 introduced by others.
-
+
   Finally, software patents pose a constant threat to the existence of
 any free program.  We wish to make sure that a company cannot
 effectively restrict the users of a free program by obtaining a
@@ -111,7 +111,7 @@
 "work based on the library" and a "work that uses the library".  The
 former contains code derived from the library, whereas the latter must
 be combined with the library in order to run.
-
+
                   GNU LESSER GENERAL PUBLIC LICENSE
    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 
@@ -158,7 +158,7 @@
   You may charge a fee for the physical act of transferring a copy,
 and you may at your option offer warranty protection in exchange for a
 fee.
-
+
   2. You may modify your copy or copies of the Library or any portion
 of it, thus forming a work based on the Library, and copy and
 distribute such modifications or work under the terms of Section 1
@@ -216,7 +216,7 @@
 ordinary GNU General Public License has appeared, then you can specify
 that version instead if you wish.)  Do not make any other change in
 these notices.
-
+
   Once this change is made in a given copy, it is irreversible for
 that copy, so the ordinary GNU General Public License applies to all
 subsequent copies and derivative works made from that copy.
@@ -267,7 +267,7 @@
 distribute the object code for the work under the terms of Section 6.
 Any executables containing that work also fall under Section 6,
 whether or not they are linked directly with the Library itself.
-
+
   6. As an exception to the Sections above, you may also combine or
 link a "work that uses the Library" with the Library to produce a
 work containing portions of the Library, and distribute that work
@@ -329,7 +329,7 @@
 accompany the operating system.  Such a contradiction means you cannot
 use both them and the Library together in an executable that you
 distribute.
-
+
   7. You may place library facilities that are a work based on the
 Library side-by-side in a single library together with other library
 facilities not covered by this License, and distribute such a combined
@@ -370,7 +370,7 @@
 restrictions on the recipients' exercise of the rights granted herein.
 You are not responsible for enforcing compliance by third parties with
 this License.
-
+
   11. If, as a consequence of a court judgment or allegation of patent
 infringement or for any other reason (not limited to patent issues),
 conditions are imposed on you (whether by court order, agreement or
@@ -422,7 +422,7 @@
 the Free Software Foundation.  If the Library does not specify a
 license version number, you may choose any version ever published by
 the Free Software Foundation.
-
+
   14. If you wish to incorporate parts of the Library into other free
 programs whose distribution conditions are incompatible with these,
 write to the author to ask for permission.  For software which is
@@ -456,7 +456,7 @@
 DAMAGES.
 
                      END OF TERMS AND CONDITIONS
-
+
            How to Apply These Terms to Your New Libraries
 
   If you develop a new library, and you want it to be of the greatest
diff -Nru ffmpeg-5.1.8/Changelog ffmpeg-5.1.9/Changelog
--- ffmpeg-5.1.8/Changelog	2025-11-26 02:41:35.000000000 +0000
+++ ffmpeg-5.1.9/Changelog	2026-05-05 15:50:55.000000000 +0000
@@ -1,6 +1,164 @@
 Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
+version 5.1.9:
+ avcodec/av1dec: check that primary_ref_frame is within range
+ configure: bump CONFIG_THIS_YEAR to 2026
+ avcodec/dfpwmdec: Check nb_samples
+ avcodec/alsdec: do not set nbits invalidly
+ swscale/swscale_unscaled: adjust last line copy
+ avformat/avidec: check LIST size in avi_load_index()
+ avformat/avidec: validate INFO list size before parsing
+ libavformat/xwma: fix overflow in seek position
+ avformat/pcm: Use 64bit for byte_rate
+ avfilter/vf_kerndeint: Check for minimum height
+ avcodec/ralf: Add the missing return statement after the error log
+ avfilter/vf_codecview: Clamp block to the visible frame region
+ avcodec/zmbv: reject XOR data that overruns the decompression buffer
+ avcodec/rasc: fix heap use-after-free in decode_move()
+ avformat/rtpdec_mpeg4: reject zero-length AU header sections
+ fftools/ffmpeg_opt: validate stream index in negative map handling
+ avformat/rtmpproto: prevent integer overflow accumulating FLV buffer size
+ avformat/rtmpproto: validate compressed SWF header length
+ avformat/rtsp: Fix out-of-bounds read in SDP parser when control_url is empty
+ avformat/rtpdec_latm: avoid integer overflow in LATM length parsing
+ avcodec/h264: recompute per-slice direct mode state for every slice
+ avcodec/h264_refs: Clear stale pointers from ref_list
+ avformat/concat: guard total_size overflow
+ avcodec/wmaenc: Fix missing padding in extradata
+ avcodec/tdsc: remove double stride adjustment
+ avformat/cafdec: fix negative index use in read_seek
+ avcodec/notchlc: Check 255 loops
+ avformat/rtpdec_jpeg: check qtable_len
+ avformat/vividas: use-of-uninitialized-value in keybuffer
+ avcodec/tdsc: Check jpeg size
+ avcodec/tdsc: Better input size check
+ avcodec/tdsc: Check tile_size
+ avformat/mov: check extradata in mov_read_dops()
+ avformat/mov: Check read size for opus extradata
+ avformat/rtspdec: reject non-positive ANNOUNCE Content-Length
+ avformat/wavdec: Fix use-of-uninitialized-value in find_guid()
+ avformat/hls_sample_encryption: add missing padding for audio setup buffer
+ avcodec/svq1dec: Check input space for minimum
+ avcodec/vp9: Rollback dimensions when format is rejected
+ avformat/rtpdec_qdm2: Check block_size
+ avcodec/escape130: Initialize old_y_avg
+ avutil/samplefmt: Dont claim that av_get_sample_fmt_string checks sample_fmt
+ avformat: check avio_read() return values in dss/dtshd/mlv
+ avcodec/alsdec: preserve full float value in zero-truncated samples
+ avcodec/alsdec: propagate read_diff_float_data() errors in read_frame_data()
+ avcodec/alsdec: fix mantissa unpacking in compressed Part A path
+ libavfilter/vf_v360: fix operator precedence in stereo loop condition
+ avcodec/alsdec: fix abs(INT_MIN) UB in read_diff_float_data()
+ avformat/rsd: reject short ADPCM_THP extradata reads
+ avformat/mov: Handle integer overflow in MOV parser
+ avcodec/dvdsub_parser: Fix buf_size check
+ avfilter/af_pan: fix sscanf() return value checks in parse_channel_name
+ avutil/bswap: fix implicit conversion warning in av_bswap64
+ avformat/mpegts: fix descriptor accounting across multiple IOD descriptors
+ avcodec/xxan: zero-initialize y_buffer
+ avcodec/exr: Check input space before reverse_lut()
+ avcodec/h264_slice: reject slice_num >= 0xFFFF
+ avutil/timecode: Check for integer overflow in av_timecode_init_from_components()
+ avformat/mov: do not allocate out-of-range buffers
+ avfilter/af_lv2: call lilv_instance_activate before lilv_instance_run
+ avformat/rtmpproto: fix listen_timeout conversion for special negative values
+ swscale/output: fix integer overflows in chroma in yuv2rgba64_X_c_template()
+ avcodec/lcldec: Fixes uqvq overflow
+ avcodec/av1dec: sync frame header and tile group behavior with CBS
+ avformat/mlvdec: avoid uninitialized read in read_string()
+ avcodec/magicyuv: fix small median images
+ swscale/output: Fix integer overflow in alpha in yuv2rgba64_1_c_template()
+ swscale/utils: Check *Inc
+ avfilter/vf_scale: Fix integer overflow in config_props()
+ swscale/output: Fixes integer overflow in yuv2planeX_8_c
+ swscale/utils: initialize chroma when luma switched to cascade
+ avformat/rtsp: Pass blacklist
+ avformat/rtsp: Explicitly check protocol
+ avfilter/vf_convolution: Use avpriv_mirror
+ avfilter/vf_convolution: Handle corner cases with small frames
+ avformat/mov: use 64bit in CENC subsample bounds checks
+ avutil/eval: Check depth of AVExpr
+ avformat/vividas: Reset n_audio_subpackets on error
+ avformat/matroskadec: Check that end_time_ns >= start_time_ns
+ avcodec/vp3: Sanity check cropping
+ avformat/dhav: Check avio_seek() return
+ avformat/segafilm: dont read uninitialized value
+ avcodec/mpegvideo_enc: Restructure ff_h263_encode_gob_header() relation to update_mb_info()
+ avcodec/exr: check tile_attr.x/ySize
+ avformat/demux: Fix integer overflows in select_from_pts_buffer()
+ avcodec/golomb: Fix get_ur_golomb_jpegls() with esclen = 0
+ swresample/resample_template: add casts to avoid undefined overflows
+ avcodec/h264_parser: Check pts for overflow
+ avformat/wtvdec: Check that language is fully read
+ avcodec/imm5: Dont pass EAGAIN on as is
+ avcodec/interplayacm: Check input for fill_block()
+ avcodec/flashsv: Check for input space before (re)allocating frame
+ avcodec/mdec: Check input space vs minimal block size
+ avcodec/h264_parser: Check remaining input length in loop in scan_mmco_reset()
+ avcodec/exr: fix AVERROR typo
+ avcodec/cfhd: Check transform type before continuing
+ avcodec/cfhd: Add CFHDSegment enum and named identifiers
+ avformat/icodec: Check size
+ avformat/lrcdec: Check ss for finiteness
+ avformat/http: Also count redirects from the cache
+ avformat/http: allow adjusting the redirect limit
+ fftools/ffmpeg_opt: limit recursion of presets
+ swscale/rgb2rgb_template: fix signed shift into sign bit
+ swresample: Check ch layouts in swr_alloc_set_opts2()
+ swresample: Check user chlayout in swr_set_matrix()
+ avcodec/bmp: fix indention
+ avcodec/exr: Handle axmax like bxmin in 04d7a6d3db56ea1a93908ff2d3d312e3fc40a58c
+ avformat/cafdec: Check nb_entries in read_info_chunk()
+ avcodec/vp9: Reallocate on resolution change which does not change tile_cols
+ avformat/img2dec: Check avio_size() for failure
+ avformat/mpegtsenc: Check remaining space in SDT
+ avformat/img2enc: Check split planes packet size
+ avformat/yuv4mpegen: Sanity check input packet frame dimensions
+ avformat/iff: Error out with 0 channel loudspeaker configuration
+ Fix overflow in STSD parser
+ avcodec/adpcm: Check input buffer size
+ avformat/scd: Use ffio_read_size()
+ avformat/hls: Check for integer overflow with #EXTINF:
+ avcodec/dca_xll: Clear padding in ff_dca_xll_parse()
+ vfilter/vf_find_rect: Clamp x/y min/max to valid values
+ avcodec/dca_xll: Check get_rice_array()
+ avformat/mpegts: Check program_info_length
+ avformat/mpegts: Check IOD_DESCRIPTOR len
+ avcodec/qdm2: fix heap-use-after-free in qdm2_decode_frame
+ avcodec/jpeg2000dec: Print bpno level when erroring out
+ avformat/dashdec: check value valid after read value from mpd xml
+ swscale/utils: zero init filter memory as before
+ lavc/j2kdec: Do not ignore colour association for packed formats
+ swscale/utils: Sanity check sizeFactor
+ swscale/utils: Avoid FF_ALLOC_TYPED_ARRAY() and use av_malloc_array() directly
+ avcodec/mjpegdec: fix segfault on extern_huff and no extradata
+ avcodec/exr: use av_realloc_array()
+ avcodec/omx: Check extradata size and nFilledLen
+ avfilter/scale_eval: Use 64bit for factor_w/h
+ avfilter/scale_eval: Avoid undefined behavior with double to int cast
+ avformat/http: Check that the protocol of redirects is http or https
+ avfilter/vf_find_rect: Fix handling odd sized images
+ avcodec/notchlc: zero-initialize history buffer
+ avfilter/vf_stack: add checks for the final canvas dimensions
+ avcodec/mjpegdec: only test the size bound in sequential mjpeg
+ avformat/hls: fix double space
+ avformat/hls: Check seg size and offset for overflow
+ avformat/flac_picture: Correct check
+ avfilter/vf_neighbor_opencl: add error condition when filter name doesn't match
+ avfilter/vf_libopencv: make sure there is space for null-terminator in shape_str
+ fate: add missing options in config template
+ (fforge/pr/22398) swscale/x86/yuv2rgb_template: Add emms to MMX(EXT) functions
+ forgejo: backport CI job names
+ (fforge/pr/21341) avformat/img2dec: reject input images too big to fit into a single packet
+ avfilter/af_amerge: fix possible crash with custom layouts
+ (fforge/pr/21063) avformat/os_support: Include stdint.h for int64_t
+ all: apply linter fixes
+ tools/check_arm_indent: skip empty glob
+ forgejo: apply needed CI changes for 5.1
+ forgejo: backport CI to release/5.1
+
+
 version 5.1.8:
  avutil/common: cast GET_BYTE/GET_16BIT returned value
  avfilter/vf_drawtext: Account for bbox text seperator
diff -Nru ffmpeg-5.1.8/RELEASE ffmpeg-5.1.9/RELEASE
--- ffmpeg-5.1.8/RELEASE	2025-11-26 02:41:35.000000000 +0000
+++ ffmpeg-5.1.9/RELEASE	2026-05-05 15:50:55.000000000 +0000
@@ -1 +1 @@
-5.1.8
+5.1.9
diff -Nru ffmpeg-5.1.8/VERSION ffmpeg-5.1.9/VERSION
--- ffmpeg-5.1.8/VERSION	2025-11-26 02:41:35.000000000 +0000
+++ ffmpeg-5.1.9/VERSION	2026-05-05 15:50:55.000000000 +0000
@@ -1 +1 @@
-5.1.8
+5.1.9
diff -Nru ffmpeg-5.1.8/configure ffmpeg-5.1.9/configure
--- ffmpeg-5.1.8/configure	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/configure	2026-05-05 15:50:55.000000000 +0000
@@ -7797,7 +7797,7 @@
 #define FFMPEG_CONFIG_H
 #define FFMPEG_CONFIGURATION "$(c_escape $FFMPEG_CONFIGURATION)"
 #define FFMPEG_LICENSE "$(c_escape $license)"
-#define CONFIG_THIS_YEAR 2025
+#define CONFIG_THIS_YEAR 2026
 #define FFMPEG_DATADIR "$(eval c_escape $datadir)"
 #define AVCONV_DATADIR "$(eval c_escape $datadir)"
 #define CC_IDENT "$(c_escape ${cc_ident:-Unknown compiler})"
diff -Nru ffmpeg-5.1.8/debian/changelog ffmpeg-5.1.9/debian/changelog
--- ffmpeg-5.1.8/debian/changelog	2025-12-05 21:14:02.000000000 +0000
+++ ffmpeg-5.1.9/debian/changelog	2026-05-13 13:23:40.000000000 +0000
@@ -1,3 +1,9 @@
+ffmpeg (7:5.1.9-0+deb12u1) bookworm-security; urgency=medium
+
+  * New upstream version 5.1.9
+
+ -- Sebastian Ramacher <sramacher@debian.org>  Wed, 13 May 2026 15:23:40 +0200
+
 ffmpeg (7:5.1.8-0+deb12u1) bookworm-security; urgency=medium
 
   * New upstream version 5.1.8
diff -Nru ffmpeg-5.1.8/doc/Doxyfile ffmpeg-5.1.9/doc/Doxyfile
--- ffmpeg-5.1.8/doc/Doxyfile	2025-11-26 02:41:35.000000000 +0000
+++ ffmpeg-5.1.9/doc/Doxyfile	2026-05-05 15:50:55.000000000 +0000
@@ -38,7 +38,7 @@
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 5.1.8
+PROJECT_NUMBER         = 5.1.9
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff -Nru ffmpeg-5.1.8/doc/build_system.txt ffmpeg-5.1.9/doc/build_system.txt
--- ffmpeg-5.1.8/doc/build_system.txt	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/doc/build_system.txt	2026-05-05 14:22:01.000000000 +0000
@@ -63,4 +63,3 @@
 make -k
     Continue build in case of errors, this is useful for the regression tests
     sometimes but note that it will still not run all reg tests.
-
diff -Nru ffmpeg-5.1.8/doc/dev_community/resolution_process.md ffmpeg-5.1.9/doc/dev_community/resolution_process.md
--- ffmpeg-5.1.8/doc/dev_community/resolution_process.md	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/doc/dev_community/resolution_process.md	2026-05-05 15:50:52.000000000 +0000
@@ -88,4 +88,3 @@
 
 The decisions from the TC are final, until the matters are reopened after
 no less than one year.
-
diff -Nru ffmpeg-5.1.8/doc/fate_config.sh.template ffmpeg-5.1.9/doc/fate_config.sh.template
--- ffmpeg-5.1.8/doc/fate_config.sh.template	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/doc/fate_config.sh.template	2026-05-05 15:50:55.000000000 +0000
@@ -11,16 +11,21 @@
 # the following are optional and map to configure options
 arch=
 cpu=
+toolchain=
 cross_prefix=
 as=
 cc=
+cxx=
 ld=
+nm=
 target_os=
 sysroot=
 target_exec=
 target_path=
 target_samples=
 extra_cflags=
+extra_cxxflags=
+extra_objcflags=
 extra_ldflags=
 extra_libs=
 extra_conf=     # extra configure options not covered above
diff -Nru ffmpeg-5.1.8/doc/nut.texi ffmpeg-5.1.9/doc/nut.texi
--- ffmpeg-5.1.8/doc/nut.texi	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/doc/nut.texi	2026-05-05 14:22:01.000000000 +0000
@@ -157,4 +157,3 @@
 @item XVID @tab non-compliant MPEG-4 generated by old Xvid
 @item XVIX @tab non-compliant MPEG-4 generated by old Xvid with interlacing bug
 @end multitable
-
diff -Nru ffmpeg-5.1.8/doc/undefined.txt ffmpeg-5.1.9/doc/undefined.txt
--- ffmpeg-5.1.8/doc/undefined.txt	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/doc/undefined.txt	2026-05-05 14:22:01.000000000 +0000
@@ -44,4 +44,3 @@
 here the reader knows that a,b,c are meant to be signed integers but for C
 standard compliance / to avoid undefined behavior they are stored in unsigned
 ints.
-
diff -Nru ffmpeg-5.1.8/ffbuild/libversion.sh ffmpeg-5.1.9/ffbuild/libversion.sh
--- ffmpeg-5.1.8/ffbuild/libversion.sh	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/ffbuild/libversion.sh	2026-05-05 14:22:01.000000000 +0000
@@ -1,3 +1,5 @@
+#!/bin/sh
+
 toupper(){
     echo "$@" | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ
 }
diff -Nru ffmpeg-5.1.8/fftools/ffmpeg.h ffmpeg-5.1.9/fftools/ffmpeg.h
--- ffmpeg-5.1.8/fftools/ffmpeg.h	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/fftools/ffmpeg.h	2026-05-05 15:50:55.000000000 +0000
@@ -237,6 +237,9 @@
     int        nb_autoscale;
     SpecifierOpt *bits_per_raw_sample;
     int        nb_bits_per_raw_sample;
+
+
+    int depth;
 } OptionsContext;
 
 typedef struct InputFilter {
diff -Nru ffmpeg-5.1.8/fftools/ffmpeg_opt.c ffmpeg-5.1.9/fftools/ffmpeg_opt.c
--- ffmpeg-5.1.8/fftools/ffmpeg_opt.c	2025-11-26 02:41:35.000000000 +0000
+++ ffmpeg-5.1.9/fftools/ffmpeg_opt.c	2026-05-05 15:50:55.000000000 +0000
@@ -475,6 +475,8 @@
             for (i = 0; i < o->nb_stream_maps; i++) {
                 m = &o->stream_maps[i];
                 if (file_idx == m->file_index &&
+                    m->stream_index >= 0 &&
+                    m->stream_index < input_files[m->file_index]->nb_streams &&
                     check_stream_specifier(input_files[m->file_index]->ctx,
                                            input_files[m->file_index]->ctx->streams[m->stream_index],
                                            *p == ':' ? p + 1 : p) > 0)
@@ -3267,6 +3269,12 @@
     FILE *f=NULL;
     char filename[1000], line[1000], tmp_line[1000];
     const char *codec_name = NULL;
+    int depth = o->depth;
+
+    if (depth > 2) {
+        av_log(NULL, AV_LOG_ERROR, "too deep recursion\n");
+        return AVERROR(EINVAL);
+    }
 
     tmp_line[0] = *opt;
     tmp_line[1] = 0;
@@ -3280,6 +3288,7 @@
         exit_program(1);
     }
 
+    o->depth ++;
     while (fgets(line, sizeof(line), f)) {
         char *key = tmp_line, *value, *endptr;
 
@@ -3304,6 +3313,7 @@
         }
     }
 
+    o->depth = depth;
     fclose(f);
 
     return 0;
diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/aacpsdsp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/aacpsdsp_neon.S
--- ffmpeg-5.1.8/libavcodec/aarch64/aacpsdsp_neon.S	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/aarch64/aacpsdsp_neon.S	2026-03-16 18:10:00.000000000 +0000
@@ -19,130 +19,130 @@
 #include "libavutil/aarch64/asm.S"
 
 function ff_ps_add_squares_neon, export=1
-1:      ld1         {v0.4S,v1.4S}, [x1], #32
-        fmul        v0.4S, v0.4S, v0.4S
-        fmul        v1.4S, v1.4S, v1.4S
-        faddp       v2.4S, v0.4S, v1.4S
-        ld1         {v3.4S}, [x0]
-        fadd        v3.4S, v3.4S, v2.4S
-        st1         {v3.4S}, [x0], #16
-        subs        w2, w2, #4
-        b.gt        1b
+1:      ld1             {v0.4s,v1.4s}, [x1], #32
+        fmul            v0.4s, v0.4s, v0.4s
+        fmul            v1.4s, v1.4s, v1.4s
+        faddp           v2.4s, v0.4s, v1.4s
+        ld1             {v3.4s}, [x0]
+        fadd            v3.4s, v3.4s, v2.4s
+        st1             {v3.4s}, [x0], #16
+        subs            w2, w2, #4
+        b.gt            1b
         ret
 endfunc
 
 function ff_ps_mul_pair_single_neon, export=1
-1:      ld1         {v0.4S,v1.4S}, [x1], #32
-        ld1         {v2.4S},       [x2], #16
-        zip1        v3.4S, v2.4S, v2.4S
-        zip2        v4.4S, v2.4S, v2.4S
-        fmul        v0.4S, v0.4S, v3.4S
-        fmul        v1.4S, v1.4S, v4.4S
-        st1         {v0.4S,v1.4S}, [x0], #32
-        subs        w3, w3, #4
-        b.gt        1b
+1:      ld1             {v0.4s,v1.4s}, [x1], #32
+        ld1             {v2.4s},       [x2], #16
+        zip1            v3.4s, v2.4s, v2.4s
+        zip2            v4.4s, v2.4s, v2.4s
+        fmul            v0.4s, v0.4s, v3.4s
+        fmul            v1.4s, v1.4s, v4.4s
+        st1             {v0.4s,v1.4s}, [x0], #32
+        subs            w3, w3, #4
+        b.gt            1b
         ret
 endfunc
 
 function ff_ps_stereo_interpolate_neon, export=1
-        ld1         {v0.4S}, [x2]
-        ld1         {v1.4S}, [x3]
-        zip1        v4.4S, v0.4S, v0.4S
-        zip2        v5.4S, v0.4S, v0.4S
-        zip1        v6.4S, v1.4S, v1.4S
-        zip2        v7.4S, v1.4S, v1.4S
-1:      ld1         {v2.2S}, [x0]
-        ld1         {v3.2S}, [x1]
-        fadd        v4.4S, v4.4S, v6.4S
-        fadd        v5.4S, v5.4S, v7.4S
-        mov         v2.D[1], v2.D[0]
-        mov         v3.D[1], v3.D[0]
-        fmul        v2.4S, v2.4S, v4.4S
-        fmla        v2.4S, v3.4S, v5.4S
-        st1         {v2.D}[0], [x0], #8
-        st1         {v2.D}[1], [x1], #8
-        subs        w4, w4, #1
-        b.gt        1b
+        ld1             {v0.4s}, [x2]
+        ld1             {v1.4s}, [x3]
+        zip1            v4.4s, v0.4s, v0.4s
+        zip2            v5.4s, v0.4s, v0.4s
+        zip1            v6.4s, v1.4s, v1.4s
+        zip2            v7.4s, v1.4s, v1.4s
+1:      ld1             {v2.2s}, [x0]
+        ld1             {v3.2s}, [x1]
+        fadd            v4.4s, v4.4s, v6.4s
+        fadd            v5.4s, v5.4s, v7.4s
+        mov             v2.d[1], v2.d[0]
+        mov             v3.d[1], v3.d[0]
+        fmul            v2.4s, v2.4s, v4.4s
+        fmla            v2.4s, v3.4s, v5.4s
+        st1             {v2.d}[0], [x0], #8
+        st1             {v2.d}[1], [x1], #8
+        subs            w4, w4, #1
+        b.gt            1b
         ret
 endfunc
 
 function ff_ps_stereo_interpolate_ipdopd_neon, export=1
-        ld1         {v0.4S,v1.4S}, [x2]
-        ld1         {v6.4S,v7.4S}, [x3]
-        fneg        v2.4S, v1.4S
-        fneg        v3.4S, v7.4S
-        zip1        v16.4S, v0.4S, v0.4S
-        zip2        v17.4S, v0.4S, v0.4S
-        zip1        v18.4S, v2.4S, v1.4S
-        zip2        v19.4S, v2.4S, v1.4S
-        zip1        v20.4S, v6.4S, v6.4S
-        zip2        v21.4S, v6.4S, v6.4S
-        zip1        v22.4S, v3.4S, v7.4S
-        zip2        v23.4S, v3.4S, v7.4S
-1:      ld1         {v2.2S}, [x0]
-        ld1         {v3.2S}, [x1]
-        fadd        v16.4S, v16.4S, v20.4S
-        fadd        v17.4S, v17.4S, v21.4S
-        mov         v2.D[1], v2.D[0]
-        mov         v3.D[1], v3.D[0]
-        fmul        v4.4S, v2.4S, v16.4S
-        fmla        v4.4S, v3.4S, v17.4S
-        fadd        v18.4S, v18.4S, v22.4S
-        fadd        v19.4S, v19.4S, v23.4S
-        ext         v2.16B, v2.16B, v2.16B, #4
-        ext         v3.16B, v3.16B, v3.16B, #4
-        fmla        v4.4S, v2.4S, v18.4S
-        fmla        v4.4S, v3.4S, v19.4S
-        st1         {v4.D}[0], [x0], #8
-        st1         {v4.D}[1], [x1], #8
-        subs        w4, w4, #1
-        b.gt        1b
+        ld1             {v0.4s,v1.4s}, [x2]
+        ld1             {v6.4s,v7.4s}, [x3]
+        fneg            v2.4s, v1.4s
+        fneg            v3.4s, v7.4s
+        zip1            v16.4s, v0.4s, v0.4s
+        zip2            v17.4s, v0.4s, v0.4s
+        zip1            v18.4s, v2.4s, v1.4s
+        zip2            v19.4s, v2.4s, v1.4s
+        zip1            v20.4s, v6.4s, v6.4s
+        zip2            v21.4s, v6.4s, v6.4s
+        zip1            v22.4s, v3.4s, v7.4s
+        zip2            v23.4s, v3.4s, v7.4s
+1:      ld1             {v2.2s}, [x0]
+        ld1             {v3.2s}, [x1]
+        fadd            v16.4s, v16.4s, v20.4s
+        fadd            v17.4s, v17.4s, v21.4s
+        mov             v2.d[1], v2.d[0]
+        mov             v3.d[1], v3.d[0]
+        fmul            v4.4s, v2.4s, v16.4s
+        fmla            v4.4s, v3.4s, v17.4s
+        fadd            v18.4s, v18.4s, v22.4s
+        fadd            v19.4s, v19.4s, v23.4s
+        ext             v2.16b, v2.16b, v2.16b, #4
+        ext             v3.16b, v3.16b, v3.16b, #4
+        fmla            v4.4s, v2.4s, v18.4s
+        fmla            v4.4s, v3.4s, v19.4s
+        st1             {v4.d}[0], [x0], #8
+        st1             {v4.d}[1], [x1], #8
+        subs            w4, w4, #1
+        b.gt            1b
         ret
 endfunc
 
 function ff_ps_hybrid_analysis_neon, export=1
-        lsl         x3, x3, #3
-        ld2         {v0.4S,v1.4S}, [x1], #32
-        ld2         {v2.2S,v3.2S}, [x1], #16
-        ld1         {v24.2S},      [x1], #8
-        ld2         {v4.2S,v5.2S}, [x1], #16
-        ld2         {v6.4S,v7.4S}, [x1]
-        rev64       v6.4S, v6.4S
-        rev64       v7.4S, v7.4S
-        ext         v6.16B, v6.16B, v6.16B, #8
-        ext         v7.16B, v7.16B, v7.16B, #8
-        rev64       v4.2S, v4.2S
-        rev64       v5.2S, v5.2S
-        mov         v2.D[1], v3.D[0]
-        mov         v4.D[1], v5.D[0]
-        mov         v5.D[1], v2.D[0]
-        mov         v3.D[1], v4.D[0]
-        fadd        v16.4S, v0.4S, v6.4S
-        fadd        v17.4S, v1.4S, v7.4S
-        fsub        v18.4S, v1.4S, v7.4S
-        fsub        v19.4S, v0.4S, v6.4S
-        fadd        v22.4S, v2.4S, v4.4S
-        fsub        v23.4S, v5.4S, v3.4S
-        trn1        v20.2D, v22.2D, v23.2D      // {re4+re8, re5+re7, im8-im4, im7-im5}
-        trn2        v21.2D, v22.2D, v23.2D      // {im4+im8, im5+im7, re4-re8, re5-re7}
-1:      ld2         {v2.4S,v3.4S}, [x2], #32
-        ld2         {v4.2S,v5.2S}, [x2], #16
-        ld1         {v6.2S},       [x2], #8
-        add         x2, x2, #8
-        mov         v4.D[1], v5.D[0]
-        mov         v6.S[1], v6.S[0]
-        fmul        v6.2S, v6.2S, v24.2S
-        fmul        v0.4S, v2.4S, v16.4S
-        fmul        v1.4S, v2.4S, v17.4S
-        fmls        v0.4S, v3.4S, v18.4S
-        fmla        v1.4S, v3.4S, v19.4S
-        fmla        v0.4S, v4.4S, v20.4S
-        fmla        v1.4S, v4.4S, v21.4S
-        faddp       v0.4S, v0.4S, v1.4S
-        faddp       v0.4S, v0.4S, v0.4S
-        fadd        v0.2S, v0.2S, v6.2S
-        st1         {v0.2S}, [x0], x3
-        subs        w4, w4, #1
-        b.gt        1b
+        lsl             x3, x3, #3
+        ld2             {v0.4s,v1.4s}, [x1], #32
+        ld2             {v2.2s,v3.2s}, [x1], #16
+        ld1             {v24.2s},      [x1], #8
+        ld2             {v4.2s,v5.2s}, [x1], #16
+        ld2             {v6.4s,v7.4s}, [x1]
+        rev64           v6.4s, v6.4s
+        rev64           v7.4s, v7.4s
+        ext             v6.16b, v6.16b, v6.16b, #8
+        ext             v7.16b, v7.16b, v7.16b, #8
+        rev64           v4.2s, v4.2s
+        rev64           v5.2s, v5.2s
+        mov             v2.d[1], v3.d[0]
+        mov             v4.d[1], v5.d[0]
+        mov             v5.d[1], v2.d[0]
+        mov             v3.d[1], v4.d[0]
+        fadd            v16.4s, v0.4s, v6.4s
+        fadd            v17.4s, v1.4s, v7.4s
+        fsub            v18.4s, v1.4s, v7.4s
+        fsub            v19.4s, v0.4s, v6.4s
+        fadd            v22.4s, v2.4s, v4.4s
+        fsub            v23.4s, v5.4s, v3.4s
+        trn1            v20.2d, v22.2d, v23.2d      // {re4+re8, re5+re7, im8-im4, im7-im5}
+        trn2            v21.2d, v22.2d, v23.2d      // {im4+im8, im5+im7, re4-re8, re5-re7}
+1:      ld2             {v2.4s,v3.4s}, [x2], #32
+        ld2             {v4.2s,v5.2s}, [x2], #16
+        ld1             {v6.2s},       [x2], #8
+        add             x2, x2, #8
+        mov             v4.d[1], v5.d[0]
+        mov             v6.s[1], v6.s[0]
+        fmul            v6.2s, v6.2s, v24.2s
+        fmul            v0.4s, v2.4s, v16.4s
+        fmul            v1.4s, v2.4s, v17.4s
+        fmls            v0.4s, v3.4s, v18.4s
+        fmla            v1.4s, v3.4s, v19.4s
+        fmla            v0.4s, v4.4s, v20.4s
+        fmla            v1.4s, v4.4s, v21.4s
+        faddp           v0.4s, v0.4s, v1.4s
+        faddp           v0.4s, v0.4s, v0.4s
+        fadd            v0.2s, v0.2s, v6.2s
+        st1             {v0.2s}, [x0], x3
+        subs            w4, w4, #1
+        b.gt            1b
         ret
 endfunc
diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/fft_neon.S ffmpeg-5.1.9/libavcodec/aarch64/fft_neon.S
--- ffmpeg-5.1.8/libavcodec/aarch64/fft_neon.S	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/aarch64/fft_neon.S	2026-05-05 15:50:52.000000000 +0000
@@ -359,18 +359,18 @@
 endfunc
 .endm
 
-        def_fft    32,    16,     8
-        def_fft    64,    32,    16
-        def_fft   128,    64,    32
-        def_fft   256,   128,    64
-        def_fft   512,   256,   128
-        def_fft  1024,   512,   256
-        def_fft  2048,  1024,   512
-        def_fft  4096,  2048,  1024
-        def_fft  8192,  4096,  2048
-        def_fft 16384,  8192,  4096
-        def_fft 32768, 16384,  8192
-        def_fft 65536, 32768, 16384
+        def_fft         32,    16,     8
+        def_fft         64,    32,    16
+        def_fft         128,    64,    32
+        def_fft         256,   128,    64
+        def_fft         512,   256,   128
+        def_fft         1024,   512,   256
+        def_fft         2048,  1024,   512
+        def_fft         4096,  2048,  1024
+        def_fft         8192,  4096,  2048
+        def_fft         16384,  8192,  4096
+        def_fft         32768, 16384,  8192
+        def_fft         65536, 32768, 16384
 
 function ff_fft_calc_neon, export=1
         prfm            pldl1keep, [x1]
diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/h264cmc_neon.S ffmpeg-5.1.9/libavcodec/aarch64/h264cmc_neon.S
--- ffmpeg-5.1.8/libavcodec/aarch64/h264cmc_neon.S	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/aarch64/h264cmc_neon.S	2026-05-05 15:50:52.000000000 +0000
@@ -38,11 +38,11 @@
         lsl             w9,  w9,  #3
         lsl             w10, w10, #1
         add             w9,  w9,  w10
-        add             x6,  x6,  w9, UXTW
-        ld1r            {v22.8H}, [x6]
+        add             x6,  x6,  w9, uxtw
+        ld1r            {v22.8h}, [x6]
   .endif
   .ifc \codec,vc1
-        movi            v22.8H,   #28
+        movi            v22.8h,   #28
   .endif
         mul             w7,  w4,  w5
         lsl             w14, w5,  #3
@@ -55,139 +55,139 @@
         add             w4,  w4,  #64
         b.eq            2f
 
-        dup             v0.8B,  w4
-        dup             v1.8B,  w12
-        ld1             {v4.8B, v5.8B}, [x1], x2
-        dup             v2.8B,  w6
-        dup             v3.8B,  w7
-        ext             v5.8B,  v4.8B,  v5.8B,  #1
-1:      ld1             {v6.8B, v7.8B}, [x1], x2
-        umull           v16.8H, v4.8B,  v0.8B
-        umlal           v16.8H, v5.8B,  v1.8B
-        ext             v7.8B,  v6.8B,  v7.8B,  #1
-        ld1             {v4.8B, v5.8B}, [x1], x2
-        umlal           v16.8H, v6.8B,  v2.8B
-        prfm            pldl1strm, [x1]
-        ext             v5.8B,  v4.8B,  v5.8B,  #1
-        umlal           v16.8H, v7.8B,  v3.8B
-        umull           v17.8H, v6.8B,  v0.8B
+        dup             v0.8b,  w4
+        dup             v1.8b,  w12
+        ld1             {v4.8b, v5.8b}, [x1], x2
+        dup             v2.8b,  w6
+        dup             v3.8b,  w7
+        ext             v5.8b,  v4.8b,  v5.8b,  #1
+1:      ld1             {v6.8b, v7.8b}, [x1], x2
+        umull           v16.8h, v4.8b,  v0.8b
+        umlal           v16.8h, v5.8b,  v1.8b
+        ext             v7.8b,  v6.8b,  v7.8b,  #1
+        ld1             {v4.8b, v5.8b}, [x1], x2
+        umlal           v16.8h, v6.8b,  v2.8b
+        prfm            pldl1strm, [x1]
+        ext             v5.8b,  v4.8b,  v5.8b,  #1
+        umlal           v16.8h, v7.8b,  v3.8b
+        umull           v17.8h, v6.8b,  v0.8b
         subs            w3,  w3,  #2
-        umlal           v17.8H, v7.8B, v1.8B
-        umlal           v17.8H, v4.8B, v2.8B
-        umlal           v17.8H, v5.8B, v3.8B
+        umlal           v17.8h, v7.8b, v1.8b
+        umlal           v17.8h, v4.8b, v2.8b
+        umlal           v17.8h, v5.8b, v3.8b
         prfm            pldl1strm, [x1, x2]
   .ifc \codec,h264
-        rshrn           v16.8B, v16.8H, #6
-        rshrn           v17.8B, v17.8H, #6
+        rshrn           v16.8b, v16.8h, #6
+        rshrn           v17.8b, v17.8h, #6
   .else
-        add             v16.8H, v16.8H, v22.8H
-        add             v17.8H, v17.8H, v22.8H
-        shrn            v16.8B, v16.8H, #6
-        shrn            v17.8B, v17.8H, #6
+        add             v16.8h, v16.8h, v22.8h
+        add             v17.8h, v17.8h, v22.8h
+        shrn            v16.8b, v16.8h, #6
+        shrn            v17.8b, v17.8h, #6
   .endif
   .ifc \type,avg
-        ld1             {v20.8B}, [x8], x2
-        ld1             {v21.8B}, [x8], x2
-        urhadd          v16.8B, v16.8B, v20.8B
-        urhadd          v17.8B, v17.8B, v21.8B
+        ld1             {v20.8b}, [x8], x2
+        ld1             {v21.8b}, [x8], x2
+        urhadd          v16.8b, v16.8b, v20.8b
+        urhadd          v17.8b, v17.8b, v21.8b
   .endif
-        st1             {v16.8B}, [x0], x2
-        st1             {v17.8B}, [x0], x2
+        st1             {v16.8b}, [x0], x2
+        st1             {v17.8b}, [x0], x2
         b.gt            1b
         ret
 
 2:      adds            w12, w12, w6
-        dup             v0.8B, w4
+        dup             v0.8b, w4
         b.eq            5f
         tst             w6,  w6
-        dup             v1.8B, w12
+        dup             v1.8b, w12
         b.eq            4f
 
-        ld1             {v4.8B}, [x1], x2
-3:      ld1             {v6.8B}, [x1], x2
-        umull           v16.8H, v4.8B,  v0.8B
-        umlal           v16.8H, v6.8B,  v1.8B
-        ld1             {v4.8B}, [x1], x2
-        umull           v17.8H, v6.8B,  v0.8B
-        umlal           v17.8H, v4.8B,  v1.8B
+        ld1             {v4.8b}, [x1], x2
+3:      ld1             {v6.8b}, [x1], x2
+        umull           v16.8h, v4.8b,  v0.8b
+        umlal           v16.8h, v6.8b,  v1.8b
+        ld1             {v4.8b}, [x1], x2
+        umull           v17.8h, v6.8b,  v0.8b
+        umlal           v17.8h, v4.8b,  v1.8b
         prfm            pldl1strm, [x1]
   .ifc \codec,h264
-        rshrn           v16.8B, v16.8H, #6
-        rshrn           v17.8B, v17.8H, #6
+        rshrn           v16.8b, v16.8h, #6
+        rshrn           v17.8b, v17.8h, #6
   .else
-        add             v16.8H, v16.8H, v22.8H
-        add             v17.8H, v17.8H, v22.8H
-        shrn            v16.8B, v16.8H, #6
-        shrn            v17.8B, v17.8H, #6
+        add             v16.8h, v16.8h, v22.8h
+        add             v17.8h, v17.8h, v22.8h
+        shrn            v16.8b, v16.8h, #6
+        shrn            v17.8b, v17.8h, #6
   .endif
         prfm            pldl1strm, [x1, x2]
   .ifc \type,avg
-        ld1             {v20.8B}, [x8], x2
-        ld1             {v21.8B}, [x8], x2
-        urhadd          v16.8B, v16.8B, v20.8B
-        urhadd          v17.8B, v17.8B, v21.8B
+        ld1             {v20.8b}, [x8], x2
+        ld1             {v21.8b}, [x8], x2
+        urhadd          v16.8b, v16.8b, v20.8b
+        urhadd          v17.8b, v17.8b, v21.8b
   .endif
         subs            w3,  w3,  #2
-        st1             {v16.8B}, [x0], x2
-        st1             {v17.8B}, [x0], x2
+        st1             {v16.8b}, [x0], x2
+        st1             {v17.8b}, [x0], x2
         b.gt            3b
         ret
 
-4:      ld1             {v4.8B, v5.8B}, [x1], x2
-        ld1             {v6.8B, v7.8B}, [x1], x2
-        ext             v5.8B,  v4.8B,  v5.8B,  #1
-        ext             v7.8B,  v6.8B,  v7.8B,  #1
+4:      ld1             {v4.8b, v5.8b}, [x1], x2
+        ld1             {v6.8b, v7.8b}, [x1], x2
+        ext             v5.8b,  v4.8b,  v5.8b,  #1
+        ext             v7.8b,  v6.8b,  v7.8b,  #1
         prfm            pldl1strm, [x1]
         subs            w3,  w3,  #2
-        umull           v16.8H, v4.8B, v0.8B
-        umlal           v16.8H, v5.8B, v1.8B
-        umull           v17.8H, v6.8B, v0.8B
-        umlal           v17.8H, v7.8B, v1.8B
+        umull           v16.8h, v4.8b, v0.8b
+        umlal           v16.8h, v5.8b, v1.8b
+        umull           v17.8h, v6.8b, v0.8b
+        umlal           v17.8h, v7.8b, v1.8b
         prfm            pldl1strm, [x1, x2]
   .ifc \codec,h264
-        rshrn           v16.8B, v16.8H, #6
-        rshrn           v17.8B, v17.8H, #6
+        rshrn           v16.8b, v16.8h, #6
+        rshrn           v17.8b, v17.8h, #6
   .else
-        add             v16.8H, v16.8H, v22.8H
-        add             v17.8H, v17.8H, v22.8H
-        shrn            v16.8B, v16.8H, #6
-        shrn            v17.8B, v17.8H, #6
+        add             v16.8h, v16.8h, v22.8h
+        add             v17.8h, v17.8h, v22.8h
+        shrn            v16.8b, v16.8h, #6
+        shrn            v17.8b, v17.8h, #6
   .endif
   .ifc \type,avg
-        ld1             {v20.8B}, [x8], x2
-        ld1             {v21.8B}, [x8], x2
-        urhadd          v16.8B, v16.8B, v20.8B
-        urhadd          v17.8B, v17.8B, v21.8B
+        ld1             {v20.8b}, [x8], x2
+        ld1             {v21.8b}, [x8], x2
+        urhadd          v16.8b, v16.8b, v20.8b
+        urhadd          v17.8b, v17.8b, v21.8b
   .endif
-        st1             {v16.8B}, [x0], x2
-        st1             {v17.8B}, [x0], x2
+        st1             {v16.8b}, [x0], x2
+        st1             {v17.8b}, [x0], x2
         b.gt            4b
         ret
 
-5:      ld1             {v4.8B}, [x1], x2
-        ld1             {v5.8B}, [x1], x2
+5:      ld1             {v4.8b}, [x1], x2
+        ld1             {v5.8b}, [x1], x2
         prfm            pldl1strm, [x1]
         subs            w3,  w3,  #2
-        umull           v16.8H, v4.8B, v0.8B
-        umull           v17.8H, v5.8B, v0.8B
+        umull           v16.8h, v4.8b, v0.8b
+        umull           v17.8h, v5.8b, v0.8b
         prfm            pldl1strm, [x1, x2]
   .ifc \codec,h264
-        rshrn           v16.8B, v16.8H, #6
-        rshrn           v17.8B, v17.8H, #6
+        rshrn           v16.8b, v16.8h, #6
+        rshrn           v17.8b, v17.8h, #6
   .else
-        add             v16.8H, v16.8H, v22.8H
-        add             v17.8H, v17.8H, v22.8H
-        shrn            v16.8B, v16.8H, #6
-        shrn            v17.8B, v17.8H, #6
+        add             v16.8h, v16.8h, v22.8h
+        add             v17.8h, v17.8h, v22.8h
+        shrn            v16.8b, v16.8h, #6
+        shrn            v17.8b, v17.8h, #6
   .endif
   .ifc \type,avg
-        ld1             {v20.8B}, [x8], x2
-        ld1             {v21.8B}, [x8], x2
-        urhadd          v16.8B, v16.8B, v20.8B
-        urhadd          v17.8B, v17.8B, v21.8B
+        ld1             {v20.8b}, [x8], x2
+        ld1             {v21.8b}, [x8], x2
+        urhadd          v16.8b, v16.8b, v20.8b
+        urhadd          v17.8b, v17.8b, v21.8b
   .endif
-        st1             {v16.8B}, [x0], x2
-        st1             {v17.8B}, [x0], x2
+        st1             {v16.8b}, [x0], x2
+        st1             {v17.8b}, [x0], x2
         b.gt            5b
         ret
 endfunc
@@ -208,11 +208,11 @@
         lsl             w9,  w9,  #3
         lsl             w10, w10, #1
         add             w9,  w9,  w10
-        add             x6,  x6,  w9, UXTW
-        ld1r            {v22.8H}, [x6]
+        add             x6,  x6,  w9, uxtw
+        ld1r            {v22.8h}, [x6]
   .endif
   .ifc \codec,vc1
-        movi            v22.8H,   #28
+        movi            v22.8h,   #28
   .endif
         mul             w7,  w4,  w5
         lsl             w14, w5,  #3
@@ -225,133 +225,133 @@
         add             w4,  w4,  #64
         b.eq            2f
 
-        dup             v24.8B,  w4
-        dup             v25.8B,  w12
-        ld1             {v4.8B}, [x1], x2
-        dup             v26.8B,  w6
-        dup             v27.8B,  w7
-        ext             v5.8B,  v4.8B,  v5.8B, #1
-        trn1            v0.2S,  v24.2S, v25.2S
-        trn1            v2.2S,  v26.2S, v27.2S
-        trn1            v4.2S,  v4.2S,  v5.2S
-1:      ld1             {v6.8B}, [x1], x2
-        ext             v7.8B,  v6.8B,  v7.8B, #1
-        trn1            v6.2S,  v6.2S,  v7.2S
-        umull           v18.8H, v4.8B,  v0.8B
-        umlal           v18.8H, v6.8B,  v2.8B
-        ld1             {v4.8B}, [x1], x2
-        ext             v5.8B,  v4.8B,  v5.8B, #1
-        trn1            v4.2S,  v4.2S,  v5.2S
-        prfm            pldl1strm, [x1]
-        umull           v19.8H, v6.8B,  v0.8B
-        umlal           v19.8H, v4.8B,  v2.8B
-        trn1            v30.2D, v18.2D, v19.2D
-        trn2            v31.2D, v18.2D, v19.2D
-        add             v18.8H, v30.8H, v31.8H
+        dup             v24.8b,  w4
+        dup             v25.8b,  w12
+        ld1             {v4.8b}, [x1], x2
+        dup             v26.8b,  w6
+        dup             v27.8b,  w7
+        ext             v5.8b,  v4.8b,  v5.8b, #1
+        trn1            v0.2s,  v24.2s, v25.2s
+        trn1            v2.2s,  v26.2s, v27.2s
+        trn1            v4.2s,  v4.2s,  v5.2s
+1:      ld1             {v6.8b}, [x1], x2
+        ext             v7.8b,  v6.8b,  v7.8b, #1
+        trn1            v6.2s,  v6.2s,  v7.2s
+        umull           v18.8h, v4.8b,  v0.8b
+        umlal           v18.8h, v6.8b,  v2.8b
+        ld1             {v4.8b}, [x1], x2
+        ext             v5.8b,  v4.8b,  v5.8b, #1
+        trn1            v4.2s,  v4.2s,  v5.2s
+        prfm            pldl1strm, [x1]
+        umull           v19.8h, v6.8b,  v0.8b
+        umlal           v19.8h, v4.8b,  v2.8b
+        trn1            v30.2d, v18.2d, v19.2d
+        trn2            v31.2d, v18.2d, v19.2d
+        add             v18.8h, v30.8h, v31.8h
   .ifc \codec,h264
-        rshrn           v16.8B, v18.8H, #6
+        rshrn           v16.8b, v18.8h, #6
   .else
-        add             v18.8H, v18.8H, v22.8H
-        shrn            v16.8B, v18.8H, #6
+        add             v18.8h, v18.8h, v22.8h
+        shrn            v16.8b, v18.8h, #6
   .endif
         subs            w3,  w3,  #2
         prfm            pldl1strm, [x1, x2]
   .ifc \type,avg
-        ld1             {v20.S}[0], [x8], x2
-        ld1             {v20.S}[1], [x8], x2
-        urhadd          v16.8B, v16.8B, v20.8B
+        ld1             {v20.s}[0], [x8], x2
+        ld1             {v20.s}[1], [x8], x2
+        urhadd          v16.8b, v16.8b, v20.8b
   .endif
-        st1             {v16.S}[0], [x0], x2
-        st1             {v16.S}[1], [x0], x2
+        st1             {v16.s}[0], [x0], x2
+        st1             {v16.s}[1], [x0], x2
         b.gt            1b
         ret
 
 2:      adds            w12, w12, w6
-        dup             v30.8B, w4
+        dup             v30.8b, w4
         b.eq            5f
         tst             w6,  w6
-        dup             v31.8B, w12
-        trn1            v0.2S,  v30.2S, v31.2S
-        trn2            v1.2S,  v30.2S, v31.2S
+        dup             v31.8b, w12
+        trn1            v0.2s,  v30.2s, v31.2s
+        trn2            v1.2s,  v30.2s, v31.2s
         b.eq            4f
 
-        ext             v1.8B,  v0.8B,  v1.8B, #4
-        ld1             {v4.S}[0], [x1], x2
-3:      ld1             {v4.S}[1], [x1], x2
-        umull           v18.8H, v4.8B,  v0.8B
-        ld1             {v4.S}[0], [x1], x2
-        umull           v19.8H, v4.8B,  v1.8B
-        trn1            v30.2D, v18.2D, v19.2D
-        trn2            v31.2D, v18.2D, v19.2D
-        add             v18.8H, v30.8H, v31.8H
+        ext             v1.8b,  v0.8b,  v1.8b, #4
+        ld1             {v4.s}[0], [x1], x2
+3:      ld1             {v4.s}[1], [x1], x2
+        umull           v18.8h, v4.8b,  v0.8b
+        ld1             {v4.s}[0], [x1], x2
+        umull           v19.8h, v4.8b,  v1.8b
+        trn1            v30.2d, v18.2d, v19.2d
+        trn2            v31.2d, v18.2d, v19.2d
+        add             v18.8h, v30.8h, v31.8h
         prfm            pldl1strm, [x1]
   .ifc \codec,h264
-        rshrn           v16.8B, v18.8H, #6
+        rshrn           v16.8b, v18.8h, #6
   .else
-        add             v18.8H, v18.8H, v22.8H
-        shrn            v16.8B, v18.8H, #6
+        add             v18.8h, v18.8h, v22.8h
+        shrn            v16.8b, v18.8h, #6
   .endif
   .ifc \type,avg
-        ld1             {v20.S}[0], [x8], x2
-        ld1             {v20.S}[1], [x8], x2
-        urhadd          v16.8B, v16.8B, v20.8B
+        ld1             {v20.s}[0], [x8], x2
+        ld1             {v20.s}[1], [x8], x2
+        urhadd          v16.8b, v16.8b, v20.8b
   .endif
         subs            w3,  w3,  #2
         prfm            pldl1strm, [x1, x2]
-        st1             {v16.S}[0], [x0], x2
-        st1             {v16.S}[1], [x0], x2
+        st1             {v16.s}[0], [x0], x2
+        st1             {v16.s}[1], [x0], x2
         b.gt            3b
         ret
 
-4:      ld1             {v4.8B}, [x1], x2
-        ld1             {v6.8B}, [x1], x2
-        ext             v5.8B,  v4.8B,  v5.8B, #1
-        ext             v7.8B,  v6.8B,  v7.8B, #1
-        trn1            v4.2S,  v4.2S,  v5.2S
-        trn1            v6.2S,  v6.2S,  v7.2S
-        umull           v18.8H, v4.8B,  v0.8B
-        umull           v19.8H, v6.8B,  v0.8B
+4:      ld1             {v4.8b}, [x1], x2
+        ld1             {v6.8b}, [x1], x2
+        ext             v5.8b,  v4.8b,  v5.8b, #1
+        ext             v7.8b,  v6.8b,  v7.8b, #1
+        trn1            v4.2s,  v4.2s,  v5.2s
+        trn1            v6.2s,  v6.2s,  v7.2s
+        umull           v18.8h, v4.8b,  v0.8b
+        umull           v19.8h, v6.8b,  v0.8b
         subs            w3,  w3,  #2
-        trn1            v30.2D, v18.2D, v19.2D
-        trn2            v31.2D, v18.2D, v19.2D
-        add             v18.8H, v30.8H, v31.8H
+        trn1            v30.2d, v18.2d, v19.2d
+        trn2            v31.2d, v18.2d, v19.2d
+        add             v18.8h, v30.8h, v31.8h
         prfm            pldl1strm, [x1]
   .ifc \codec,h264
-        rshrn           v16.8B, v18.8H, #6
+        rshrn           v16.8b, v18.8h, #6
   .else
-        add             v18.8H, v18.8H, v22.8H
-        shrn            v16.8B, v18.8H, #6
+        add             v18.8h, v18.8h, v22.8h
+        shrn            v16.8b, v18.8h, #6
   .endif
   .ifc \type,avg
-        ld1             {v20.S}[0], [x8], x2
-        ld1             {v20.S}[1], [x8], x2
-        urhadd          v16.8B, v16.8B, v20.8B
+        ld1             {v20.s}[0], [x8], x2
+        ld1             {v20.s}[1], [x8], x2
+        urhadd          v16.8b, v16.8b, v20.8b
   .endif
         prfm            pldl1strm, [x1]
-        st1             {v16.S}[0], [x0], x2
-        st1             {v16.S}[1], [x0], x2
+        st1             {v16.s}[0], [x0], x2
+        st1             {v16.s}[1], [x0], x2
         b.gt            4b
         ret
 
-5:      ld1             {v4.S}[0], [x1], x2
-        ld1             {v4.S}[1], [x1], x2
-        umull           v18.8H, v4.8B,  v30.8B
+5:      ld1             {v4.s}[0], [x1], x2
+        ld1             {v4.s}[1], [x1], x2
+        umull           v18.8h, v4.8b,  v30.8b
         subs            w3,  w3,  #2
         prfm            pldl1strm, [x1]
   .ifc \codec,h264
-        rshrn           v16.8B, v18.8H, #6
+        rshrn           v16.8b, v18.8h, #6
   .else
-        add             v18.8H, v18.8H, v22.8H
-        shrn            v16.8B, v18.8H, #6
+        add             v18.8h, v18.8h, v22.8h
+        shrn            v16.8b, v18.8h, #6
   .endif
   .ifc \type,avg
-        ld1             {v20.S}[0], [x8], x2
-        ld1             {v20.S}[1], [x8], x2
-        urhadd          v16.8B, v16.8B, v20.8B
+        ld1             {v20.s}[0], [x8], x2
+        ld1             {v20.s}[1], [x8], x2
+        urhadd          v16.8b, v16.8b, v20.8b
   .endif
         prfm            pldl1strm, [x1]
-        st1             {v16.S}[0], [x0], x2
-        st1             {v16.S}[1], [x0], x2
+        st1             {v16.s}[0], [x0], x2
+        st1             {v16.s}[1], [x0], x2
         b.gt            5b
         ret
 endfunc
@@ -372,51 +372,51 @@
         sub             w4,  w7,  w13
         sub             w4,  w4,  w14
         add             w4,  w4,  #64
-        dup             v0.8B,  w4
-        dup             v2.8B,  w12
-        dup             v1.8B,  w6
-        dup             v3.8B,  w7
-        trn1            v0.4H,  v0.4H,  v2.4H
-        trn1            v1.4H,  v1.4H,  v3.4H
+        dup             v0.8b,  w4
+        dup             v2.8b,  w12
+        dup             v1.8b,  w6
+        dup             v3.8b,  w7
+        trn1            v0.4h,  v0.4h,  v2.4h
+        trn1            v1.4h,  v1.4h,  v3.4h
 1:
-        ld1             {v4.S}[0],  [x1], x2
-        ld1             {v4.S}[1],  [x1], x2
-        rev64           v5.2S,  v4.2S
-        ld1             {v5.S}[1],  [x1]
-        ext             v6.8B,  v4.8B,  v5.8B,  #1
-        ext             v7.8B,  v5.8B,  v4.8B,  #1
-        trn1            v4.4H,  v4.4H,  v6.4H
-        trn1            v5.4H,  v5.4H,  v7.4H
-        umull           v16.8H, v4.8B,  v0.8B
-        umlal           v16.8H, v5.8B,  v1.8B
+        ld1             {v4.s}[0],  [x1], x2
+        ld1             {v4.s}[1],  [x1], x2
+        rev64           v5.2s,  v4.2s
+        ld1             {v5.s}[1],  [x1]
+        ext             v6.8b,  v4.8b,  v5.8b,  #1
+        ext             v7.8b,  v5.8b,  v4.8b,  #1
+        trn1            v4.4h,  v4.4h,  v6.4h
+        trn1            v5.4h,  v5.4h,  v7.4h
+        umull           v16.8h, v4.8b,  v0.8b
+        umlal           v16.8h, v5.8b,  v1.8b
   .ifc \type,avg
-        ld1             {v18.H}[0], [x0], x2
-        ld1             {v18.H}[2], [x0]
+        ld1             {v18.h}[0], [x0], x2
+        ld1             {v18.h}[2], [x0]
         sub             x0,  x0,  x2
   .endif
-        rev64           v17.4S, v16.4S
-        add             v16.8H, v16.8H, v17.8H
-        rshrn           v16.8B, v16.8H, #6
+        rev64           v17.4s, v16.4s
+        add             v16.8h, v16.8h, v17.8h
+        rshrn           v16.8b, v16.8h, #6
   .ifc \type,avg
-        urhadd          v16.8B, v16.8B, v18.8B
+        urhadd          v16.8b, v16.8b, v18.8b
   .endif
-        st1             {v16.H}[0], [x0], x2
-        st1             {v16.H}[2], [x0], x2
+        st1             {v16.h}[0], [x0], x2
+        st1             {v16.h}[2], [x0], x2
         subs            w3,  w3,  #2
         b.gt            1b
         ret
 
 2:
-        ld1             {v16.H}[0], [x1], x2
-        ld1             {v16.H}[1], [x1], x2
+        ld1             {v16.h}[0], [x1], x2
+        ld1             {v16.h}[1], [x1], x2
   .ifc \type,avg
-        ld1             {v18.H}[0], [x0], x2
-        ld1             {v18.H}[1], [x0]
+        ld1             {v18.h}[0], [x0], x2
+        ld1             {v18.h}[1], [x0]
         sub             x0,  x0,  x2
-        urhadd          v16.8B, v16.8B, v18.8B
+        urhadd          v16.8b, v16.8b, v18.8b
   .endif
-        st1             {v16.H}[0], [x0], x2
-        st1             {v16.H}[1], [x0], x2
+        st1             {v16.h}[0], [x0], x2
+        st1             {v16.h}[1], [x0], x2
         subs            w3,  w3,  #2
         b.gt            2b
         ret
diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/h264dsp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/h264dsp_neon.S
--- ffmpeg-5.1.8/libavcodec/aarch64/h264dsp_neon.S	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/aarch64/h264dsp_neon.S	2026-03-16 18:10:00.000000000 +0000
@@ -27,7 +27,7 @@
         cmp             w2,  #0
         ldr             w6,  [x4]
         ccmp            w3,  #0, #0, ne
-        mov             v24.S[0], w6
+        mov             v24.s[0], w6
         and             w8,  w6,  w6,  lsl #16
         b.eq            1f
         ands            w8,  w8,  w8,  lsl #8
@@ -38,95 +38,95 @@
 .endm
 
 .macro  h264_loop_filter_luma
-        dup             v22.16B, w2                     // alpha
-        uxtl            v24.8H,  v24.8B
-        uabd            v21.16B, v16.16B, v0.16B        // abs(p0 - q0)
-        uxtl            v24.4S,  v24.4H
-        uabd            v28.16B, v18.16B, v16.16B       // abs(p1 - p0)
-        sli             v24.8H,  v24.8H,  #8
-        uabd            v30.16B, v2.16B,  v0.16B        // abs(q1 - q0)
-        sli             v24.4S,  v24.4S,  #16
-        cmhi            v21.16B, v22.16B, v21.16B       // < alpha
-        dup             v22.16B, w3                     // beta
-        cmlt            v23.16B, v24.16B, #0
-        cmhi            v28.16B, v22.16B, v28.16B       // < beta
-        cmhi            v30.16B, v22.16B, v30.16B       // < beta
-        bic             v21.16B, v21.16B, v23.16B
-        uabd            v17.16B, v20.16B, v16.16B       // abs(p2 - p0)
-        and             v21.16B, v21.16B, v28.16B
-        uabd            v19.16B,  v4.16B,  v0.16B       // abs(q2 - q0)
-        and             v21.16B, v21.16B, v30.16B      // < beta
+        dup             v22.16b, w2                     // alpha
+        uxtl            v24.8h,  v24.8b
+        uabd            v21.16b, v16.16b, v0.16b        // abs(p0 - q0)
+        uxtl            v24.4s,  v24.4h
+        uabd            v28.16b, v18.16b, v16.16b       // abs(p1 - p0)
+        sli             v24.8h,  v24.8h,  #8
+        uabd            v30.16b, v2.16b,  v0.16b        // abs(q1 - q0)
+        sli             v24.4s,  v24.4s,  #16
+        cmhi            v21.16b, v22.16b, v21.16b       // < alpha
+        dup             v22.16b, w3                     // beta
+        cmlt            v23.16b, v24.16b, #0
+        cmhi            v28.16b, v22.16b, v28.16b       // < beta
+        cmhi            v30.16b, v22.16b, v30.16b       // < beta
+        bic             v21.16b, v21.16b, v23.16b
+        uabd            v17.16b, v20.16b, v16.16b       // abs(p2 - p0)
+        and             v21.16b, v21.16b, v28.16b
+        uabd            v19.16b,  v4.16b,  v0.16b       // abs(q2 - q0)
+        and             v21.16b, v21.16b, v30.16b      // < beta
         shrn            v30.8b,  v21.8h,  #4
         mov             x7, v30.d[0]
-        cmhi            v17.16B, v22.16B, v17.16B       // < beta
-        cmhi            v19.16B, v22.16B, v19.16B       // < beta
+        cmhi            v17.16b, v22.16b, v17.16b       // < beta
+        cmhi            v19.16b, v22.16b, v19.16b       // < beta
         cbz             x7,  9f
-        and             v17.16B, v17.16B, v21.16B
-        and             v19.16B, v19.16B, v21.16B
-        and             v24.16B, v24.16B, v21.16B
-        urhadd          v28.16B, v16.16B,  v0.16B
-        sub             v21.16B, v24.16B, v17.16B
-        uqadd           v23.16B, v18.16B, v24.16B
-        uhadd           v20.16B, v20.16B, v28.16B
-        sub             v21.16B, v21.16B, v19.16B
-        uhadd           v28.16B,  v4.16B, v28.16B
-        umin            v23.16B, v23.16B, v20.16B
-        uqsub           v22.16B, v18.16B, v24.16B
-        uqadd           v4.16B,   v2.16B, v24.16B
-        umax            v23.16B, v23.16B, v22.16B
-        uqsub           v22.16B,  v2.16B, v24.16B
-        umin            v28.16B,  v4.16B, v28.16B
-        uxtl            v4.8H,    v0.8B
-        umax            v28.16B, v28.16B, v22.16B
-        uxtl2           v20.8H,   v0.16B
-        usubw           v4.8H,    v4.8H,  v16.8B
-        usubw2          v20.8H,  v20.8H,  v16.16B
-        shl             v4.8H,    v4.8H,  #2
-        shl             v20.8H,  v20.8H,  #2
-        uaddw           v4.8H,    v4.8H,  v18.8B
-        uaddw2          v20.8H,  v20.8H,  v18.16B
-        usubw           v4.8H,    v4.8H,   v2.8B
-        usubw2          v20.8H,  v20.8H,   v2.16B
-        rshrn           v4.8B,    v4.8H,  #3
-        rshrn2          v4.16B,  v20.8H,  #3
-        bsl             v17.16B, v23.16B, v18.16B
-        bsl             v19.16B, v28.16B,  v2.16B
-        neg             v23.16B, v21.16B
-        uxtl            v28.8H,  v16.8B
-        smin            v4.16B,   v4.16B, v21.16B
-        uxtl2           v21.8H,  v16.16B
-        smax            v4.16B,   v4.16B, v23.16B
-        uxtl            v22.8H,   v0.8B
-        uxtl2           v24.8H,   v0.16B
-        saddw           v28.8H,  v28.8H,  v4.8B
-        saddw2          v21.8H,  v21.8H,  v4.16B
-        ssubw           v22.8H,  v22.8H,  v4.8B
-        ssubw2          v24.8H,  v24.8H,  v4.16B
-        sqxtun          v16.8B,  v28.8H
-        sqxtun2         v16.16B, v21.8H
-        sqxtun          v0.8B,   v22.8H
-        sqxtun2         v0.16B,  v24.8H
+        and             v17.16b, v17.16b, v21.16b
+        and             v19.16b, v19.16b, v21.16b
+        and             v24.16b, v24.16b, v21.16b
+        urhadd          v28.16b, v16.16b,  v0.16b
+        sub             v21.16b, v24.16b, v17.16b
+        uqadd           v23.16b, v18.16b, v24.16b
+        uhadd           v20.16b, v20.16b, v28.16b
+        sub             v21.16b, v21.16b, v19.16b
+        uhadd           v28.16b,  v4.16b, v28.16b
+        umin            v23.16b, v23.16b, v20.16b
+        uqsub           v22.16b, v18.16b, v24.16b
+        uqadd           v4.16b,   v2.16b, v24.16b
+        umax            v23.16b, v23.16b, v22.16b
+        uqsub           v22.16b,  v2.16b, v24.16b
+        umin            v28.16b,  v4.16b, v28.16b
+        uxtl            v4.8h,    v0.8b
+        umax            v28.16b, v28.16b, v22.16b
+        uxtl2           v20.8h,   v0.16b
+        usubw           v4.8h,    v4.8h,  v16.8b
+        usubw2          v20.8h,  v20.8h,  v16.16b
+        shl             v4.8h,    v4.8h,  #2
+        shl             v20.8h,  v20.8h,  #2
+        uaddw           v4.8h,    v4.8h,  v18.8b
+        uaddw2          v20.8h,  v20.8h,  v18.16b
+        usubw           v4.8h,    v4.8h,   v2.8b
+        usubw2          v20.8h,  v20.8h,   v2.16b
+        rshrn           v4.8b,    v4.8h,  #3
+        rshrn2          v4.16b,  v20.8h,  #3
+        bsl             v17.16b, v23.16b, v18.16b
+        bsl             v19.16b, v28.16b,  v2.16b
+        neg             v23.16b, v21.16b
+        uxtl            v28.8h,  v16.8b
+        smin            v4.16b,   v4.16b, v21.16b
+        uxtl2           v21.8h,  v16.16b
+        smax            v4.16b,   v4.16b, v23.16b
+        uxtl            v22.8h,   v0.8b
+        uxtl2           v24.8h,   v0.16b
+        saddw           v28.8h,  v28.8h,  v4.8b
+        saddw2          v21.8h,  v21.8h,  v4.16b
+        ssubw           v22.8h,  v22.8h,  v4.8b
+        ssubw2          v24.8h,  v24.8h,  v4.16b
+        sqxtun          v16.8b,  v28.8h
+        sqxtun2         v16.16b, v21.8h
+        sqxtun          v0.8b,   v22.8h
+        sqxtun2         v0.16b,  v24.8h
 .endm
 
 function ff_h264_v_loop_filter_luma_neon, export=1
         h264_loop_filter_start
 
-        ld1             {v0.16B},  [x0], x1
-        ld1             {v2.16B},  [x0], x1
-        ld1             {v4.16B},  [x0], x1
+        ld1             {v0.16b},  [x0], x1
+        ld1             {v2.16b},  [x0], x1
+        ld1             {v4.16b},  [x0], x1
         sub             x0,  x0,  x1, lsl #2
         sub             x0,  x0,  x1, lsl #1
-        ld1             {v20.16B},  [x0], x1
-        ld1             {v18.16B},  [x0], x1
-        ld1             {v16.16B},  [x0], x1
+        ld1             {v20.16b},  [x0], x1
+        ld1             {v18.16b},  [x0], x1
+        ld1             {v16.16b},  [x0], x1
 
         h264_loop_filter_luma
 
         sub             x0,  x0,  x1, lsl #1
-        st1             {v17.16B},  [x0], x1
-        st1             {v16.16B}, [x0], x1
-        st1             {v0.16B},  [x0], x1
-        st1             {v19.16B}, [x0]
+        st1             {v17.16b},  [x0], x1
+        st1             {v16.16b}, [x0], x1
+        st1             {v0.16b},  [x0], x1
+        st1             {v19.16b}, [x0]
 9:
         ret
 endfunc
@@ -135,22 +135,22 @@
         h264_loop_filter_start
 
         sub             x0,  x0,  #4
-        ld1             {v6.8B},  [x0], x1
-        ld1             {v20.8B}, [x0], x1
-        ld1             {v18.8B}, [x0], x1
-        ld1             {v16.8B}, [x0], x1
-        ld1             {v0.8B},  [x0], x1
-        ld1             {v2.8B},  [x0], x1
-        ld1             {v4.8B},  [x0], x1
-        ld1             {v26.8B}, [x0], x1
-        ld1             {v6.D}[1],  [x0], x1
-        ld1             {v20.D}[1], [x0], x1
-        ld1             {v18.D}[1], [x0], x1
-        ld1             {v16.D}[1], [x0], x1
-        ld1             {v0.D}[1],  [x0], x1
-        ld1             {v2.D}[1],  [x0], x1
-        ld1             {v4.D}[1],  [x0], x1
-        ld1             {v26.D}[1], [x0], x1
+        ld1             {v6.8b},  [x0], x1
+        ld1             {v20.8b}, [x0], x1
+        ld1             {v18.8b}, [x0], x1
+        ld1             {v16.8b}, [x0], x1
+        ld1             {v0.8b},  [x0], x1
+        ld1             {v2.8b},  [x0], x1
+        ld1             {v4.8b},  [x0], x1
+        ld1             {v26.8b}, [x0], x1
+        ld1             {v6.d}[1],  [x0], x1
+        ld1             {v20.d}[1], [x0], x1
+        ld1             {v18.d}[1], [x0], x1
+        ld1             {v16.d}[1], [x0], x1
+        ld1             {v0.d}[1],  [x0], x1
+        ld1             {v2.d}[1],  [x0], x1
+        ld1             {v4.d}[1],  [x0], x1
+        ld1             {v26.d}[1], [x0], x1
 
         transpose_8x16B v6, v20, v18, v16, v0, v2, v4, v26, v21, v23
 
@@ -160,22 +160,22 @@
 
         sub             x0,  x0,  x1, lsl #4
         add             x0,  x0,  #2
-        st1             {v17.S}[0],  [x0], x1
-        st1             {v16.S}[0], [x0], x1
-        st1             {v0.S}[0],  [x0], x1
-        st1             {v19.S}[0], [x0], x1
-        st1             {v17.S}[1],  [x0], x1
-        st1             {v16.S}[1], [x0], x1
-        st1             {v0.S}[1],  [x0], x1
-        st1             {v19.S}[1], [x0], x1
-        st1             {v17.S}[2],  [x0], x1
-        st1             {v16.S}[2], [x0], x1
-        st1             {v0.S}[2],  [x0], x1
-        st1             {v19.S}[2], [x0], x1
-        st1             {v17.S}[3],  [x0], x1
-        st1             {v16.S}[3], [x0], x1
-        st1             {v0.S}[3],  [x0], x1
-        st1             {v19.S}[3], [x0], x1
+        st1             {v17.s}[0],  [x0], x1
+        st1             {v16.s}[0], [x0], x1
+        st1             {v0.s}[0],  [x0], x1
+        st1             {v19.s}[0], [x0], x1
+        st1             {v17.s}[1],  [x0], x1
+        st1             {v16.s}[1], [x0], x1
+        st1             {v0.s}[1],  [x0], x1
+        st1             {v19.s}[1], [x0], x1
+        st1             {v17.s}[2],  [x0], x1
+        st1             {v16.s}[2], [x0], x1
+        st1             {v0.s}[2],  [x0], x1
+        st1             {v19.s}[2], [x0], x1
+        st1             {v17.s}[3],  [x0], x1
+        st1             {v16.s}[3], [x0], x1
+        st1             {v0.s}[3],  [x0], x1
+        st1             {v19.s}[3], [x0], x1
 9:
         ret
 endfunc
@@ -377,52 +377,52 @@
 endfunc
 
 .macro  h264_loop_filter_chroma
-        dup             v22.8B, w2              // alpha
-        dup             v23.8B, w3              // beta
-        uxtl            v24.8H, v24.8B
-        uabd            v26.8B, v16.8B, v0.8B   // abs(p0 - q0)
-        uabd            v28.8B, v18.8B, v16.8B  // abs(p1 - p0)
-        uabd            v30.8B, v2.8B,  v0.8B   // abs(q1 - q0)
-        cmhi            v26.8B, v22.8B, v26.8B  // < alpha
-        cmhi            v28.8B, v23.8B, v28.8B  // < beta
-        cmhi            v30.8B, v23.8B, v30.8B  // < beta
-        uxtl            v4.8H,  v0.8B
-        and             v26.8B, v26.8B, v28.8B
-        usubw           v4.8H,  v4.8H,  v16.8B
-        and             v26.8B, v26.8B, v30.8B
-        shl             v4.8H,  v4.8H,  #2
+        dup             v22.8b, w2              // alpha
+        dup             v23.8b, w3              // beta
+        uxtl            v24.8h, v24.8b
+        uabd            v26.8b, v16.8b, v0.8b   // abs(p0 - q0)
+        uabd            v28.8b, v18.8b, v16.8b  // abs(p1 - p0)
+        uabd            v30.8b, v2.8b,  v0.8b   // abs(q1 - q0)
+        cmhi            v26.8b, v22.8b, v26.8b  // < alpha
+        cmhi            v28.8b, v23.8b, v28.8b  // < beta
+        cmhi            v30.8b, v23.8b, v30.8b  // < beta
+        uxtl            v4.8h,  v0.8b
+        and             v26.8b, v26.8b, v28.8b
+        usubw           v4.8h,  v4.8h,  v16.8b
+        and             v26.8b, v26.8b, v30.8b
+        shl             v4.8h,  v4.8h,  #2
         mov             x8,  v26.d[0]
-        sli             v24.8H, v24.8H, #8
-        uaddw           v4.8H,  v4.8H,  v18.8B
+        sli             v24.8h, v24.8h, #8
+        uaddw           v4.8h,  v4.8h,  v18.8b
         cbz             x8,  9f
-        usubw           v4.8H,  v4.8H,  v2.8B
-        rshrn           v4.8B,  v4.8H,  #3
-        smin            v4.8B,  v4.8B,  v24.8B
-        neg             v25.8B, v24.8B
-        smax            v4.8B,  v4.8B,  v25.8B
-        uxtl            v22.8H, v0.8B
-        and             v4.8B,  v4.8B,  v26.8B
-        uxtl            v28.8H, v16.8B
-        saddw           v28.8H, v28.8H, v4.8B
-        ssubw           v22.8H, v22.8H, v4.8B
-        sqxtun          v16.8B, v28.8H
-        sqxtun          v0.8B,  v22.8H
+        usubw           v4.8h,  v4.8h,  v2.8b
+        rshrn           v4.8b,  v4.8h,  #3
+        smin            v4.8b,  v4.8b,  v24.8b
+        neg             v25.8b, v24.8b
+        smax            v4.8b,  v4.8b,  v25.8b
+        uxtl            v22.8h, v0.8b
+        and             v4.8b,  v4.8b,  v26.8b
+        uxtl            v28.8h, v16.8b
+        saddw           v28.8h, v28.8h, v4.8b
+        ssubw           v22.8h, v22.8h, v4.8b
+        sqxtun          v16.8b, v28.8h
+        sqxtun          v0.8b,  v22.8h
 .endm
 
 function ff_h264_v_loop_filter_chroma_neon, export=1
         h264_loop_filter_start
 
         sub             x0,  x0,  x1, lsl #1
-        ld1             {v18.8B}, [x0], x1
-        ld1             {v16.8B}, [x0], x1
-        ld1             {v0.8B},  [x0], x1
-        ld1             {v2.8B},  [x0]
+        ld1             {v18.8b}, [x0], x1
+        ld1             {v16.8b}, [x0], x1
+        ld1             {v0.8b},  [x0], x1
+        ld1             {v2.8b},  [x0]
 
         h264_loop_filter_chroma
 
         sub             x0,  x0,  x1, lsl #1
-        st1             {v16.8B}, [x0], x1
-        st1             {v0.8B},  [x0], x1
+        st1             {v16.8b}, [x0], x1
+        st1             {v0.8b},  [x0], x1
 9:
         ret
 endfunc
@@ -432,14 +432,14 @@
 
         sub             x0,  x0,  #2
 h_loop_filter_chroma420:
-        ld1             {v18.S}[0], [x0], x1
-        ld1             {v16.S}[0], [x0], x1
-        ld1             {v0.S}[0],  [x0], x1
-        ld1             {v2.S}[0],  [x0], x1
-        ld1             {v18.S}[1], [x0], x1
-        ld1             {v16.S}[1], [x0], x1
-        ld1             {v0.S}[1],  [x0], x1
-        ld1             {v2.S}[1],  [x0], x1
+        ld1             {v18.s}[0], [x0], x1
+        ld1             {v16.s}[0], [x0], x1
+        ld1             {v0.s}[0],  [x0], x1
+        ld1             {v2.s}[0],  [x0], x1
+        ld1             {v18.s}[1], [x0], x1
+        ld1             {v16.s}[1], [x0], x1
+        ld1             {v0.s}[1],  [x0], x1
+        ld1             {v2.s}[1],  [x0], x1
 
         transpose_4x8B  v18, v16, v0, v2, v28, v29, v30, v31
 
@@ -448,14 +448,14 @@
         transpose_4x8B  v18, v16, v0, v2, v28, v29, v30, v31
 
         sub             x0,  x0,  x1, lsl #3
-        st1             {v18.S}[0], [x0], x1
-        st1             {v16.S}[0], [x0], x1
-        st1             {v0.S}[0],  [x0], x1
-        st1             {v2.S}[0],  [x0], x1
-        st1             {v18.S}[1], [x0], x1
-        st1             {v16.S}[1], [x0], x1
-        st1             {v0.S}[1],  [x0], x1
-        st1             {v2.S}[1],  [x0], x1
+        st1             {v18.s}[0], [x0], x1
+        st1             {v16.s}[0], [x0], x1
+        st1             {v0.s}[0],  [x0], x1
+        st1             {v2.s}[0],  [x0], x1
+        st1             {v18.s}[1], [x0], x1
+        st1             {v16.s}[1], [x0], x1
+        st1             {v0.s}[1],  [x0], x1
+        st1             {v2.s}[1],  [x0], x1
 9:
         ret
 endfunc
@@ -526,7 +526,7 @@
         ld1             {v17.8b}, [x4], x1
         ld1             {v19.8b}, [x4], x1
 
-        transpose_4x8B v18, v16, v17, v19, v26, v27, v28, v29
+        transpose_4x8B  v18, v16, v17, v19, v26, v27, v28, v29
 
         h264_loop_filter_chroma_intra
 
@@ -554,7 +554,7 @@
         ld1             {v17.s}[1], [x4], x1
         ld1             {v19.s}[1], [x4], x1
 
-        transpose_4x8B v18, v16, v17, v19, v26, v27, v28, v29
+        transpose_4x8B  v18, v16, v17, v19, v26, v27, v28, v29
 
         h264_loop_filter_chroma_intra
 
@@ -584,102 +584,102 @@
 endfunc
 
 .macro  biweight_16     macs, macd
-        dup             v0.16B,  w5
-        dup             v1.16B,  w6
-        mov             v4.16B,  v16.16B
-        mov             v6.16B,  v16.16B
+        dup             v0.16b,  w5
+        dup             v1.16b,  w6
+        mov             v4.16b,  v16.16b
+        mov             v6.16b,  v16.16b
 1:      subs            w3,  w3,  #2
-        ld1             {v20.16B}, [x0], x2
-        \macd           v4.8H,   v0.8B,  v20.8B
+        ld1             {v20.16b}, [x0], x2
+        \macd           v4.8h,   v0.8b,  v20.8b
         \macd\()2       v6.8H,   v0.16B, v20.16B
-        ld1             {v22.16B}, [x1], x2
-        \macs           v4.8H,   v1.8B,  v22.8B
+        ld1             {v22.16b}, [x1], x2
+        \macs           v4.8h,   v1.8b,  v22.8b
         \macs\()2       v6.8H,   v1.16B, v22.16B
-        mov             v24.16B, v16.16B
-        ld1             {v28.16B}, [x0], x2
-        mov             v26.16B, v16.16B
-        \macd           v24.8H,  v0.8B,  v28.8B
+        mov             v24.16b, v16.16b
+        ld1             {v28.16b}, [x0], x2
+        mov             v26.16b, v16.16b
+        \macd           v24.8h,  v0.8b,  v28.8b
         \macd\()2       v26.8H,  v0.16B, v28.16B
-        ld1             {v30.16B}, [x1], x2
-        \macs           v24.8H,  v1.8B,  v30.8B
+        ld1             {v30.16b}, [x1], x2
+        \macs           v24.8h,  v1.8b,  v30.8b
         \macs\()2       v26.8H,  v1.16B, v30.16B
-        sshl            v4.8H,   v4.8H,  v18.8H
-        sshl            v6.8H,   v6.8H,  v18.8H
-        sqxtun          v4.8B,   v4.8H
-        sqxtun2         v4.16B,  v6.8H
-        sshl            v24.8H,  v24.8H, v18.8H
-        sshl            v26.8H,  v26.8H, v18.8H
-        sqxtun          v24.8B,  v24.8H
-        sqxtun2         v24.16B, v26.8H
-        mov             v6.16B,  v16.16B
-        st1             {v4.16B},  [x7], x2
-        mov             v4.16B,  v16.16B
-        st1             {v24.16B}, [x7], x2
+        sshl            v4.8h,   v4.8h,  v18.8h
+        sshl            v6.8h,   v6.8h,  v18.8h
+        sqxtun          v4.8b,   v4.8h
+        sqxtun2         v4.16b,  v6.8h
+        sshl            v24.8h,  v24.8h, v18.8h
+        sshl            v26.8h,  v26.8h, v18.8h
+        sqxtun          v24.8b,  v24.8h
+        sqxtun2         v24.16b, v26.8h
+        mov             v6.16b,  v16.16b
+        st1             {v4.16b},  [x7], x2
+        mov             v4.16b,  v16.16b
+        st1             {v24.16b}, [x7], x2
         b.ne            1b
         ret
 .endm
 
 .macro  biweight_8      macs, macd
-        dup             v0.8B,  w5
-        dup             v1.8B,  w6
-        mov             v2.16B,  v16.16B
-        mov             v20.16B, v16.16B
+        dup             v0.8b,  w5
+        dup             v1.8b,  w6
+        mov             v2.16b,  v16.16b
+        mov             v20.16b, v16.16b
 1:      subs            w3,  w3,  #2
-        ld1             {v4.8B}, [x0], x2
-        \macd           v2.8H,  v0.8B,  v4.8B
-        ld1             {v5.8B}, [x1], x2
-        \macs           v2.8H,  v1.8B,  v5.8B
-        ld1             {v6.8B}, [x0], x2
-        \macd           v20.8H, v0.8B,  v6.8B
-        ld1             {v7.8B}, [x1], x2
-        \macs           v20.8H, v1.8B,  v7.8B
-        sshl            v2.8H,  v2.8H,  v18.8H
-        sqxtun          v2.8B,  v2.8H
-        sshl            v20.8H, v20.8H, v18.8H
-        sqxtun          v4.8B,  v20.8H
-        mov             v20.16B, v16.16B
-        st1             {v2.8B}, [x7], x2
-        mov             v2.16B,  v16.16B
-        st1             {v4.8B}, [x7], x2
+        ld1             {v4.8b}, [x0], x2
+        \macd           v2.8h,  v0.8b,  v4.8b
+        ld1             {v5.8b}, [x1], x2
+        \macs           v2.8h,  v1.8b,  v5.8b
+        ld1             {v6.8b}, [x0], x2
+        \macd           v20.8h, v0.8b,  v6.8b
+        ld1             {v7.8b}, [x1], x2
+        \macs           v20.8h, v1.8b,  v7.8b
+        sshl            v2.8h,  v2.8h,  v18.8h
+        sqxtun          v2.8b,  v2.8h
+        sshl            v20.8h, v20.8h, v18.8h
+        sqxtun          v4.8b,  v20.8h
+        mov             v20.16b, v16.16b
+        st1             {v2.8b}, [x7], x2
+        mov             v2.16b,  v16.16b
+        st1             {v4.8b}, [x7], x2
         b.ne            1b
         ret
 .endm
 
 .macro  biweight_4      macs, macd
-        dup             v0.8B,  w5
-        dup             v1.8B,  w6
-        mov             v2.16B, v16.16B
-        mov             v20.16B,v16.16B
+        dup             v0.8b,  w5
+        dup             v1.8b,  w6
+        mov             v2.16b, v16.16b
+        mov             v20.16b,v16.16b
 1:      subs            w3,  w3,  #4
-        ld1             {v4.S}[0], [x0], x2
-        ld1             {v4.S}[1], [x0], x2
-        \macd           v2.8H,  v0.8B,  v4.8B
-        ld1             {v5.S}[0], [x1], x2
-        ld1             {v5.S}[1], [x1], x2
-        \macs           v2.8H,  v1.8B,  v5.8B
+        ld1             {v4.s}[0], [x0], x2
+        ld1             {v4.s}[1], [x0], x2
+        \macd           v2.8h,  v0.8b,  v4.8b
+        ld1             {v5.s}[0], [x1], x2
+        ld1             {v5.s}[1], [x1], x2
+        \macs           v2.8h,  v1.8b,  v5.8b
         b.lt            2f
-        ld1             {v6.S}[0], [x0], x2
-        ld1             {v6.S}[1], [x0], x2
-        \macd           v20.8H, v0.8B,  v6.8B
-        ld1             {v7.S}[0], [x1], x2
-        ld1             {v7.S}[1], [x1], x2
-        \macs           v20.8H, v1.8B,  v7.8B
-        sshl            v2.8H,  v2.8H,  v18.8H
-        sqxtun          v2.8B,  v2.8H
-        sshl            v20.8H, v20.8H, v18.8H
-        sqxtun          v4.8B,  v20.8H
-        mov             v20.16B, v16.16B
-        st1             {v2.S}[0], [x7], x2
-        st1             {v2.S}[1], [x7], x2
-        mov             v2.16B,  v16.16B
-        st1             {v4.S}[0], [x7], x2
-        st1             {v4.S}[1], [x7], x2
+        ld1             {v6.s}[0], [x0], x2
+        ld1             {v6.s}[1], [x0], x2
+        \macd           v20.8h, v0.8b,  v6.8b
+        ld1             {v7.s}[0], [x1], x2
+        ld1             {v7.s}[1], [x1], x2
+        \macs           v20.8h, v1.8b,  v7.8b
+        sshl            v2.8h,  v2.8h,  v18.8h
+        sqxtun          v2.8b,  v2.8h
+        sshl            v20.8h, v20.8h, v18.8h
+        sqxtun          v4.8b,  v20.8h
+        mov             v20.16b, v16.16b
+        st1             {v2.s}[0], [x7], x2
+        st1             {v2.s}[1], [x7], x2
+        mov             v2.16b,  v16.16b
+        st1             {v4.s}[0], [x7], x2
+        st1             {v4.s}[1], [x7], x2
         b.ne            1b
         ret
-2:      sshl            v2.8H,  v2.8H,  v18.8H
-        sqxtun          v2.8B,  v2.8H
-        st1             {v2.S}[0], [x7], x2
-        st1             {v2.S}[1], [x7], x2
+2:      sshl            v2.8h,  v2.8h,  v18.8h
+        sqxtun          v2.8b,  v2.8h
+        st1             {v2.s}[0], [x7], x2
+        st1             {v2.s}[1], [x7], x2
         ret
 .endm
 
@@ -689,10 +689,10 @@
         add             w7,  w7,  #1
         eor             w8,  w8,  w6,  lsr #30
         orr             w7,  w7,  #1
-        dup             v18.8H,   w4
+        dup             v18.8h,   w4
         lsl             w7,  w7,  w4
-        not             v18.16B,  v18.16B
-        dup             v16.8H,   w7
+        not             v18.16b,  v18.16b
+        dup             v16.8h,   w7
         mov             x7,  x0
         cbz             w8,  10f
         subs            w8,  w8,  #1
@@ -716,78 +716,78 @@
         biweight_func   4
 
 .macro  weight_16       add
-        dup             v0.16B,  w4
+        dup             v0.16b,  w4
 1:      subs            w2,  w2,  #2
-        ld1             {v20.16B}, [x0], x1
-        umull           v4.8H,   v0.8B,  v20.8B
-        umull2          v6.8H,   v0.16B, v20.16B
-        ld1             {v28.16B}, [x0], x1
-        umull           v24.8H,  v0.8B,  v28.8B
-        umull2          v26.8H,  v0.16B, v28.16B
-        \add            v4.8H,   v16.8H, v4.8H
-        srshl           v4.8H,   v4.8H,  v18.8H
-        \add            v6.8H,   v16.8H, v6.8H
-        srshl           v6.8H,   v6.8H,  v18.8H
-        sqxtun          v4.8B,   v4.8H
-        sqxtun2         v4.16B,  v6.8H
-        \add            v24.8H,  v16.8H, v24.8H
-        srshl           v24.8H,  v24.8H, v18.8H
-        \add            v26.8H,  v16.8H, v26.8H
-        srshl           v26.8H,  v26.8H, v18.8H
-        sqxtun          v24.8B,  v24.8H
-        sqxtun2         v24.16B, v26.8H
-        st1             {v4.16B},  [x5], x1
-        st1             {v24.16B}, [x5], x1
+        ld1             {v20.16b}, [x0], x1
+        umull           v4.8h,   v0.8b,  v20.8b
+        umull2          v6.8h,   v0.16b, v20.16b
+        ld1             {v28.16b}, [x0], x1
+        umull           v24.8h,  v0.8b,  v28.8b
+        umull2          v26.8h,  v0.16b, v28.16b
+        \add            v4.8h,   v16.8h, v4.8h
+        srshl           v4.8h,   v4.8h,  v18.8h
+        \add            v6.8h,   v16.8h, v6.8h
+        srshl           v6.8h,   v6.8h,  v18.8h
+        sqxtun          v4.8b,   v4.8h
+        sqxtun2         v4.16b,  v6.8h
+        \add            v24.8h,  v16.8h, v24.8h
+        srshl           v24.8h,  v24.8h, v18.8h
+        \add            v26.8h,  v16.8h, v26.8h
+        srshl           v26.8h,  v26.8h, v18.8h
+        sqxtun          v24.8b,  v24.8h
+        sqxtun2         v24.16b, v26.8h
+        st1             {v4.16b},  [x5], x1
+        st1             {v24.16b}, [x5], x1
         b.ne            1b
         ret
 .endm
 
 .macro  weight_8        add
-        dup             v0.8B,  w4
+        dup             v0.8b,  w4
 1:      subs            w2,  w2,  #2
-        ld1             {v4.8B}, [x0], x1
-        umull           v2.8H,  v0.8B,  v4.8B
-        ld1             {v6.8B}, [x0], x1
-        umull           v20.8H, v0.8B,  v6.8B
-        \add            v2.8H,  v16.8H,  v2.8H
-        srshl           v2.8H,  v2.8H,  v18.8H
-        sqxtun          v2.8B,  v2.8H
-        \add            v20.8H, v16.8H,  v20.8H
-        srshl           v20.8H, v20.8H, v18.8H
-        sqxtun          v4.8B,  v20.8H
-        st1             {v2.8B}, [x5], x1
-        st1             {v4.8B}, [x5], x1
+        ld1             {v4.8b}, [x0], x1
+        umull           v2.8h,  v0.8b,  v4.8b
+        ld1             {v6.8b}, [x0], x1
+        umull           v20.8h, v0.8b,  v6.8b
+        \add            v2.8h,  v16.8h,  v2.8h
+        srshl           v2.8h,  v2.8h,  v18.8h
+        sqxtun          v2.8b,  v2.8h
+        \add            v20.8h, v16.8h,  v20.8h
+        srshl           v20.8h, v20.8h, v18.8h
+        sqxtun          v4.8b,  v20.8h
+        st1             {v2.8b}, [x5], x1
+        st1             {v4.8b}, [x5], x1
         b.ne            1b
         ret
 .endm
 
 .macro  weight_4        add
-        dup             v0.8B,  w4
+        dup             v0.8b,  w4
 1:      subs            w2,  w2,  #4
-        ld1             {v4.S}[0], [x0], x1
-        ld1             {v4.S}[1], [x0], x1
-        umull           v2.8H,  v0.8B,  v4.8B
+        ld1             {v4.s}[0], [x0], x1
+        ld1             {v4.s}[1], [x0], x1
+        umull           v2.8h,  v0.8b,  v4.8b
         b.lt            2f
-        ld1             {v6.S}[0], [x0], x1
-        ld1             {v6.S}[1], [x0], x1
-        umull           v20.8H, v0.8B,  v6.8B
-        \add            v2.8H,  v16.8H,  v2.8H
-        srshl           v2.8H,  v2.8H,  v18.8H
-        sqxtun          v2.8B,  v2.8H
-        \add            v20.8H, v16.8H,  v20.8H
-        srshl           v20.8H, v20.8h, v18.8H
-        sqxtun          v4.8B,  v20.8H
-        st1             {v2.S}[0], [x5], x1
-        st1             {v2.S}[1], [x5], x1
-        st1             {v4.S}[0], [x5], x1
-        st1             {v4.S}[1], [x5], x1
+        ld1             {v6.s}[0], [x0], x1
+        ld1             {v6.s}[1], [x0], x1
+        umull           v20.8h, v0.8b,  v6.8b
+        \add            v2.8h,  v16.8h,  v2.8h
+        srshl           v2.8h,  v2.8h,  v18.8h
+        sqxtun          v2.8b,  v2.8h
+        \add            v20.8h, v16.8h,  v20.8h
+        srshl           v20.8h, v20.8h, v18.8h
+        sqxtun          v4.8b,  v20.8h
+        st1             {v2.s}[0], [x5], x1
+        st1             {v2.s}[1], [x5], x1
+        st1             {v4.s}[0], [x5], x1
+        st1             {v4.s}[1], [x5], x1
         b.ne            1b
         ret
-2:      \add            v2.8H,  v16.8H,  v2.8H
-        srshl           v2.8H,  v2.8H,  v18.8H
-        sqxtun          v2.8B,  v2.8H
-        st1             {v2.S}[0], [x5], x1
-        st1             {v2.S}[1], [x5], x1
+2:      \add            v2.8h,  v16.8h,  v2.8h
+        srshl           v2.8h,  v2.8h,  v18.8h
+        sqxtun          v2.8b,  v2.8h
+        st1             {v2.s}[0], [x5], x1
+        st1             {v2.s}[1], [x5], x1
         ret
 .endm
 
@@ -796,18 +796,18 @@
         cmp             w3,  #1
         mov             w6,  #1
         lsl             w5,  w5,  w3
-        dup             v16.8H,  w5
+        dup             v16.8h,  w5
         mov             x5,  x0
         b.le            20f
         sub             w6,  w6,  w3
-        dup             v18.8H,  w6
+        dup             v18.8h,  w6
         cmp             w4, #0
         b.lt            10f
         weight_\w       shadd
 10:     neg             w4,  w4
         weight_\w       shsub
 20:     neg             w6,  w3
-        dup             v18.8H,  w6
+        dup             v18.8h,  w6
         cmp             w4,  #0
         b.lt            10f
         weight_\w       add
@@ -825,7 +825,7 @@
         ldr             w6,  [x4]
         ccmp            w3,  #0,  #0,  ne
         lsl             w2,  w2,  #2
-        mov             v24.S[0], w6
+        mov             v24.s[0], w6
         lsl             w3,  w3,  #2
         and             w8,  w6,  w6,  lsl #16
         b.eq            1f
@@ -1017,7 +1017,7 @@
         ld1             {v16.8h}, [x4], x1
         ld1             {v19.8h}, [x9], x1
 
-        transpose_4x8H v18, v16, v17, v19, v26, v27, v28, v29
+        transpose_4x8H  v18, v16, v17, v19, v26, v27, v28, v29
 
         h264_loop_filter_chroma_intra_10
 
@@ -1045,7 +1045,7 @@
         ld1             {v19.4h},   [x4], x1
         ld1             {v19.d}[1], [x9], x1
 
-        transpose_4x8H v18, v16, v17, v19, v26, v27, v28, v29
+        transpose_4x8H  v18, v16, v17, v19, v26, v27, v28, v29
 
         h264_loop_filter_chroma_intra_10
 
diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/h264qpel_neon.S ffmpeg-5.1.9/libavcodec/aarch64/h264qpel_neon.S
--- ffmpeg-5.1.8/libavcodec/aarch64/h264qpel_neon.S	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/aarch64/h264qpel_neon.S	2026-05-05 14:21:58.000000000 +0000
@@ -27,127 +27,127 @@
 .macro  lowpass_const   r
         movz            \r, #20, lsl #16
         movk            \r, #5
-        mov             v6.S[0], \r
+        mov             v6.s[0], \r
 .endm
 
 //trashes v0-v5
 .macro  lowpass_8       r0,  r1,  r2,  r3,  d0,  d1,  narrow=1
-        ext             v2.8B,      \r0\().8B, \r1\().8B, #2
-        ext             v3.8B,      \r0\().8B, \r1\().8B, #3
-        uaddl           v2.8H,      v2.8B,     v3.8B
-        ext             v4.8B,      \r0\().8B, \r1\().8B, #1
-        ext             v5.8B,      \r0\().8B, \r1\().8B, #4
-        uaddl           v4.8H,      v4.8B,     v5.8B
-        ext             v1.8B,      \r0\().8B, \r1\().8B, #5
-        uaddl           \d0\().8H,  \r0\().8B, v1.8B
-        ext             v0.8B,      \r2\().8B, \r3\().8B, #2
-        mla             \d0\().8H,  v2.8H,     v6.H[1]
-        ext             v1.8B,      \r2\().8B, \r3\().8B, #3
-        uaddl           v0.8H,      v0.8B,     v1.8B
-        ext             v1.8B,      \r2\().8B, \r3\().8B, #1
-        mls             \d0\().8H,  v4.8H,     v6.H[0]
-        ext             v3.8B,      \r2\().8B, \r3\().8B, #4
-        uaddl           v1.8H,      v1.8B,     v3.8B
-        ext             v2.8B,      \r2\().8B, \r3\().8B, #5
-        uaddl           \d1\().8H,  \r2\().8B, v2.8B
-        mla             \d1\().8H,  v0.8H,     v6.H[1]
-        mls             \d1\().8H,  v1.8H,     v6.H[0]
+        ext             v2.8b,      \r0\().8b, \r1\().8b, #2
+        ext             v3.8b,      \r0\().8b, \r1\().8b, #3
+        uaddl           v2.8h,      v2.8b,     v3.8b
+        ext             v4.8b,      \r0\().8b, \r1\().8b, #1
+        ext             v5.8b,      \r0\().8b, \r1\().8b, #4
+        uaddl           v4.8h,      v4.8b,     v5.8b
+        ext             v1.8b,      \r0\().8b, \r1\().8b, #5
+        uaddl           \d0\().8h,  \r0\().8b, v1.8b
+        ext             v0.8b,      \r2\().8b, \r3\().8b, #2
+        mla             \d0\().8h,  v2.8h,     v6.h[1]
+        ext             v1.8b,      \r2\().8b, \r3\().8b, #3
+        uaddl           v0.8h,      v0.8b,     v1.8b
+        ext             v1.8b,      \r2\().8b, \r3\().8b, #1
+        mls             \d0\().8h,  v4.8h,     v6.h[0]
+        ext             v3.8b,      \r2\().8b, \r3\().8b, #4
+        uaddl           v1.8h,      v1.8b,     v3.8b
+        ext             v2.8b,      \r2\().8b, \r3\().8b, #5
+        uaddl           \d1\().8h,  \r2\().8b, v2.8b
+        mla             \d1\().8h,  v0.8h,     v6.h[1]
+        mls             \d1\().8h,  v1.8h,     v6.h[0]
   .if \narrow
-        sqrshrun        \d0\().8B,  \d0\().8H, #5
-        sqrshrun        \d1\().8B,  \d1\().8H, #5
+        sqrshrun        \d0\().8b,  \d0\().8h, #5
+        sqrshrun        \d1\().8b,  \d1\().8h, #5
   .endif
 .endm
 
 //trashes v0-v4
 .macro  lowpass_8_v     r0,  r1,  r2,  r3,  r4,  r5,  r6,  d0,  d1,  narrow=1
-        uaddl           v2.8H,      \r2\().8B, \r3\().8B
-        uaddl           v0.8H,      \r3\().8B, \r4\().8B
-        uaddl           v4.8H,      \r1\().8B, \r4\().8B
-        uaddl           v1.8H,      \r2\().8B, \r5\().8B
-        uaddl           \d0\().8H,  \r0\().8B, \r5\().8B
-        uaddl           \d1\().8H,  \r1\().8B, \r6\().8B
-        mla             \d0\().8H,  v2.8H,     v6.H[1]
-        mls             \d0\().8H,  v4.8H,     v6.H[0]
-        mla             \d1\().8H,  v0.8H,     v6.H[1]
-        mls             \d1\().8H,  v1.8H,     v6.H[0]
+        uaddl           v2.8h,      \r2\().8b, \r3\().8b
+        uaddl           v0.8h,      \r3\().8b, \r4\().8b
+        uaddl           v4.8h,      \r1\().8b, \r4\().8b
+        uaddl           v1.8h,      \r2\().8b, \r5\().8b
+        uaddl           \d0\().8h,  \r0\().8b, \r5\().8b
+        uaddl           \d1\().8h,  \r1\().8b, \r6\().8b
+        mla             \d0\().8h,  v2.8h,     v6.h[1]
+        mls             \d0\().8h,  v4.8h,     v6.h[0]
+        mla             \d1\().8h,  v0.8h,     v6.h[1]
+        mls             \d1\().8h,  v1.8h,     v6.h[0]
   .if \narrow
-        sqrshrun        \d0\().8B,  \d0\().8H, #5
-        sqrshrun        \d1\().8B,  \d1\().8H, #5
+        sqrshrun        \d0\().8b,  \d0\().8h, #5
+        sqrshrun        \d1\().8b,  \d1\().8h, #5
   .endif
 .endm
 
 //trashes v0-v5, v7, v30-v31
 .macro  lowpass_8H      r0,  r1
-        ext             v0.16B,     \r0\().16B, \r0\().16B, #2
-        ext             v1.16B,     \r0\().16B, \r0\().16B, #3
-        uaddl           v0.8H,      v0.8B,      v1.8B
-        ext             v2.16B,     \r0\().16B, \r0\().16B, #1
-        ext             v3.16B,     \r0\().16B, \r0\().16B, #4
-        uaddl           v2.8H,      v2.8B,      v3.8B
-        ext             v30.16B,    \r0\().16B, \r0\().16B, #5
-        uaddl           \r0\().8H,  \r0\().8B,  v30.8B
-        ext             v4.16B,     \r1\().16B, \r1\().16B, #2
-        mla             \r0\().8H,  v0.8H,      v6.H[1]
-        ext             v5.16B,     \r1\().16B, \r1\().16B, #3
-        uaddl           v4.8H,      v4.8B,      v5.8B
-        ext             v7.16B,     \r1\().16B, \r1\().16B, #1
-        mls             \r0\().8H,  v2.8H,      v6.H[0]
-        ext             v0.16B,     \r1\().16B, \r1\().16B, #4
-        uaddl           v7.8H,      v7.8B,      v0.8B
-        ext             v31.16B,    \r1\().16B, \r1\().16B, #5
-        uaddl           \r1\().8H,  \r1\().8B,  v31.8B
-        mla             \r1\().8H,  v4.8H,      v6.H[1]
-        mls             \r1\().8H,  v7.8H,      v6.H[0]
+        ext             v0.16b,     \r0\().16b, \r0\().16b, #2
+        ext             v1.16b,     \r0\().16b, \r0\().16b, #3
+        uaddl           v0.8h,      v0.8b,      v1.8b
+        ext             v2.16b,     \r0\().16b, \r0\().16b, #1
+        ext             v3.16b,     \r0\().16b, \r0\().16b, #4
+        uaddl           v2.8h,      v2.8b,      v3.8b
+        ext             v30.16b,    \r0\().16b, \r0\().16b, #5
+        uaddl           \r0\().8h,  \r0\().8b,  v30.8b
+        ext             v4.16b,     \r1\().16b, \r1\().16b, #2
+        mla             \r0\().8h,  v0.8h,      v6.h[1]
+        ext             v5.16b,     \r1\().16b, \r1\().16b, #3
+        uaddl           v4.8h,      v4.8b,      v5.8b
+        ext             v7.16b,     \r1\().16b, \r1\().16b, #1
+        mls             \r0\().8h,  v2.8h,      v6.h[0]
+        ext             v0.16b,     \r1\().16b, \r1\().16b, #4
+        uaddl           v7.8h,      v7.8b,      v0.8b
+        ext             v31.16b,    \r1\().16b, \r1\().16b, #5
+        uaddl           \r1\().8h,  \r1\().8b,  v31.8b
+        mla             \r1\().8h,  v4.8h,      v6.h[1]
+        mls             \r1\().8h,  v7.8h,      v6.h[0]
 .endm
 
 // trashes v2-v5, v30
 .macro  lowpass_8_1     r0,  r1,  d0,  narrow=1
-        ext             v2.8B,     \r0\().8B, \r1\().8B, #2
-        ext             v3.8B,     \r0\().8B, \r1\().8B, #3
-        uaddl           v2.8H,     v2.8B,     v3.8B
-        ext             v4.8B,     \r0\().8B, \r1\().8B, #1
-        ext             v5.8B,     \r0\().8B, \r1\().8B, #4
-        uaddl           v4.8H,     v4.8B,     v5.8B
-        ext             v30.8B,    \r0\().8B, \r1\().8B, #5
-        uaddl           \d0\().8H, \r0\().8B, v30.8B
-        mla             \d0\().8H, v2.8H,     v6.H[1]
-        mls             \d0\().8H, v4.8H,     v6.H[0]
+        ext             v2.8b,     \r0\().8b, \r1\().8b, #2
+        ext             v3.8b,     \r0\().8b, \r1\().8b, #3
+        uaddl           v2.8h,     v2.8b,     v3.8b
+        ext             v4.8b,     \r0\().8b, \r1\().8b, #1
+        ext             v5.8b,     \r0\().8b, \r1\().8b, #4
+        uaddl           v4.8h,     v4.8b,     v5.8b
+        ext             v30.8b,    \r0\().8b, \r1\().8b, #5
+        uaddl           \d0\().8h, \r0\().8b, v30.8b
+        mla             \d0\().8h, v2.8h,     v6.h[1]
+        mls             \d0\().8h, v4.8h,     v6.h[0]
   .if \narrow
-        sqrshrun        \d0\().8B, \d0\().8H, #5
+        sqrshrun        \d0\().8b, \d0\().8h, #5
   .endif
 .endm
 
 // trashed v0-v7
 .macro  lowpass_8.16    r0,  r1,  r2,  r3,  r4,  r5
-        saddl           v5.4S,      \r2\().4H,  \r3\().4H
-        saddl2          v1.4S,      \r2\().8H,  \r3\().8H
-        saddl           v6.4S,      \r1\().4H,  \r4\().4H
-        saddl2          v2.4S,      \r1\().8H,  \r4\().8H
-        saddl           v0.4S,      \r0\().4H,  \r5\().4H
-        saddl2          v4.4S,      \r0\().8H,  \r5\().8H
-
-        shl             v3.4S,  v5.4S,  #4
-        shl             v5.4S,  v5.4S,  #2
-        shl             v7.4S,  v6.4S,  #2
-        add             v5.4S,  v5.4S,  v3.4S
-        add             v6.4S,  v6.4S,  v7.4S
-
-        shl             v3.4S,  v1.4S,  #4
-        shl             v1.4S,  v1.4S,  #2
-        shl             v7.4S,  v2.4S,  #2
-        add             v1.4S,  v1.4S,  v3.4S
-        add             v2.4S,  v2.4S,  v7.4S
-
-        add             v5.4S,  v5.4S,  v0.4S
-        sub             v5.4S,  v5.4S,  v6.4S
+        saddl           v5.4s,      \r2\().4h,  \r3\().4h
+        saddl2          v1.4s,      \r2\().8h,  \r3\().8h
+        saddl           v6.4s,      \r1\().4h,  \r4\().4h
+        saddl2          v2.4s,      \r1\().8h,  \r4\().8h
+        saddl           v0.4s,      \r0\().4h,  \r5\().4h
+        saddl2          v4.4s,      \r0\().8h,  \r5\().8h
+
+        shl             v3.4s,  v5.4s,  #4
+        shl             v5.4s,  v5.4s,  #2
+        shl             v7.4s,  v6.4s,  #2
+        add             v5.4s,  v5.4s,  v3.4s
+        add             v6.4s,  v6.4s,  v7.4s
+
+        shl             v3.4s,  v1.4s,  #4
+        shl             v1.4s,  v1.4s,  #2
+        shl             v7.4s,  v2.4s,  #2
+        add             v1.4s,  v1.4s,  v3.4s
+        add             v2.4s,  v2.4s,  v7.4s
+
+        add             v5.4s,  v5.4s,  v0.4s
+        sub             v5.4s,  v5.4s,  v6.4s
 
-        add             v1.4S,  v1.4S,  v4.4S
-        sub             v1.4S,  v1.4S,  v2.4S
+        add             v1.4s,  v1.4s,  v4.4s
+        sub             v1.4s,  v1.4s,  v2.4s
 
-        rshrn           v5.4H,  v5.4S,  #10
-        rshrn2          v5.8H,  v1.4S,  #10
+        rshrn           v5.4h,  v5.4s,  #10
+        rshrn2          v5.8h,  v1.4s,  #10
 
-        sqxtun          \r0\().8B,  v5.8H
+        sqxtun          \r0\().8b,  v5.8h
 .endm
 
 function put_h264_qpel16_h_lowpass_neon_packed
@@ -176,19 +176,19 @@
 endfunc
 
 function \type\()_h264_qpel8_h_lowpass_neon
-1:      ld1             {v28.8B, v29.8B}, [x1], x2
-        ld1             {v16.8B, v17.8B}, [x1], x2
+1:      ld1             {v28.8b, v29.8b}, [x1], x2
+        ld1             {v16.8b, v17.8b}, [x1], x2
         subs            x12, x12, #2
         lowpass_8       v28, v29, v16, v17, v28, v16
   .ifc \type,avg
-        ld1             {v2.8B},    [x0], x3
-        ld1             {v3.8B},    [x0]
-        urhadd          v28.8B, v28.8B,  v2.8B
-        urhadd          v16.8B, v16.8B, v3.8B
+        ld1             {v2.8b},    [x0], x3
+        ld1             {v3.8b},    [x0]
+        urhadd          v28.8b, v28.8b,  v2.8b
+        urhadd          v16.8b, v16.8b, v3.8b
         sub             x0,  x0,  x3
   .endif
-        st1             {v28.8B},    [x0], x3
-        st1             {v16.8B},    [x0], x3
+        st1             {v28.8b},    [x0], x3
+        st1             {v16.8b},    [x0], x3
         b.ne            1b
         ret
 endfunc
@@ -213,23 +213,23 @@
 endfunc
 
 function \type\()_h264_qpel8_h_lowpass_l2_neon
-1:      ld1             {v26.8B, v27.8B}, [x1], x2
-        ld1             {v16.8B, v17.8B}, [x1], x2
-        ld1             {v28.8B},     [x3], x2
-        ld1             {v29.8B},     [x3], x2
+1:      ld1             {v26.8b, v27.8b}, [x1], x2
+        ld1             {v16.8b, v17.8b}, [x1], x2
+        ld1             {v28.8b},     [x3], x2
+        ld1             {v29.8b},     [x3], x2
         subs            x12, x12, #2
         lowpass_8       v26, v27, v16, v17, v26, v27
-        urhadd          v26.8B, v26.8B, v28.8B
-        urhadd          v27.8B, v27.8B, v29.8B
+        urhadd          v26.8b, v26.8b, v28.8b
+        urhadd          v27.8b, v27.8b, v29.8b
   .ifc \type,avg
-        ld1             {v2.8B},      [x0], x2
-        ld1             {v3.8B},      [x0]
-        urhadd          v26.8B, v26.8B, v2.8B
-        urhadd          v27.8B, v27.8B, v3.8B
+        ld1             {v2.8b},      [x0], x2
+        ld1             {v3.8b},      [x0]
+        urhadd          v26.8b, v26.8b, v2.8b
+        urhadd          v27.8b, v27.8b, v3.8b
         sub             x0,  x0,  x2
   .endif
-        st1             {v26.8B},     [x0], x2
-        st1             {v27.8B},     [x0], x2
+        st1             {v26.8b},     [x0], x2
+        st1             {v27.8b},     [x0], x2
         b.ne            1b
         ret
 endfunc
@@ -270,52 +270,52 @@
 endfunc
 
 function \type\()_h264_qpel8_v_lowpass_neon
-        ld1             {v16.8B}, [x1], x3
-        ld1             {v17.8B}, [x1], x3
-        ld1             {v18.8B}, [x1], x3
-        ld1             {v19.8B}, [x1], x3
-        ld1             {v20.8B}, [x1], x3
-        ld1             {v21.8B}, [x1], x3
-        ld1             {v22.8B}, [x1], x3
-        ld1             {v23.8B}, [x1], x3
-        ld1             {v24.8B}, [x1], x3
-        ld1             {v25.8B}, [x1], x3
-        ld1             {v26.8B}, [x1], x3
-        ld1             {v27.8B}, [x1], x3
-        ld1             {v28.8B}, [x1]
+        ld1             {v16.8b}, [x1], x3
+        ld1             {v17.8b}, [x1], x3
+        ld1             {v18.8b}, [x1], x3
+        ld1             {v19.8b}, [x1], x3
+        ld1             {v20.8b}, [x1], x3
+        ld1             {v21.8b}, [x1], x3
+        ld1             {v22.8b}, [x1], x3
+        ld1             {v23.8b}, [x1], x3
+        ld1             {v24.8b}, [x1], x3
+        ld1             {v25.8b}, [x1], x3
+        ld1             {v26.8b}, [x1], x3
+        ld1             {v27.8b}, [x1], x3
+        ld1             {v28.8b}, [x1]
 
         lowpass_8_v     v16, v17, v18, v19, v20, v21, v22, v16, v17
         lowpass_8_v     v18, v19, v20, v21, v22, v23, v24, v18, v19
         lowpass_8_v     v20, v21, v22, v23, v24, v25, v26, v20, v21
         lowpass_8_v     v22, v23, v24, v25, v26, v27, v28, v22, v23
   .ifc \type,avg
-        ld1             {v24.8B},  [x0], x2
-        ld1             {v25.8B}, [x0], x2
-        ld1             {v26.8B}, [x0], x2
-        urhadd          v16.8B, v16.8B, v24.8B
-        ld1             {v27.8B}, [x0], x2
-        urhadd          v17.8B, v17.8B, v25.8B
-        ld1             {v28.8B}, [x0], x2
-        urhadd          v18.8B, v18.8B, v26.8B
-        ld1             {v29.8B}, [x0], x2
-        urhadd          v19.8B, v19.8B, v27.8B
-        ld1             {v30.8B}, [x0], x2
-        urhadd          v20.8B, v20.8B, v28.8B
-        ld1             {v31.8B}, [x0], x2
-        urhadd          v21.8B, v21.8B, v29.8B
-        urhadd          v22.8B, v22.8B, v30.8B
-        urhadd          v23.8B, v23.8B, v31.8B
+        ld1             {v24.8b},  [x0], x2
+        ld1             {v25.8b}, [x0], x2
+        ld1             {v26.8b}, [x0], x2
+        urhadd          v16.8b, v16.8b, v24.8b
+        ld1             {v27.8b}, [x0], x2
+        urhadd          v17.8b, v17.8b, v25.8b
+        ld1             {v28.8b}, [x0], x2
+        urhadd          v18.8b, v18.8b, v26.8b
+        ld1             {v29.8b}, [x0], x2
+        urhadd          v19.8b, v19.8b, v27.8b
+        ld1             {v30.8b}, [x0], x2
+        urhadd          v20.8b, v20.8b, v28.8b
+        ld1             {v31.8b}, [x0], x2
+        urhadd          v21.8b, v21.8b, v29.8b
+        urhadd          v22.8b, v22.8b, v30.8b
+        urhadd          v23.8b, v23.8b, v31.8b
         sub             x0,  x0,  x2,  lsl #3
   .endif
 
-        st1             {v16.8B}, [x0], x2
-        st1             {v17.8B}, [x0], x2
-        st1             {v18.8B}, [x0], x2
-        st1             {v19.8B}, [x0], x2
-        st1             {v20.8B}, [x0], x2
-        st1             {v21.8B}, [x0], x2
-        st1             {v22.8B}, [x0], x2
-        st1             {v23.8B}, [x0], x2
+        st1             {v16.8b}, [x0], x2
+        st1             {v17.8b}, [x0], x2
+        st1             {v18.8b}, [x0], x2
+        st1             {v19.8b}, [x0], x2
+        st1             {v20.8b}, [x0], x2
+        st1             {v21.8b}, [x0], x2
+        st1             {v22.8b}, [x0], x2
+        st1             {v23.8b}, [x0], x2
 
         ret
 endfunc
@@ -343,70 +343,70 @@
 endfunc
 
 function \type\()_h264_qpel8_v_lowpass_l2_neon
-        ld1             {v16.8B}, [x1], x3
-        ld1             {v17.8B}, [x1], x3
-        ld1             {v18.8B}, [x1], x3
-        ld1             {v19.8B}, [x1], x3
-        ld1             {v20.8B}, [x1], x3
-        ld1             {v21.8B}, [x1], x3
-        ld1             {v22.8B}, [x1], x3
-        ld1             {v23.8B}, [x1], x3
-        ld1             {v24.8B}, [x1], x3
-        ld1             {v25.8B}, [x1], x3
-        ld1             {v26.8B}, [x1], x3
-        ld1             {v27.8B}, [x1], x3
-        ld1             {v28.8B}, [x1]
+        ld1             {v16.8b}, [x1], x3
+        ld1             {v17.8b}, [x1], x3
+        ld1             {v18.8b}, [x1], x3
+        ld1             {v19.8b}, [x1], x3
+        ld1             {v20.8b}, [x1], x3
+        ld1             {v21.8b}, [x1], x3
+        ld1             {v22.8b}, [x1], x3
+        ld1             {v23.8b}, [x1], x3
+        ld1             {v24.8b}, [x1], x3
+        ld1             {v25.8b}, [x1], x3
+        ld1             {v26.8b}, [x1], x3
+        ld1             {v27.8b}, [x1], x3
+        ld1             {v28.8b}, [x1]
 
         lowpass_8_v     v16, v17, v18, v19, v20, v21, v22, v16, v17
         lowpass_8_v     v18, v19, v20, v21, v22, v23, v24, v18, v19
         lowpass_8_v     v20, v21, v22, v23, v24, v25, v26, v20, v21
         lowpass_8_v     v22, v23, v24, v25, v26, v27, v28, v22, v23
 
-        ld1             {v24.8B},  [x12], x2
-        ld1             {v25.8B},  [x12], x2
-        ld1             {v26.8B},  [x12], x2
-        ld1             {v27.8B},  [x12], x2
-        ld1             {v28.8B},  [x12], x2
-        urhadd          v16.8B, v24.8B, v16.8B
-        urhadd          v17.8B, v25.8B, v17.8B
-        ld1             {v29.8B},  [x12], x2
-        urhadd          v18.8B, v26.8B, v18.8B
-        urhadd          v19.8B, v27.8B, v19.8B
-        ld1             {v30.8B}, [x12], x2
-        urhadd          v20.8B, v28.8B, v20.8B
-        urhadd          v21.8B, v29.8B, v21.8B
-        ld1             {v31.8B}, [x12], x2
-        urhadd          v22.8B, v30.8B, v22.8B
-        urhadd          v23.8B, v31.8B, v23.8B
+        ld1             {v24.8b},  [x12], x2
+        ld1             {v25.8b},  [x12], x2
+        ld1             {v26.8b},  [x12], x2
+        ld1             {v27.8b},  [x12], x2
+        ld1             {v28.8b},  [x12], x2
+        urhadd          v16.8b, v24.8b, v16.8b
+        urhadd          v17.8b, v25.8b, v17.8b
+        ld1             {v29.8b},  [x12], x2
+        urhadd          v18.8b, v26.8b, v18.8b
+        urhadd          v19.8b, v27.8b, v19.8b
+        ld1             {v30.8b}, [x12], x2
+        urhadd          v20.8b, v28.8b, v20.8b
+        urhadd          v21.8b, v29.8b, v21.8b
+        ld1             {v31.8b}, [x12], x2
+        urhadd          v22.8b, v30.8b, v22.8b
+        urhadd          v23.8b, v31.8b, v23.8b
 
   .ifc \type,avg
-        ld1             {v24.8B}, [x0], x3
-        ld1             {v25.8B}, [x0], x3
-        ld1             {v26.8B}, [x0], x3
-        urhadd          v16.8B, v16.8B, v24.8B
-        ld1             {v27.8B}, [x0], x3
-        urhadd          v17.8B, v17.8B, v25.8B
-        ld1             {v28.8B}, [x0], x3
-        urhadd          v18.8B, v18.8B, v26.8B
-        ld1             {v29.8B}, [x0], x3
-        urhadd          v19.8B, v19.8B, v27.8B
-        ld1             {v30.8B}, [x0], x3
-        urhadd          v20.8B, v20.8B, v28.8B
-        ld1             {v31.8B}, [x0], x3
-        urhadd          v21.8B, v21.8B, v29.8B
-        urhadd          v22.8B, v22.8B, v30.8B
-        urhadd          v23.8B, v23.8B, v31.8B
+        ld1             {v24.8b}, [x0], x3
+        ld1             {v25.8b}, [x0], x3
+        ld1             {v26.8b}, [x0], x3
+        urhadd          v16.8b, v16.8b, v24.8b
+        ld1             {v27.8b}, [x0], x3
+        urhadd          v17.8b, v17.8b, v25.8b
+        ld1             {v28.8b}, [x0], x3
+        urhadd          v18.8b, v18.8b, v26.8b
+        ld1             {v29.8b}, [x0], x3
+        urhadd          v19.8b, v19.8b, v27.8b
+        ld1             {v30.8b}, [x0], x3
+        urhadd          v20.8b, v20.8b, v28.8b
+        ld1             {v31.8b}, [x0], x3
+        urhadd          v21.8b, v21.8b, v29.8b
+        urhadd          v22.8b, v22.8b, v30.8b
+        urhadd          v23.8b, v23.8b, v31.8b
         sub             x0,  x0,  x3,  lsl #3
   .endif
 
-        st1             {v16.8B}, [x0], x3
-        st1             {v17.8B}, [x0], x3
-        st1             {v18.8B}, [x0], x3
-        st1             {v19.8B}, [x0], x3
-        st1             {v20.8B}, [x0], x3
-        st1             {v21.8B}, [x0], x3
-        st1             {v22.8B}, [x0], x3
-        st1             {v23.8B}, [x0], x3
+        st1             {v16.8b}, [x0], x3
+        st1             {v17.8b}, [x0], x3
+        st1             {v18.8b}, [x0], x3
+        st1             {v19.8b}, [x0], x3
+        st1             {v20.8b}, [x0], x3
+        st1             {v21.8b}, [x0], x3
+        st1             {v22.8b}, [x0], x3
+        st1             {v23.8b}, [x0], x3
 
         ret
 endfunc
@@ -417,19 +417,19 @@
 
 function put_h264_qpel8_hv_lowpass_neon_top
         lowpass_const   w12
-        ld1             {v16.8H}, [x1], x3
-        ld1             {v17.8H}, [x1], x3
-        ld1             {v18.8H}, [x1], x3
-        ld1             {v19.8H}, [x1], x3
-        ld1             {v20.8H}, [x1], x3
-        ld1             {v21.8H}, [x1], x3
-        ld1             {v22.8H}, [x1], x3
-        ld1             {v23.8H}, [x1], x3
-        ld1             {v24.8H}, [x1], x3
-        ld1             {v25.8H}, [x1], x3
-        ld1             {v26.8H}, [x1], x3
-        ld1             {v27.8H}, [x1], x3
-        ld1             {v28.8H}, [x1]
+        ld1             {v16.8h}, [x1], x3
+        ld1             {v17.8h}, [x1], x3
+        ld1             {v18.8h}, [x1], x3
+        ld1             {v19.8h}, [x1], x3
+        ld1             {v20.8h}, [x1], x3
+        ld1             {v21.8h}, [x1], x3
+        ld1             {v22.8h}, [x1], x3
+        ld1             {v23.8h}, [x1], x3
+        ld1             {v24.8h}, [x1], x3
+        ld1             {v25.8h}, [x1], x3
+        ld1             {v26.8h}, [x1], x3
+        ld1             {v27.8h}, [x1], x3
+        ld1             {v28.8h}, [x1]
         lowpass_8H      v16, v17
         lowpass_8H      v18, v19
         lowpass_8H      v20, v21
@@ -458,33 +458,33 @@
         mov             x10, x30
         bl              put_h264_qpel8_hv_lowpass_neon_top
   .ifc \type,avg
-        ld1             {v0.8B},      [x0], x2
-        ld1             {v1.8B},      [x0], x2
-        ld1             {v2.8B},      [x0], x2
-        urhadd          v16.8B, v16.8B, v0.8B
-        ld1             {v3.8B},      [x0], x2
-        urhadd          v17.8B, v17.8B, v1.8B
-        ld1             {v4.8B},      [x0], x2
-        urhadd          v18.8B, v18.8B, v2.8B
-        ld1             {v5.8B},      [x0], x2
-        urhadd          v19.8B, v19.8B, v3.8B
-        ld1             {v6.8B},      [x0], x2
-        urhadd          v20.8B, v20.8B, v4.8B
-        ld1             {v7.8B},      [x0], x2
-        urhadd          v21.8B, v21.8B, v5.8B
-        urhadd          v22.8B, v22.8B, v6.8B
-        urhadd          v23.8B, v23.8B, v7.8B
+        ld1             {v0.8b},      [x0], x2
+        ld1             {v1.8b},      [x0], x2
+        ld1             {v2.8b},      [x0], x2
+        urhadd          v16.8b, v16.8b, v0.8b
+        ld1             {v3.8b},      [x0], x2
+        urhadd          v17.8b, v17.8b, v1.8b
+        ld1             {v4.8b},      [x0], x2
+        urhadd          v18.8b, v18.8b, v2.8b
+        ld1             {v5.8b},      [x0], x2
+        urhadd          v19.8b, v19.8b, v3.8b
+        ld1             {v6.8b},      [x0], x2
+        urhadd          v20.8b, v20.8b, v4.8b
+        ld1             {v7.8b},      [x0], x2
+        urhadd          v21.8b, v21.8b, v5.8b
+        urhadd          v22.8b, v22.8b, v6.8b
+        urhadd          v23.8b, v23.8b, v7.8b
         sub             x0,  x0,  x2,  lsl #3
   .endif
 
-        st1             {v16.8B},     [x0], x2
-        st1             {v17.8B},     [x0], x2
-        st1             {v18.8B},     [x0], x2
-        st1             {v19.8B},     [x0], x2
-        st1             {v20.8B},     [x0], x2
-        st1             {v21.8B},     [x0], x2
-        st1             {v22.8B},     [x0], x2
-        st1             {v23.8B},     [x0], x2
+        st1             {v16.8b},     [x0], x2
+        st1             {v17.8b},     [x0], x2
+        st1             {v18.8b},     [x0], x2
+        st1             {v19.8b},     [x0], x2
+        st1             {v20.8b},     [x0], x2
+        st1             {v21.8b},     [x0], x2
+        st1             {v22.8b},     [x0], x2
+        st1             {v23.8b},     [x0], x2
 
         ret             x10
 endfunc
@@ -498,45 +498,45 @@
         mov             x10, x30
         bl              put_h264_qpel8_hv_lowpass_neon_top
 
-        ld1             {v0.8B, v1.8B},  [x2], #16
-        ld1             {v2.8B, v3.8B},  [x2], #16
-        urhadd          v0.8B,  v0.8B,  v16.8B
-        urhadd          v1.8B,  v1.8B,  v17.8B
-        ld1             {v4.8B, v5.8B},  [x2], #16
-        urhadd          v2.8B,  v2.8B,  v18.8B
-        urhadd          v3.8B,  v3.8B,  v19.8B
-        ld1             {v6.8B, v7.8B},  [x2], #16
-        urhadd          v4.8B,  v4.8B,  v20.8B
-        urhadd          v5.8B,  v5.8B,  v21.8B
-        urhadd          v6.8B,  v6.8B,  v22.8B
-        urhadd          v7.8B,  v7.8B,  v23.8B
+        ld1             {v0.8b, v1.8b},  [x2], #16
+        ld1             {v2.8b, v3.8b},  [x2], #16
+        urhadd          v0.8b,  v0.8b,  v16.8b
+        urhadd          v1.8b,  v1.8b,  v17.8b
+        ld1             {v4.8b, v5.8b},  [x2], #16
+        urhadd          v2.8b,  v2.8b,  v18.8b
+        urhadd          v3.8b,  v3.8b,  v19.8b
+        ld1             {v6.8b, v7.8b},  [x2], #16
+        urhadd          v4.8b,  v4.8b,  v20.8b
+        urhadd          v5.8b,  v5.8b,  v21.8b
+        urhadd          v6.8b,  v6.8b,  v22.8b
+        urhadd          v7.8b,  v7.8b,  v23.8b
   .ifc \type,avg
-        ld1             {v16.8B},     [x0], x3
-        ld1             {v17.8B},     [x0], x3
-        ld1             {v18.8B},     [x0], x3
-        urhadd          v0.8B,  v0.8B,  v16.8B
-        ld1             {v19.8B},     [x0], x3
-        urhadd          v1.8B,  v1.8B,  v17.8B
-        ld1             {v20.8B},     [x0], x3
-        urhadd          v2.8B,  v2.8B,  v18.8B
-        ld1             {v21.8B},     [x0], x3
-        urhadd          v3.8B,  v3.8B,  v19.8B
-        ld1             {v22.8B},     [x0], x3
-        urhadd          v4.8B,  v4.8B,  v20.8B
-        ld1             {v23.8B},     [x0], x3
-        urhadd          v5.8B,  v5.8B,  v21.8B
-        urhadd          v6.8B,  v6.8B,  v22.8B
-        urhadd          v7.8B,  v7.8B,  v23.8B
+        ld1             {v16.8b},     [x0], x3
+        ld1             {v17.8b},     [x0], x3
+        ld1             {v18.8b},     [x0], x3
+        urhadd          v0.8b,  v0.8b,  v16.8b
+        ld1             {v19.8b},     [x0], x3
+        urhadd          v1.8b,  v1.8b,  v17.8b
+        ld1             {v20.8b},     [x0], x3
+        urhadd          v2.8b,  v2.8b,  v18.8b
+        ld1             {v21.8b},     [x0], x3
+        urhadd          v3.8b,  v3.8b,  v19.8b
+        ld1             {v22.8b},     [x0], x3
+        urhadd          v4.8b,  v4.8b,  v20.8b
+        ld1             {v23.8b},     [x0], x3
+        urhadd          v5.8b,  v5.8b,  v21.8b
+        urhadd          v6.8b,  v6.8b,  v22.8b
+        urhadd          v7.8b,  v7.8b,  v23.8b
         sub             x0,  x0,  x3,  lsl #3
   .endif
-        st1             {v0.8B},      [x0], x3
-        st1             {v1.8B},      [x0], x3
-        st1             {v2.8B},      [x0], x3
-        st1             {v3.8B},      [x0], x3
-        st1             {v4.8B},      [x0], x3
-        st1             {v5.8B},      [x0], x3
-        st1             {v6.8B},      [x0], x3
-        st1             {v7.8B},      [x0], x3
+        st1             {v0.8b},      [x0], x3
+        st1             {v1.8b},      [x0], x3
+        st1             {v2.8b},      [x0], x3
+        st1             {v3.8b},      [x0], x3
+        st1             {v4.8b},      [x0], x3
+        st1             {v5.8b},      [x0], x3
+        st1             {v6.8b},      [x0], x3
+        st1             {v7.8b},      [x0], x3
 
         ret             x10
 endfunc
@@ -580,8 +580,8 @@
 endfunc
 .endm
 
-        h264_qpel16_hv put
-        h264_qpel16_hv avg
+        h264_qpel16_hv  put
+        h264_qpel16_hv  avg
 
 .macro  h264_qpel8      type
 function ff_\type\()_h264_qpel8_mc10_neon, export=1
@@ -759,8 +759,8 @@
 endfunc
 .endm
 
-        h264_qpel8 put
-        h264_qpel8 avg
+        h264_qpel8      put
+        h264_qpel8      avg
 
 .macro  h264_qpel16     type
 function ff_\type\()_h264_qpel16_mc10_neon, export=1
@@ -931,5 +931,5 @@
 endfunc
 .endm
 
-        h264_qpel16 put
-        h264_qpel16 avg
+        h264_qpel16     put
+        h264_qpel16     avg
diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/hevcdsp_idct_neon.S ffmpeg-5.1.9/libavcodec/aarch64/hevcdsp_idct_neon.S
--- ffmpeg-5.1.8/libavcodec/aarch64/hevcdsp_idct_neon.S	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/aarch64/hevcdsp_idct_neon.S	2026-05-05 15:50:52.000000000 +0000
@@ -38,10 +38,10 @@
 endconst
 
 .macro clip10 in1, in2, c1, c2
-        smax        \in1, \in1, \c1
-        smax        \in2, \in2, \c1
-        smin        \in1, \in1, \c2
-        smin        \in2, \in2, \c2
+        smax            \in1, \in1, \c1
+        smax            \in2, \in2, \c1
+        smin            \in1, \in1, \c2
+        smin            \in2, \in2, \c2
 .endm
 
 function ff_hevc_add_residual_4x4_8_neon, export=1
@@ -50,13 +50,13 @@
         ld1             {v2.s}[1], [x0], x2
         ld1             {v2.s}[2], [x0], x2
         ld1             {v2.s}[3], [x0], x2
-        sub              x0,  x0,  x2, lsl #2
-        uxtl             v6.8h,  v2.8b
-        uxtl2            v7.8h,  v2.16b
-        sqadd            v0.8h,  v0.8h, v6.8h
-        sqadd            v1.8h,  v1.8h, v7.8h
-        sqxtun           v0.8b,  v0.8h
-        sqxtun2          v0.16b, v1.8h
+        sub             x0,  x0,  x2, lsl #2
+        uxtl            v6.8h,  v2.8b
+        uxtl2           v7.8h,  v2.16b
+        sqadd           v0.8h,  v0.8h, v6.8h
+        sqadd           v1.8h,  v1.8h, v7.8h
+        sqxtun          v0.8b,  v0.8h
+        sqxtun2         v0.16b, v1.8h
         st1             {v0.s}[0], [x0], x2
         st1             {v0.s}[1], [x0], x2
         st1             {v0.s}[2], [x0], x2
@@ -70,12 +70,12 @@
         ld1             {v2.d}[0], [x12], x2
         ld1             {v2.d}[1], [x12], x2
         ld1             {v3.d}[0], [x12], x2
-        sqadd            v0.8h, v0.8h, v2.8h
+        sqadd           v0.8h, v0.8h, v2.8h
         ld1             {v3.d}[1], [x12], x2
-        movi             v4.8h, #0
-        sqadd            v1.8h, v1.8h, v3.8h
-        mvni             v5.8h, #0xFC, lsl #8 // movi #0x3FF
-        clip10           v0.8h, v1.8h, v4.8h, v5.8h
+        movi            v4.8h, #0
+        sqadd           v1.8h, v1.8h, v3.8h
+        mvni            v5.8h, #0xFC, lsl #8 // movi #0x3FF
+        clip10          v0.8h, v1.8h, v4.8h, v5.8h
         st1             {v0.d}[0],  [x0], x2
         st1             {v0.d}[1],  [x0], x2
         st1             {v1.d}[0],  [x0], x2
@@ -85,48 +85,48 @@
 
 function ff_hevc_add_residual_8x8_8_neon, export=1
         add             x12,  x0, x2
-        add              x2,  x2, x2
-        mov              x3,  #8
-1:      subs             x3,  x3, #2
+        add             x2,  x2, x2
+        mov             x3,  #8
+1:      subs            x3,  x3, #2
         ld1             {v2.d}[0],     [x0]
         ld1             {v2.d}[1],    [x12]
-        uxtl             v3.8h,  v2.8b
+        uxtl            v3.8h,  v2.8b
         ld1             {v0.8h-v1.8h}, [x1], #32
-        uxtl2            v2.8h,  v2.16b
-        sqadd            v0.8h,  v0.8h,   v3.8h
-        sqadd            v1.8h,  v1.8h,   v2.8h
-        sqxtun           v0.8b,  v0.8h
-        sqxtun2          v0.16b, v1.8h
+        uxtl2           v2.8h,  v2.16b
+        sqadd           v0.8h,  v0.8h,   v3.8h
+        sqadd           v1.8h,  v1.8h,   v2.8h
+        sqxtun          v0.8b,  v0.8h
+        sqxtun2         v0.16b, v1.8h
         st1             {v0.d}[0],     [x0], x2
         st1             {v0.d}[1],    [x12], x2
-        bne              1b
+        bne             1b
         ret
 endfunc
 
 function ff_hevc_add_residual_8x8_10_neon, export=1
         add             x12,  x0, x2
-        add              x2,  x2, x2
-        mov              x3,  #8
-        movi             v4.8h, #0
-        mvni             v5.8h, #0xFC, lsl #8 // movi #0x3FF
-1:      subs             x3,  x3, #2
+        add             x2,  x2, x2
+        mov             x3,  #8
+        movi            v4.8h, #0
+        mvni            v5.8h, #0xFC, lsl #8 // movi #0x3FF
+1:      subs            x3,  x3, #2
         ld1             {v0.8h-v1.8h}, [x1], #32
         ld1             {v2.8h},       [x0]
-        sqadd            v0.8h, v0.8h, v2.8h
+        sqadd           v0.8h, v0.8h, v2.8h
         ld1             {v3.8h},      [x12]
-        sqadd            v1.8h, v1.8h, v3.8h
-        clip10           v0.8h, v1.8h, v4.8h, v5.8h
+        sqadd           v1.8h, v1.8h, v3.8h
+        clip10          v0.8h, v1.8h, v4.8h, v5.8h
         st1             {v0.8h},       [x0], x2
         st1             {v1.8h},      [x12], x2
-        bne              1b
+        bne             1b
         ret
 endfunc
 
 function ff_hevc_add_residual_16x16_8_neon, export=1
-        mov              x3,  #16
+        mov             x3,  #16
         add             x12, x0, x2
-        add              x2,  x2, x2
-1:      subs             x3,  x3, #2
+        add             x2,  x2, x2
+1:      subs            x3,  x3, #2
         ld1             {v16.16b},     [x0]
         ld1             {v0.8h-v3.8h}, [x1], #64
         ld1             {v19.16b},    [x12]
@@ -134,47 +134,47 @@
         uxtl2           v18.8h, v16.16b
         uxtl            v20.8h, v19.8b
         uxtl2           v21.8h, v19.16b
-        sqadd            v0.8h,  v0.8h, v17.8h
-        sqadd            v1.8h,  v1.8h, v18.8h
-        sqadd            v2.8h,  v2.8h, v20.8h
-        sqadd            v3.8h,  v3.8h, v21.8h
-        sqxtun           v0.8b,  v0.8h
+        sqadd           v0.8h,  v0.8h, v17.8h
+        sqadd           v1.8h,  v1.8h, v18.8h
+        sqadd           v2.8h,  v2.8h, v20.8h
+        sqadd           v3.8h,  v3.8h, v21.8h
+        sqxtun          v0.8b,  v0.8h
         sqxtun2         v0.16b,  v1.8h
-        sqxtun           v1.8b,  v2.8h
+        sqxtun          v1.8b,  v2.8h
         sqxtun2         v1.16b,  v3.8h
         st1             {v0.16b},     [x0], x2
         st1             {v1.16b},    [x12], x2
-        bne              1b
+        bne             1b
         ret
 endfunc
 
 function ff_hevc_add_residual_16x16_10_neon, export=1
-        mov              x3,  #16
+        mov             x3,  #16
         movi            v20.8h, #0
         mvni            v21.8h, #0xFC, lsl #8 // movi #0x3FF
         add             x12,  x0, x2
-        add              x2,  x2, x2
-1:      subs             x3,  x3, #2
+        add             x2,  x2, x2
+1:      subs            x3,  x3, #2
         ld1             {v16.8h-v17.8h}, [x0]
         ld1             {v0.8h-v3.8h},  [x1], #64
-        sqadd            v0.8h, v0.8h, v16.8h
+        sqadd           v0.8h, v0.8h, v16.8h
         ld1             {v18.8h-v19.8h}, [x12]
-        sqadd            v1.8h, v1.8h, v17.8h
-        sqadd            v2.8h, v2.8h, v18.8h
-        sqadd            v3.8h, v3.8h, v19.8h
-        clip10           v0.8h, v1.8h, v20.8h, v21.8h
-        clip10           v2.8h, v3.8h, v20.8h, v21.8h
+        sqadd           v1.8h, v1.8h, v17.8h
+        sqadd           v2.8h, v2.8h, v18.8h
+        sqadd           v3.8h, v3.8h, v19.8h
+        clip10          v0.8h, v1.8h, v20.8h, v21.8h
+        clip10          v2.8h, v3.8h, v20.8h, v21.8h
         st1             {v0.8h-v1.8h},   [x0], x2
         st1             {v2.8h-v3.8h},  [x12], x2
-        bne              1b
+        bne             1b
         ret
 endfunc
 
 function ff_hevc_add_residual_32x32_8_neon, export=1
         add             x12,  x0, x2
-        add              x2,  x2, x2
-        mov              x3,  #32
-1:      subs             x3,  x3, #2
+        add             x2,  x2, x2
+        mov             x3,  #32
+1:      subs            x3,  x3, #2
         ld1             {v20.16b, v21.16b}, [x0]
         uxtl            v16.8h,  v20.8b
         uxtl2           v17.8h,  v20.16b
@@ -187,43 +187,43 @@
         uxtl2           v21.8h,  v22.16b
         uxtl            v22.8h,  v23.8b
         uxtl2           v23.8h,  v23.16b
-        sqadd            v0.8h,  v0.8h,  v16.8h
-        sqadd            v1.8h,  v1.8h,  v17.8h
-        sqadd            v2.8h,  v2.8h,  v18.8h
-        sqadd            v3.8h,  v3.8h,  v19.8h
-        sqadd            v4.8h,  v4.8h,  v20.8h
-        sqadd            v5.8h,  v5.8h,  v21.8h
-        sqadd            v6.8h,  v6.8h,  v22.8h
-        sqadd            v7.8h,  v7.8h,  v23.8h
-        sqxtun           v0.8b,  v0.8h
+        sqadd           v0.8h,  v0.8h,  v16.8h
+        sqadd           v1.8h,  v1.8h,  v17.8h
+        sqadd           v2.8h,  v2.8h,  v18.8h
+        sqadd           v3.8h,  v3.8h,  v19.8h
+        sqadd           v4.8h,  v4.8h,  v20.8h
+        sqadd           v5.8h,  v5.8h,  v21.8h
+        sqadd           v6.8h,  v6.8h,  v22.8h
+        sqadd           v7.8h,  v7.8h,  v23.8h
+        sqxtun          v0.8b,  v0.8h
         sqxtun2         v0.16b,  v1.8h
-        sqxtun           v1.8b,  v2.8h
+        sqxtun          v1.8b,  v2.8h
         sqxtun2         v1.16b,  v3.8h
-        sqxtun           v2.8b,  v4.8h
+        sqxtun          v2.8b,  v4.8h
         sqxtun2         v2.16b,  v5.8h
         st1             {v0.16b, v1.16b},  [x0], x2
-        sqxtun           v3.8b,  v6.8h
+        sqxtun          v3.8b,  v6.8h
         sqxtun2         v3.16b,  v7.8h
         st1             {v2.16b, v3.16b}, [x12], x2
-        bne              1b
+        bne             1b
         ret
 endfunc
 
 function ff_hevc_add_residual_32x32_10_neon, export=1
-        mov              x3,  #32
+        mov             x3,  #32
         movi            v20.8h, #0
         mvni            v21.8h, #0xFC, lsl #8 // movi #0x3FF
-1:      subs             x3,  x3, #1
+1:      subs            x3,  x3, #1
         ld1             {v0.8h-v3.8h},   [x1], #64
         ld1             {v16.8h-v19.8h}, [x0]
-        sqadd            v0.8h, v0.8h, v16.8h
-        sqadd            v1.8h, v1.8h, v17.8h
-        sqadd            v2.8h, v2.8h, v18.8h
-        sqadd            v3.8h, v3.8h, v19.8h
-        clip10           v0.8h, v1.8h, v20.8h, v21.8h
-        clip10           v2.8h, v3.8h, v20.8h, v21.8h
+        sqadd           v0.8h, v0.8h, v16.8h
+        sqadd           v1.8h, v1.8h, v17.8h
+        sqadd           v2.8h, v2.8h, v18.8h
+        sqadd           v3.8h, v3.8h, v19.8h
+        clip10          v0.8h, v1.8h, v20.8h, v21.8h
+        clip10          v2.8h, v3.8h, v20.8h, v21.8h
         st1             {v0.8h-v3.8h},   [x0], x2
-        bne              1b
+        bne             1b
         ret
 endfunc
 
@@ -246,19 +246,19 @@
 
 // uses and clobbers v28-v31 as temp registers
 .macro tr_4x4_8 in0, in1, in2, in3, out0, out1, out2, out3, p1, p2
-         sshll\p1       v28.4s, \in0, #6
-         mov            v29.16b, v28.16b
-         smull\p1       v30.4s, \in1, v0.h[1]
-         smull\p1       v31.4s, \in1, v0.h[3]
-         smlal\p2       v28.4s, \in2, v0.h[0] //e0
-         smlsl\p2       v29.4s, \in2, v0.h[0] //e1
-         smlal\p2       v30.4s, \in3, v0.h[3] //o0
-         smlsl\p2       v31.4s, \in3, v0.h[1] //o1
-
-         add            \out0, v28.4s, v30.4s
-         add            \out1, v29.4s, v31.4s
-         sub            \out2, v29.4s, v31.4s
-         sub            \out3, v28.4s, v30.4s
+        sshll\p1        v28.4s, \in0, #6
+        mov             v29.16b, v28.16b
+        smull\p1        v30.4s, \in1, v0.h[1]
+        smull\p1        v31.4s, \in1, v0.h[3]
+        smlal\p2        v28.4s, \in2, v0.h[0] //e0
+        smlsl\p2        v29.4s, \in2, v0.h[0] //e1
+        smlal\p2        v30.4s, \in3, v0.h[3] //o0
+        smlsl\p2        v31.4s, \in3, v0.h[1] //o1
+
+        add             \out0, v28.4s, v30.4s
+        add             \out1, v29.4s, v31.4s
+        sub             \out2, v29.4s, v31.4s
+        sub             \out3, v28.4s, v30.4s
 .endm
 
 .macro transpose8_4x4 r0, r1, r2, r3
@@ -325,11 +325,11 @@
 .macro idct_8x8 bitdepth
 function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1
 //x0 - coeffs
-        mov              x1,  x0
+        mov             x1,  x0
         ld1             {v16.8h-v19.8h}, [x1], #64
         ld1             {v20.8h-v23.8h}, [x1]
 
-        movrel           x1, trans
+        movrel          x1, trans
         ld1             {v0.8h}, [x1]
 
         tr_8x4          7, v16,.4h, v17,.4h, v18,.4h, v19,.4h, v20,.4h, v21,.4h, v22,.4h, v23,.4h
@@ -342,7 +342,7 @@
 
         transpose_8x8   v16, v17, v18, v19, v20, v21, v22, v23
 
-        mov              x1,  x0
+        mov             x1,  x0
         st1             {v16.8h-v19.8h}, [x1], #64
         st1             {v20.8h-v23.8h}, [x1]
 
@@ -351,8 +351,8 @@
 .endm
 
 .macro butterfly e, o, tmp_p, tmp_m
-        add        \tmp_p, \e, \o
-        sub        \tmp_m, \e, \o
+        add             \tmp_p, \e, \o
+        sub             \tmp_m, \e, \o
 .endm
 
 .macro tr16_8x4 in0, in1, in2, in3, offset
@@ -381,7 +381,7 @@
         butterfly       v25.4s, v29.4s, v17.4s, v22.4s
         butterfly       v26.4s, v30.4s, v18.4s, v21.4s
         butterfly       v27.4s, v31.4s, v19.4s, v20.4s
-        add              x4,  sp,  #\offset
+        add             x4,  sp,  #\offset
         st1             {v16.4s-v19.4s}, [x4], #64
         st1             {v20.4s-v23.4s}, [x4]
 .endm
@@ -398,14 +398,14 @@
 .endm
 
 .macro add_member in, t0, t1, t2, t3, t4, t5, t6, t7, op0, op1, op2, op3, op4, op5, op6, op7, p
-        sum_sub v21.4s, \in, \t0, \op0, \p
-        sum_sub v22.4s, \in, \t1, \op1, \p
-        sum_sub v23.4s, \in, \t2, \op2, \p
-        sum_sub v24.4s, \in, \t3, \op3, \p
-        sum_sub v25.4s, \in, \t4, \op4, \p
-        sum_sub v26.4s, \in, \t5, \op5, \p
-        sum_sub v27.4s, \in, \t6, \op6, \p
-        sum_sub v28.4s, \in, \t7, \op7, \p
+        sum_sub         v21.4s, \in, \t0, \op0, \p
+        sum_sub         v22.4s, \in, \t1, \op1, \p
+        sum_sub         v23.4s, \in, \t2, \op2, \p
+        sum_sub         v24.4s, \in, \t3, \op3, \p
+        sum_sub         v25.4s, \in, \t4, \op4, \p
+        sum_sub         v26.4s, \in, \t5, \op5, \p
+        sum_sub         v27.4s, \in, \t6, \op6, \p
+        sum_sub         v28.4s, \in, \t7, \op7, \p
 .endm
 
 .macro butterfly16 in0, in1, in2, in3, in4, in5, in6, in7
@@ -473,20 +473,20 @@
 
 .macro tr_16x4 name, shift, offset, step
 function func_tr_16x4_\name
-        mov              x1,  x5
-        add              x3,  x5, #(\step * 64)
-        mov              x2,  #(\step * 128)
+        mov             x1,  x5
+        add             x3,  x5, #(\step * 64)
+        mov             x2,  #(\step * 128)
         load16          v16.d, v17.d, v18.d, v19.d
-        movrel           x1,  trans
+        movrel          x1,  trans
         ld1             {v0.8h}, [x1]
 
         tr16_8x4        v16, v17, v18, v19, \offset
 
-        add              x1,  x5, #(\step * 32)
-        add              x3,  x5, #(\step * 3 *32)
-        mov              x2,  #(\step * 128)
+        add             x1,  x5, #(\step * 32)
+        add             x3,  x5, #(\step * 3 *32)
+        mov             x2,  #(\step * 128)
         load16          v20.d, v17.d, v18.d, v19.d
-        movrel           x1, trans, 16
+        movrel          x1, trans, 16
         ld1             {v1.8h}, [x1]
         smull           v21.4s, v20.4h, v1.h[0]
         smull           v22.4s, v20.4h, v1.h[1]
@@ -505,16 +505,16 @@
         add_member      v19.4h, v1.h[6], v1.h[3], v1.h[0], v1.h[2], v1.h[5], v1.h[7], v1.h[4], v1.h[1], +, -, +, -, +, +, -, +
         add_member      v19.8h, v1.h[7], v1.h[6], v1.h[5], v1.h[4], v1.h[3], v1.h[2], v1.h[1], v1.h[0], +, -, +, -, +, -, +, -, 2
 
-        add              x4, sp, #\offset
+        add             x4, sp, #\offset
         ld1             {v16.4s-v19.4s}, [x4], #64
 
         butterfly16     v16.4s, v21.4s, v17.4s, v22.4s, v18.4s, v23.4s, v19.4s, v24.4s
         scale           v29, v30, v31, v24, v20.4s, v16.4s, v21.4s, v17.4s, v22.4s, v18.4s, v23.4s, v19.4s, \shift
         transpose16_4x4_2 v29, v30, v31, v24
-        mov              x1,  x6
-        add              x3,  x6, #(24 +3*32)
-        mov              x2, #32
-        mov              x4, #-32
+        mov             x1,  x6
+        add             x3,  x6, #(24 +3*32)
+        mov             x2, #32
+        mov             x4, #-32
         store16         v29.d, v30.d, v31.d, v24.d, x4
 
         add             x4, sp, #(\offset + 64)
@@ -523,10 +523,10 @@
         scale           v29, v30, v31, v20, v20.4s, v16.4s, v25.4s, v17.4s, v26.4s, v18.4s, v27.4s, v19.4s, \shift
         transpose16_4x4_2 v29, v30, v31, v20
 
-        add              x1,  x6, #8
-        add              x3,  x6, #(16 + 3 * 32)
-        mov              x2, #32
-        mov              x4, #-32
+        add             x1,  x6, #8
+        add             x3,  x6, #(16 + 3 * 32)
+        mov             x2, #32
+        mov             x4, #-32
         store16         v29.d, v30.d, v31.d, v20.d, x4
 
         ret
@@ -539,21 +539,21 @@
         mov             x15, x30
 
         // allocate a temp buffer
-        sub              sp,  sp,  #640
+        sub             sp,  sp,  #640
 
 .irp i, 0, 1, 2, 3
-        add              x5,  x0, #(8 * \i)
-        add              x6,  sp, #(8 * \i * 16)
+        add             x5,  x0, #(8 * \i)
+        add             x6,  sp, #(8 * \i * 16)
         bl              func_tr_16x4_firstpass
 .endr
 
 .irp i, 0, 1, 2, 3
-        add              x5,  sp, #(8 * \i)
-        add              x6,  x0, #(8 * \i * 16)
+        add             x5,  sp, #(8 * \i)
+        add             x6,  x0, #(8 * \i * 16)
         bl              func_tr_16x4_secondpass_\bitdepth
 .endr
 
-        add              sp,  sp,  #640
+        add             sp,  sp,  #640
 
         mov             x30, x15
         ret
@@ -573,35 +573,35 @@
 // void ff_hevc_idct_NxN_dc_DEPTH_neon(int16_t *coeffs)
 .macro idct_dc size, bitdepth
 function ff_hevc_idct_\size\()x\size\()_dc_\bitdepth\()_neon, export=1
-        ld1r         {v4.8h}, [x0]
-        srshr         v4.8h,  v4.8h,  #1
-        srshr         v0.8h,  v4.8h,  #(14 - \bitdepth)
-        srshr         v1.8h,  v4.8h,  #(14 - \bitdepth)
+        ld1r            {v4.8h}, [x0]
+        srshr           v4.8h,  v4.8h,  #1
+        srshr           v0.8h,  v4.8h,  #(14 - \bitdepth)
+        srshr           v1.8h,  v4.8h,  #(14 - \bitdepth)
 .if \size > 4
-        srshr         v2.8h,  v4.8h,  #(14 - \bitdepth)
-        srshr         v3.8h,  v4.8h,  #(14 - \bitdepth)
+        srshr           v2.8h,  v4.8h,  #(14 - \bitdepth)
+        srshr           v3.8h,  v4.8h,  #(14 - \bitdepth)
 .if \size > 16 /* dc 32x32 */
-        mov              x2,  #4
+        mov             x2,  #4
 1:
-        subs             x2,  x2, #1
+        subs            x2,  x2, #1
 .endif
         add             x12,  x0, #64
         mov             x13,  #128
 .if \size > 8 /* dc 16x16 */
-        st1            {v0.8h-v3.8h},  [x0], x13
-        st1            {v0.8h-v3.8h}, [x12], x13
-        st1            {v0.8h-v3.8h},  [x0], x13
-        st1            {v0.8h-v3.8h}, [x12], x13
-        st1            {v0.8h-v3.8h},  [x0], x13
-        st1            {v0.8h-v3.8h}, [x12], x13
+        st1             {v0.8h-v3.8h},  [x0], x13
+        st1             {v0.8h-v3.8h}, [x12], x13
+        st1             {v0.8h-v3.8h},  [x0], x13
+        st1             {v0.8h-v3.8h}, [x12], x13
+        st1             {v0.8h-v3.8h},  [x0], x13
+        st1             {v0.8h-v3.8h}, [x12], x13
 .endif /* dc 8x8 */
-        st1            {v0.8h-v3.8h},  [x0], x13
-        st1            {v0.8h-v3.8h}, [x12], x13
+        st1             {v0.8h-v3.8h},  [x0], x13
+        st1             {v0.8h-v3.8h}, [x12], x13
 .if \size > 16 /* dc 32x32 */
         bne             1b
 .endif
 .else /* dc 4x4 */
-        st1            {v0.8h-v1.8h},  [x0]
+        st1             {v0.8h-v1.8h},  [x0]
 .endif
         ret
 endfunc
diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/hpeldsp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/hpeldsp_neon.S
--- ffmpeg-5.1.8/libavcodec/aarch64/hpeldsp_neon.S	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/aarch64/hpeldsp_neon.S	2026-05-05 14:21:58.000000000 +0000
@@ -26,295 +26,295 @@
   .if \avg
         mov             x12, x0
   .endif
-1:      ld1             {v0.16B},  [x1], x2
-        ld1             {v1.16B},  [x1], x2
-        ld1             {v2.16B},  [x1], x2
-        ld1             {v3.16B},  [x1], x2
-  .if \avg
-        ld1             {v4.16B},  [x12], x2
-        urhadd          v0.16B,  v0.16B,  v4.16B
-        ld1             {v5.16B},  [x12], x2
-        urhadd          v1.16B,  v1.16B,  v5.16B
-        ld1             {v6.16B},  [x12], x2
-        urhadd          v2.16B,  v2.16B,  v6.16B
-        ld1             {v7.16B},  [x12], x2
-        urhadd          v3.16B,  v3.16B,  v7.16B
+1:      ld1             {v0.16b},  [x1], x2
+        ld1             {v1.16b},  [x1], x2
+        ld1             {v2.16b},  [x1], x2
+        ld1             {v3.16b},  [x1], x2
+  .if \avg
+        ld1             {v4.16b},  [x12], x2
+        urhadd          v0.16b,  v0.16b,  v4.16b
+        ld1             {v5.16b},  [x12], x2
+        urhadd          v1.16b,  v1.16b,  v5.16b
+        ld1             {v6.16b},  [x12], x2
+        urhadd          v2.16b,  v2.16b,  v6.16b
+        ld1             {v7.16b},  [x12], x2
+        urhadd          v3.16b,  v3.16b,  v7.16b
   .endif
         subs            w3,  w3,  #4
-        st1             {v0.16B},  [x0], x2
-        st1             {v1.16B},  [x0], x2
-        st1             {v2.16B},  [x0], x2
-        st1             {v3.16B},  [x0], x2
+        st1             {v0.16b},  [x0], x2
+        st1             {v1.16b},  [x0], x2
+        st1             {v2.16b},  [x0], x2
+        st1             {v3.16b},  [x0], x2
         b.ne            1b
         ret
 .endm
 
 .macro  pixels16_x2     rnd=1, avg=0
-1:      ld1             {v0.16B, v1.16B}, [x1], x2
-        ld1             {v2.16B, v3.16B}, [x1], x2
+1:      ld1             {v0.16b, v1.16b}, [x1], x2
+        ld1             {v2.16b, v3.16b}, [x1], x2
         subs            w3,  w3,  #2
-        ext             v1.16B,  v0.16B,  v1.16B,  #1
-        avg             v0.16B,  v0.16B,  v1.16B
-        ext             v3.16B,  v2.16B,  v3.16B,  #1
-        avg             v2.16B,  v2.16B,  v3.16B
-  .if \avg
-        ld1             {v1.16B}, [x0], x2
-        ld1             {v3.16B}, [x0]
-        urhadd          v0.16B,  v0.16B,  v1.16B
-        urhadd          v2.16B,  v2.16B,  v3.16B
+        ext             v1.16b,  v0.16b,  v1.16b,  #1
+        avg             v0.16b,  v0.16b,  v1.16b
+        ext             v3.16b,  v2.16b,  v3.16b,  #1
+        avg             v2.16b,  v2.16b,  v3.16b
+  .if \avg
+        ld1             {v1.16b}, [x0], x2
+        ld1             {v3.16b}, [x0]
+        urhadd          v0.16b,  v0.16b,  v1.16b
+        urhadd          v2.16b,  v2.16b,  v3.16b
         sub             x0,  x0,  x2
   .endif
-        st1             {v0.16B}, [x0], x2
-        st1             {v2.16B}, [x0], x2
+        st1             {v0.16b}, [x0], x2
+        st1             {v2.16b}, [x0], x2
         b.ne            1b
         ret
 .endm
 
 .macro  pixels16_y2     rnd=1, avg=0
         sub             w3,  w3,  #2
-        ld1             {v0.16B}, [x1], x2
-        ld1             {v1.16B}, [x1], x2
+        ld1             {v0.16b}, [x1], x2
+        ld1             {v1.16b}, [x1], x2
 1:      subs            w3,  w3,  #2
-        avg             v2.16B,  v0.16B,  v1.16B
-        ld1             {v0.16B}, [x1], x2
-        avg             v3.16B,  v0.16B,  v1.16B
-        ld1             {v1.16B}, [x1], x2
-  .if \avg
-        ld1             {v4.16B}, [x0], x2
-        ld1             {v5.16B}, [x0]
-        urhadd          v2.16B,  v2.16B,  v4.16B
-        urhadd          v3.16B,  v3.16B,  v5.16B
+        avg             v2.16b,  v0.16b,  v1.16b
+        ld1             {v0.16b}, [x1], x2
+        avg             v3.16b,  v0.16b,  v1.16b
+        ld1             {v1.16b}, [x1], x2
+  .if \avg
+        ld1             {v4.16b}, [x0], x2
+        ld1             {v5.16b}, [x0]
+        urhadd          v2.16b,  v2.16b,  v4.16b
+        urhadd          v3.16b,  v3.16b,  v5.16b
         sub             x0,  x0,  x2
   .endif
-        st1             {v2.16B}, [x0], x2
-        st1             {v3.16B}, [x0], x2
+        st1             {v2.16b}, [x0], x2
+        st1             {v3.16b}, [x0], x2
         b.ne            1b
 
-        avg             v2.16B,  v0.16B,  v1.16B
-        ld1             {v0.16B}, [x1], x2
-        avg             v3.16B,  v0.16B,  v1.16B
-  .if \avg
-        ld1             {v4.16B}, [x0], x2
-        ld1             {v5.16B}, [x0]
-        urhadd          v2.16B,  v2.16B,  v4.16B
-        urhadd          v3.16B,  v3.16B,  v5.16B
+        avg             v2.16b,  v0.16b,  v1.16b
+        ld1             {v0.16b}, [x1], x2
+        avg             v3.16b,  v0.16b,  v1.16b
+  .if \avg
+        ld1             {v4.16b}, [x0], x2
+        ld1             {v5.16b}, [x0]
+        urhadd          v2.16b,  v2.16b,  v4.16b
+        urhadd          v3.16b,  v3.16b,  v5.16b
         sub             x0,  x0,  x2
   .endif
-        st1             {v2.16B},     [x0], x2
-        st1             {v3.16B},     [x0], x2
+        st1             {v2.16b},     [x0], x2
+        st1             {v3.16b},     [x0], x2
 
         ret
 .endm
 
 .macro  pixels16_xy2    rnd=1, avg=0
         sub             w3,  w3,  #2
-        ld1             {v0.16B, v1.16B}, [x1], x2
-        ld1             {v4.16B, v5.16B}, [x1], x2
+        ld1             {v0.16b, v1.16b}, [x1], x2
+        ld1             {v4.16b, v5.16b}, [x1], x2
 NRND    movi            v26.8H, #1
-        ext             v1.16B,  v0.16B,  v1.16B,  #1
-        ext             v5.16B,  v4.16B,  v5.16B,  #1
-        uaddl           v16.8H,  v0.8B,   v1.8B
-        uaddl2          v20.8H,  v0.16B,  v1.16B
-        uaddl           v18.8H,  v4.8B,   v5.8B
-        uaddl2          v22.8H,  v4.16B,  v5.16B
+        ext             v1.16b,  v0.16b,  v1.16b,  #1
+        ext             v5.16b,  v4.16b,  v5.16b,  #1
+        uaddl           v16.8h,  v0.8b,   v1.8b
+        uaddl2          v20.8h,  v0.16b,  v1.16b
+        uaddl           v18.8h,  v4.8b,   v5.8b
+        uaddl2          v22.8h,  v4.16b,  v5.16b
 1:      subs            w3,  w3,  #2
-        ld1             {v0.16B, v1.16B}, [x1], x2
-        add             v24.8H,  v16.8H,  v18.8H
+        ld1             {v0.16b, v1.16b}, [x1], x2
+        add             v24.8h,  v16.8h,  v18.8h
 NRND    add             v24.8H,  v24.8H,  v26.8H
-        ext             v30.16B, v0.16B,  v1.16B,  #1
-        add             v1.8H,   v20.8H,  v22.8H
-        mshrn           v28.8B,  v24.8H,  #2
+        ext             v30.16b, v0.16b,  v1.16b,  #1
+        add             v1.8h,   v20.8h,  v22.8h
+        mshrn           v28.8b,  v24.8h,  #2
 NRND    add             v1.8H,   v1.8H,   v26.8H
-        mshrn2          v28.16B, v1.8H,   #2
+        mshrn2          v28.16b, v1.8h,   #2
   .if \avg
-        ld1             {v16.16B},        [x0]
-        urhadd          v28.16B, v28.16B, v16.16B
+        ld1             {v16.16b},        [x0]
+        urhadd          v28.16b, v28.16b, v16.16b
   .endif
-        uaddl           v16.8H,  v0.8B,   v30.8B
-        ld1             {v2.16B, v3.16B}, [x1], x2
-        uaddl2          v20.8H,  v0.16B,  v30.16B
-        st1             {v28.16B},        [x0], x2
-        add             v24.8H,  v16.8H,  v18.8H
+        uaddl           v16.8h,  v0.8b,   v30.8b
+        ld1             {v2.16b, v3.16b}, [x1], x2
+        uaddl2          v20.8h,  v0.16b,  v30.16b
+        st1             {v28.16b},        [x0], x2
+        add             v24.8h,  v16.8h,  v18.8h
 NRND    add             v24.8H,  v24.8H,  v26.8H
-        ext             v3.16B,  v2.16B,  v3.16B,  #1
-        add             v0.8H,   v20.8H,  v22.8H
-        mshrn           v30.8B,  v24.8H,  #2
+        ext             v3.16b,  v2.16b,  v3.16b,  #1
+        add             v0.8h,   v20.8h,  v22.8h
+        mshrn           v30.8b,  v24.8h,  #2
 NRND    add             v0.8H,   v0.8H,   v26.8H
-        mshrn2          v30.16B, v0.8H,   #2
+        mshrn2          v30.16b, v0.8h,   #2
   .if \avg
-        ld1             {v18.16B},        [x0]
-        urhadd          v30.16B, v30.16B, v18.16B
+        ld1             {v18.16b},        [x0]
+        urhadd          v30.16b, v30.16b, v18.16b
   .endif
-        uaddl           v18.8H,   v2.8B,  v3.8B
-        uaddl2          v22.8H,   v2.16B, v3.16B
-        st1             {v30.16B},        [x0], x2
+        uaddl           v18.8h,   v2.8b,  v3.8b
+        uaddl2          v22.8h,   v2.16b, v3.16b
+        st1             {v30.16b},        [x0], x2
         b.gt            1b
 
-        ld1             {v0.16B, v1.16B}, [x1], x2
-        add             v24.8H,  v16.8H,  v18.8H
+        ld1             {v0.16b, v1.16b}, [x1], x2
+        add             v24.8h,  v16.8h,  v18.8h
 NRND    add             v24.8H,  v24.8H,  v26.8H
-        ext             v30.16B, v0.16B,  v1.16B,  #1
-        add             v1.8H,   v20.8H,  v22.8H
-        mshrn           v28.8B,  v24.8H,  #2
+        ext             v30.16b, v0.16b,  v1.16b,  #1
+        add             v1.8h,   v20.8h,  v22.8h
+        mshrn           v28.8b,  v24.8h,  #2
 NRND    add             v1.8H,   v1.8H,   v26.8H
-        mshrn2          v28.16B, v1.8H,   #2
+        mshrn2          v28.16b, v1.8h,   #2
   .if \avg
-        ld1             {v16.16B},        [x0]
-        urhadd          v28.16B, v28.16B, v16.16B
+        ld1             {v16.16b},        [x0]
+        urhadd          v28.16b, v28.16b, v16.16b
   .endif
-        uaddl           v16.8H,  v0.8B,   v30.8B
-        uaddl2          v20.8H,  v0.16B,  v30.16B
-        st1             {v28.16B},        [x0], x2
-        add             v24.8H,  v16.8H,  v18.8H
+        uaddl           v16.8h,  v0.8b,   v30.8b
+        uaddl2          v20.8h,  v0.16b,  v30.16b
+        st1             {v28.16b},        [x0], x2
+        add             v24.8h,  v16.8h,  v18.8h
 NRND    add             v24.8H,  v24.8H,  v26.8H
-        add             v0.8H,   v20.8H,  v22.8H
-        mshrn           v30.8B,  v24.8H,  #2
+        add             v0.8h,   v20.8h,  v22.8h
+        mshrn           v30.8b,  v24.8h,  #2
 NRND    add             v0.8H,   v0.8H,   v26.8H
-        mshrn2          v30.16B, v0.8H,   #2
+        mshrn2          v30.16b, v0.8h,   #2
   .if \avg
-        ld1             {v18.16B},        [x0]
-        urhadd          v30.16B, v30.16B, v18.16B
+        ld1             {v18.16b},        [x0]
+        urhadd          v30.16b, v30.16b, v18.16b
   .endif
-        st1             {v30.16B},        [x0], x2
+        st1             {v30.16b},        [x0], x2
 
         ret
 .endm
 
 .macro  pixels8         rnd=1, avg=0
-1:      ld1             {v0.8B}, [x1], x2
-        ld1             {v1.8B}, [x1], x2
-        ld1             {v2.8B}, [x1], x2
-        ld1             {v3.8B}, [x1], x2
-  .if \avg
-        ld1             {v4.8B}, [x0], x2
-        urhadd          v0.8B,  v0.8B,  v4.8B
-        ld1             {v5.8B}, [x0], x2
-        urhadd          v1.8B,  v1.8B,  v5.8B
-        ld1             {v6.8B}, [x0], x2
-        urhadd          v2.8B,  v2.8B,  v6.8B
-        ld1             {v7.8B}, [x0], x2
-        urhadd          v3.8B,  v3.8B,  v7.8B
+1:      ld1             {v0.8b}, [x1], x2
+        ld1             {v1.8b}, [x1], x2
+        ld1             {v2.8b}, [x1], x2
+        ld1             {v3.8b}, [x1], x2
+  .if \avg
+        ld1             {v4.8b}, [x0], x2
+        urhadd          v0.8b,  v0.8b,  v4.8b
+        ld1             {v5.8b}, [x0], x2
+        urhadd          v1.8b,  v1.8b,  v5.8b
+        ld1             {v6.8b}, [x0], x2
+        urhadd          v2.8b,  v2.8b,  v6.8b
+        ld1             {v7.8b}, [x0], x2
+        urhadd          v3.8b,  v3.8b,  v7.8b
         sub             x0,  x0,  x2,  lsl #2
   .endif
         subs            w3,  w3,  #4
-        st1             {v0.8B}, [x0], x2
-        st1             {v1.8B}, [x0], x2
-        st1             {v2.8B}, [x0], x2
-        st1             {v3.8B}, [x0], x2
+        st1             {v0.8b}, [x0], x2
+        st1             {v1.8b}, [x0], x2
+        st1             {v2.8b}, [x0], x2
+        st1             {v3.8b}, [x0], x2
         b.ne            1b
         ret
 .endm
 
 .macro  pixels8_x2      rnd=1, avg=0
-1:      ld1             {v0.8B, v1.8B}, [x1], x2
-        ext             v1.8B,  v0.8B,  v1.8B,  #1
-        ld1             {v2.8B, v3.8B}, [x1], x2
-        ext             v3.8B,  v2.8B,  v3.8B,  #1
+1:      ld1             {v0.8b, v1.8b}, [x1], x2
+        ext             v1.8b,  v0.8b,  v1.8b,  #1
+        ld1             {v2.8b, v3.8b}, [x1], x2
+        ext             v3.8b,  v2.8b,  v3.8b,  #1
         subs            w3,  w3,  #2
-        avg             v0.8B,   v0.8B,   v1.8B
-        avg             v2.8B,   v2.8B,   v3.8B
+        avg             v0.8b,   v0.8b,   v1.8b
+        avg             v2.8b,   v2.8b,   v3.8b
   .if \avg
-        ld1             {v4.8B},     [x0], x2
-        ld1             {v5.8B},     [x0]
-        urhadd          v0.8B,   v0.8B,   v4.8B
-        urhadd          v2.8B,   v2.8B,   v5.8B
+        ld1             {v4.8b},     [x0], x2
+        ld1             {v5.8b},     [x0]
+        urhadd          v0.8b,   v0.8b,   v4.8b
+        urhadd          v2.8b,   v2.8b,   v5.8b
         sub             x0,  x0,  x2
   .endif
-        st1             {v0.8B}, [x0], x2
-        st1             {v2.8B}, [x0], x2
+        st1             {v0.8b}, [x0], x2
+        st1             {v2.8b}, [x0], x2
         b.ne            1b
         ret
 .endm
 
 .macro  pixels8_y2      rnd=1, avg=0
         sub             w3,  w3,  #2
-        ld1             {v0.8B},  [x1], x2
-        ld1             {v1.8B},  [x1], x2
+        ld1             {v0.8b},  [x1], x2
+        ld1             {v1.8b},  [x1], x2
 1:      subs            w3,  w3,  #2
-        avg             v4.8B,  v0.8B,  v1.8B
-        ld1             {v0.8B},  [x1], x2
-        avg             v5.8B,  v0.8B,  v1.8B
-        ld1             {v1.8B},  [x1], x2
-  .if \avg
-        ld1             {v2.8B},     [x0], x2
-        ld1             {v3.8B},     [x0]
-        urhadd          v4.8B,  v4.8B,  v2.8B
-        urhadd          v5.8B,  v5.8B,  v3.8B
+        avg             v4.8b,  v0.8b,  v1.8b
+        ld1             {v0.8b},  [x1], x2
+        avg             v5.8b,  v0.8b,  v1.8b
+        ld1             {v1.8b},  [x1], x2
+  .if \avg
+        ld1             {v2.8b},     [x0], x2
+        ld1             {v3.8b},     [x0]
+        urhadd          v4.8b,  v4.8b,  v2.8b
+        urhadd          v5.8b,  v5.8b,  v3.8b
         sub             x0,  x0,  x2
   .endif
-        st1             {v4.8B},     [x0], x2
-        st1             {v5.8B},     [x0], x2
+        st1             {v4.8b},     [x0], x2
+        st1             {v5.8b},     [x0], x2
         b.ne            1b
 
-        avg             v4.8B,  v0.8B,  v1.8B
-        ld1             {v0.8B},  [x1], x2
-        avg             v5.8B,  v0.8B,  v1.8B
-  .if \avg
-        ld1             {v2.8B},     [x0], x2
-        ld1             {v3.8B},     [x0]
-        urhadd          v4.8B,  v4.8B,  v2.8B
-        urhadd          v5.8B,  v5.8B,  v3.8B
+        avg             v4.8b,  v0.8b,  v1.8b
+        ld1             {v0.8b},  [x1], x2
+        avg             v5.8b,  v0.8b,  v1.8b
+  .if \avg
+        ld1             {v2.8b},     [x0], x2
+        ld1             {v3.8b},     [x0]
+        urhadd          v4.8b,  v4.8b,  v2.8b
+        urhadd          v5.8b,  v5.8b,  v3.8b
         sub             x0,  x0,  x2
   .endif
-        st1             {v4.8B},     [x0], x2
-        st1             {v5.8B},     [x0], x2
+        st1             {v4.8b},     [x0], x2
+        st1             {v5.8b},     [x0], x2
 
         ret
 .endm
 
 .macro  pixels8_xy2     rnd=1, avg=0
         sub             w3,  w3,  #2
-        ld1             {v0.16B},     [x1], x2
-        ld1             {v1.16B},     [x1], x2
+        ld1             {v0.16b},     [x1], x2
+        ld1             {v1.16b},     [x1], x2
 NRND    movi            v19.8H, #1
-        ext             v4.16B,  v0.16B,  v4.16B,  #1
-        ext             v6.16B,  v1.16B,  v6.16B,  #1
-        uaddl           v16.8H,  v0.8B,  v4.8B
-        uaddl           v17.8H,  v1.8B,  v6.8B
+        ext             v4.16b,  v0.16b,  v4.16b,  #1
+        ext             v6.16b,  v1.16b,  v6.16b,  #1
+        uaddl           v16.8h,  v0.8b,  v4.8b
+        uaddl           v17.8h,  v1.8b,  v6.8b
 1:      subs            w3,  w3,  #2
-        ld1             {v0.16B},     [x1], x2
-        add             v18.8H, v16.8H,  v17.8H
-        ext             v4.16B,  v0.16B,  v4.16B,  #1
+        ld1             {v0.16b},     [x1], x2
+        add             v18.8h, v16.8h,  v17.8h
+        ext             v4.16b,  v0.16b,  v4.16b,  #1
 NRND    add             v18.8H, v18.8H, v19.8H
-        uaddl           v16.8H,  v0.8B,  v4.8B
-        mshrn           v5.8B,  v18.8H, #2
-        ld1             {v1.16B},     [x1], x2
-        add             v18.8H, v16.8H,  v17.8H
+        uaddl           v16.8h,  v0.8b,  v4.8b
+        mshrn           v5.8b,  v18.8h, #2
+        ld1             {v1.16b},     [x1], x2
+        add             v18.8h, v16.8h,  v17.8h
   .if \avg
-        ld1             {v7.8B},     [x0]
-        urhadd          v5.8B,  v5.8B,  v7.8B
+        ld1             {v7.8b},     [x0]
+        urhadd          v5.8b,  v5.8b,  v7.8b
   .endif
 NRND    add             v18.8H, v18.8H, v19.8H
-        st1             {v5.8B},     [x0], x2
-        mshrn           v7.8B,  v18.8H, #2
+        st1             {v5.8b},     [x0], x2
+        mshrn           v7.8b,  v18.8h, #2
   .if \avg
-        ld1             {v5.8B},     [x0]
-        urhadd          v7.8B,  v7.8B,  v5.8B
+        ld1             {v5.8b},     [x0]
+        urhadd          v7.8b,  v7.8b,  v5.8b
   .endif
-        ext             v6.16B,  v1.16B,  v6.16B,  #1
-        uaddl           v17.8H,  v1.8B,   v6.8B
-        st1             {v7.8B},     [x0], x2
+        ext             v6.16b,  v1.16b,  v6.16b,  #1
+        uaddl           v17.8h,  v1.8b,   v6.8b
+        st1             {v7.8b},     [x0], x2
         b.gt            1b
 
-        ld1             {v0.16B},     [x1], x2
-        add             v18.8H, v16.8H, v17.8H
-        ext             v4.16B, v0.16B, v4.16B,  #1
+        ld1             {v0.16b},     [x1], x2
+        add             v18.8h, v16.8h, v17.8h
+        ext             v4.16b, v0.16b, v4.16b,  #1
 NRND    add             v18.8H, v18.8H, v19.8H
-        uaddl           v16.8H,  v0.8B, v4.8B
-        mshrn           v5.8B,  v18.8H, #2
-        add             v18.8H, v16.8H, v17.8H
+        uaddl           v16.8h,  v0.8b, v4.8b
+        mshrn           v5.8b,  v18.8h, #2
+        add             v18.8h, v16.8h, v17.8h
   .if \avg
-        ld1             {v7.8B},     [x0]
-        urhadd          v5.8B,  v5.8B,  v7.8B
+        ld1             {v7.8b},     [x0]
+        urhadd          v5.8b,  v5.8b,  v7.8b
   .endif
 NRND    add             v18.8H, v18.8H, v19.8H
-        st1             {v5.8B},     [x0], x2
-        mshrn           v7.8B,  v18.8H, #2
+        st1             {v5.8b},     [x0], x2
+        mshrn           v7.8b,  v18.8h, #2
   .if \avg
-        ld1             {v5.8B},     [x0]
-        urhadd          v7.8B,  v7.8B,  v5.8B
+        ld1             {v5.8b},     [x0]
+        urhadd          v7.8b,  v7.8b,  v5.8b
   .endif
-        st1             {v7.8B},     [x0], x2
+        st1             {v7.8b},     [x0], x2
 
         ret
 .endm
diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/me_cmp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/me_cmp_neon.S
--- ffmpeg-5.1.8/libavcodec/aarch64/me_cmp_neon.S	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/aarch64/me_cmp_neon.S	2026-05-05 15:50:52.000000000 +0000
@@ -27,7 +27,7 @@
         // x3           ptrdiff_t stride
         // w4           int h
         cmp             w4, #4                      // if h < 4, jump to completion section
-        movi            v18.4S, #0                  // clear result accumulator
+        movi            v18.4s, #0                  // clear result accumulator
         b.lt            2f
 1:
         ld1             {v0.16b}, [x1], x3          // load pix1
diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/neon.S ffmpeg-5.1.9/libavcodec/aarch64/neon.S
--- ffmpeg-5.1.8/libavcodec/aarch64/neon.S	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/aarch64/neon.S	2026-05-05 15:50:52.000000000 +0000
@@ -17,146 +17,146 @@
  */
 
 .macro  transpose_8x8B  r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
-        trn1            \r8\().8B,  \r0\().8B,  \r1\().8B
-        trn2            \r9\().8B,  \r0\().8B,  \r1\().8B
-        trn1            \r1\().8B,  \r2\().8B,  \r3\().8B
-        trn2            \r3\().8B,  \r2\().8B,  \r3\().8B
-        trn1            \r0\().8B,  \r4\().8B,  \r5\().8B
-        trn2            \r5\().8B,  \r4\().8B,  \r5\().8B
-        trn1            \r2\().8B,  \r6\().8B,  \r7\().8B
-        trn2            \r7\().8B,  \r6\().8B,  \r7\().8B
-
-        trn1            \r4\().4H,  \r0\().4H,  \r2\().4H
-        trn2            \r2\().4H,  \r0\().4H,  \r2\().4H
-        trn1            \r6\().4H,  \r5\().4H,  \r7\().4H
-        trn2            \r7\().4H,  \r5\().4H,  \r7\().4H
-        trn1            \r5\().4H,  \r9\().4H,  \r3\().4H
-        trn2            \r9\().4H,  \r9\().4H,  \r3\().4H
-        trn1            \r3\().4H,  \r8\().4H,  \r1\().4H
-        trn2            \r8\().4H,  \r8\().4H,  \r1\().4H
-
-        trn1            \r0\().2S,  \r3\().2S,  \r4\().2S
-        trn2            \r4\().2S,  \r3\().2S,  \r4\().2S
+        trn1            \r8\().8b,  \r0\().8b,  \r1\().8b
+        trn2            \r9\().8b,  \r0\().8b,  \r1\().8b
+        trn1            \r1\().8b,  \r2\().8b,  \r3\().8b
+        trn2            \r3\().8b,  \r2\().8b,  \r3\().8b
+        trn1            \r0\().8b,  \r4\().8b,  \r5\().8b
+        trn2            \r5\().8b,  \r4\().8b,  \r5\().8b
+        trn1            \r2\().8b,  \r6\().8b,  \r7\().8b
+        trn2            \r7\().8b,  \r6\().8b,  \r7\().8b
+
+        trn1            \r4\().4h,  \r0\().4h,  \r2\().4h
+        trn2            \r2\().4h,  \r0\().4h,  \r2\().4h
+        trn1            \r6\().4h,  \r5\().4h,  \r7\().4h
+        trn2            \r7\().4h,  \r5\().4h,  \r7\().4h
+        trn1            \r5\().4h,  \r9\().4h,  \r3\().4h
+        trn2            \r9\().4h,  \r9\().4h,  \r3\().4h
+        trn1            \r3\().4h,  \r8\().4h,  \r1\().4h
+        trn2            \r8\().4h,  \r8\().4h,  \r1\().4h
+
+        trn1            \r0\().2s,  \r3\().2s,  \r4\().2s
+        trn2            \r4\().2s,  \r3\().2s,  \r4\().2s
 
-        trn1            \r1\().2S,  \r5\().2S,  \r6\().2S
-        trn2            \r5\().2S,  \r5\().2S,  \r6\().2S
+        trn1            \r1\().2s,  \r5\().2s,  \r6\().2s
+        trn2            \r5\().2s,  \r5\().2s,  \r6\().2s
 
-        trn2            \r6\().2S,  \r8\().2S,  \r2\().2S
-        trn1            \r2\().2S,  \r8\().2S,  \r2\().2S
+        trn2            \r6\().2s,  \r8\().2s,  \r2\().2s
+        trn1            \r2\().2s,  \r8\().2s,  \r2\().2s
 
-        trn1            \r3\().2S,  \r9\().2S,  \r7\().2S
-        trn2            \r7\().2S,  \r9\().2S,  \r7\().2S
+        trn1            \r3\().2s,  \r9\().2s,  \r7\().2s
+        trn2            \r7\().2s,  \r9\().2s,  \r7\().2s
 .endm
 
 .macro  transpose_8x16B r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
-        trn1            \t0\().16B, \r0\().16B, \r1\().16B
-        trn2            \t1\().16B, \r0\().16B, \r1\().16B
-        trn1            \r1\().16B, \r2\().16B, \r3\().16B
-        trn2            \r3\().16B, \r2\().16B, \r3\().16B
-        trn1            \r0\().16B, \r4\().16B, \r5\().16B
-        trn2            \r5\().16B, \r4\().16B, \r5\().16B
-        trn1            \r2\().16B, \r6\().16B, \r7\().16B
-        trn2            \r7\().16B, \r6\().16B, \r7\().16B
-
-        trn1            \r4\().8H,  \r0\().8H,  \r2\().8H
-        trn2            \r2\().8H,  \r0\().8H,  \r2\().8H
-        trn1            \r6\().8H,  \r5\().8H,  \r7\().8H
-        trn2            \r7\().8H,  \r5\().8H,  \r7\().8H
-        trn1            \r5\().8H,  \t1\().8H,  \r3\().8H
-        trn2            \t1\().8H,  \t1\().8H,  \r3\().8H
-        trn1            \r3\().8H,  \t0\().8H,  \r1\().8H
-        trn2            \t0\().8H,  \t0\().8H,  \r1\().8H
-
-        trn1            \r0\().4S,  \r3\().4S,  \r4\().4S
-        trn2            \r4\().4S,  \r3\().4S,  \r4\().4S
+        trn1            \t0\().16b, \r0\().16b, \r1\().16b
+        trn2            \t1\().16b, \r0\().16b, \r1\().16b
+        trn1            \r1\().16b, \r2\().16b, \r3\().16b
+        trn2            \r3\().16b, \r2\().16b, \r3\().16b
+        trn1            \r0\().16b, \r4\().16b, \r5\().16b
+        trn2            \r5\().16b, \r4\().16b, \r5\().16b
+        trn1            \r2\().16b, \r6\().16b, \r7\().16b
+        trn2            \r7\().16b, \r6\().16b, \r7\().16b
+
+        trn1            \r4\().8h,  \r0\().8h,  \r2\().8h
+        trn2            \r2\().8h,  \r0\().8h,  \r2\().8h
+        trn1            \r6\().8h,  \r5\().8h,  \r7\().8h
+        trn2            \r7\().8h,  \r5\().8h,  \r7\().8h
+        trn1            \r5\().8h,  \t1\().8h,  \r3\().8h
+        trn2            \t1\().8h,  \t1\().8h,  \r3\().8h
+        trn1            \r3\().8h,  \t0\().8h,  \r1\().8h
+        trn2            \t0\().8h,  \t0\().8h,  \r1\().8h
+
+        trn1            \r0\().4s,  \r3\().4s,  \r4\().4s
+        trn2            \r4\().4s,  \r3\().4s,  \r4\().4s
 
-        trn1            \r1\().4S,  \r5\().4S,  \r6\().4S
-        trn2            \r5\().4S,  \r5\().4S,  \r6\().4S
+        trn1            \r1\().4s,  \r5\().4s,  \r6\().4s
+        trn2            \r5\().4s,  \r5\().4s,  \r6\().4s
 
-        trn2            \r6\().4S,  \t0\().4S,  \r2\().4S
-        trn1            \r2\().4S,  \t0\().4S,  \r2\().4S
+        trn2            \r6\().4s,  \t0\().4s,  \r2\().4s
+        trn1            \r2\().4s,  \t0\().4s,  \r2\().4s
 
-        trn1            \r3\().4S,  \t1\().4S,  \r7\().4S
-        trn2            \r7\().4S,  \t1\().4S,  \r7\().4S
+        trn1            \r3\().4s,  \t1\().4s,  \r7\().4s
+        trn2            \r7\().4s,  \t1\().4s,  \r7\().4s
 .endm
 
 .macro  transpose_4x16B r0, r1, r2, r3, t4, t5, t6, t7
-        trn1            \t4\().16B, \r0\().16B,  \r1\().16B
-        trn2            \t5\().16B, \r0\().16B,  \r1\().16B
-        trn1            \t6\().16B, \r2\().16B,  \r3\().16B
-        trn2            \t7\().16B, \r2\().16B,  \r3\().16B
-
-        trn1            \r0\().8H,  \t4\().8H,  \t6\().8H
-        trn2            \r2\().8H,  \t4\().8H,  \t6\().8H
-        trn1            \r1\().8H,  \t5\().8H,  \t7\().8H
-        trn2            \r3\().8H,  \t5\().8H,  \t7\().8H
+        trn1            \t4\().16b, \r0\().16b,  \r1\().16b
+        trn2            \t5\().16b, \r0\().16b,  \r1\().16b
+        trn1            \t6\().16b, \r2\().16b,  \r3\().16b
+        trn2            \t7\().16b, \r2\().16b,  \r3\().16b
+
+        trn1            \r0\().8h,  \t4\().8h,  \t6\().8h
+        trn2            \r2\().8h,  \t4\().8h,  \t6\().8h
+        trn1            \r1\().8h,  \t5\().8h,  \t7\().8h
+        trn2            \r3\().8h,  \t5\().8h,  \t7\().8h
 .endm
 
 .macro  transpose_4x8B  r0, r1, r2, r3, t4, t5, t6, t7
-        trn1            \t4\().8B,  \r0\().8B,  \r1\().8B
-        trn2            \t5\().8B,  \r0\().8B,  \r1\().8B
-        trn1            \t6\().8B,  \r2\().8B,  \r3\().8B
-        trn2            \t7\().8B,  \r2\().8B,  \r3\().8B
-
-        trn1            \r0\().4H,  \t4\().4H,  \t6\().4H
-        trn2            \r2\().4H,  \t4\().4H,  \t6\().4H
-        trn1            \r1\().4H,  \t5\().4H,  \t7\().4H
-        trn2            \r3\().4H,  \t5\().4H,  \t7\().4H
+        trn1            \t4\().8b,  \r0\().8b,  \r1\().8b
+        trn2            \t5\().8b,  \r0\().8b,  \r1\().8b
+        trn1            \t6\().8b,  \r2\().8b,  \r3\().8b
+        trn2            \t7\().8b,  \r2\().8b,  \r3\().8b
+
+        trn1            \r0\().4h,  \t4\().4h,  \t6\().4h
+        trn2            \r2\().4h,  \t4\().4h,  \t6\().4h
+        trn1            \r1\().4h,  \t5\().4h,  \t7\().4h
+        trn2            \r3\().4h,  \t5\().4h,  \t7\().4h
 .endm
 
 .macro  transpose_4x4H  r0, r1, r2, r3, r4, r5, r6, r7
-        trn1            \r4\().4H,  \r0\().4H,  \r1\().4H
-        trn2            \r5\().4H,  \r0\().4H,  \r1\().4H
-        trn1            \r6\().4H,  \r2\().4H,  \r3\().4H
-        trn2            \r7\().4H,  \r2\().4H,  \r3\().4H
-
-        trn1            \r0\().2S,  \r4\().2S,  \r6\().2S
-        trn2            \r2\().2S,  \r4\().2S,  \r6\().2S
-        trn1            \r1\().2S,  \r5\().2S,  \r7\().2S
-        trn2            \r3\().2S,  \r5\().2S,  \r7\().2S
+        trn1            \r4\().4h,  \r0\().4h,  \r1\().4h
+        trn2            \r5\().4h,  \r0\().4h,  \r1\().4h
+        trn1            \r6\().4h,  \r2\().4h,  \r3\().4h
+        trn2            \r7\().4h,  \r2\().4h,  \r3\().4h
+
+        trn1            \r0\().2s,  \r4\().2s,  \r6\().2s
+        trn2            \r2\().2s,  \r4\().2s,  \r6\().2s
+        trn1            \r1\().2s,  \r5\().2s,  \r7\().2s
+        trn2            \r3\().2s,  \r5\().2s,  \r7\().2s
 .endm
 
 .macro transpose_4x8H r0, r1, r2, r3, t4, t5, t6, t7
-        trn1            \t4\().8H,  \r0\().8H,  \r1\().8H
-        trn2            \t5\().8H,  \r0\().8H,  \r1\().8H
-        trn1            \t6\().8H,  \r2\().8H,  \r3\().8H
-        trn2            \t7\().8H,  \r2\().8H,  \r3\().8H
-
-        trn1            \r0\().4S,  \t4\().4S,  \t6\().4S
-        trn2            \r2\().4S,  \t4\().4S,  \t6\().4S
-        trn1            \r1\().4S,  \t5\().4S,  \t7\().4S
-        trn2            \r3\().4S,  \t5\().4S,  \t7\().4S
+        trn1            \t4\().8h,  \r0\().8h,  \r1\().8h
+        trn2            \t5\().8h,  \r0\().8h,  \r1\().8h
+        trn1            \t6\().8h,  \r2\().8h,  \r3\().8h
+        trn2            \t7\().8h,  \r2\().8h,  \r3\().8h
+
+        trn1            \r0\().4s,  \t4\().4s,  \t6\().4s
+        trn2            \r2\().4s,  \t4\().4s,  \t6\().4s
+        trn1            \r1\().4s,  \t5\().4s,  \t7\().4s
+        trn2            \r3\().4s,  \t5\().4s,  \t7\().4s
 .endm
 
 .macro  transpose_8x8H  r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
-        trn1            \r8\().8H,  \r0\().8H,  \r1\().8H
-        trn2            \r9\().8H,  \r0\().8H,  \r1\().8H
-        trn1            \r1\().8H,  \r2\().8H,  \r3\().8H
-        trn2            \r3\().8H,  \r2\().8H,  \r3\().8H
-        trn1            \r0\().8H,  \r4\().8H,  \r5\().8H
-        trn2            \r5\().8H,  \r4\().8H,  \r5\().8H
-        trn1            \r2\().8H,  \r6\().8H,  \r7\().8H
-        trn2            \r7\().8H,  \r6\().8H,  \r7\().8H
-
-        trn1            \r4\().4S,  \r0\().4S,  \r2\().4S
-        trn2            \r2\().4S,  \r0\().4S,  \r2\().4S
-        trn1            \r6\().4S,  \r5\().4S,  \r7\().4S
-        trn2            \r7\().4S,  \r5\().4S,  \r7\().4S
-        trn1            \r5\().4S,  \r9\().4S,  \r3\().4S
-        trn2            \r9\().4S,  \r9\().4S,  \r3\().4S
-        trn1            \r3\().4S,  \r8\().4S,  \r1\().4S
-        trn2            \r8\().4S,  \r8\().4S,  \r1\().4S
-
-        trn1            \r0\().2D,  \r3\().2D,  \r4\().2D
-        trn2            \r4\().2D,  \r3\().2D,  \r4\().2D
+        trn1            \r8\().8h,  \r0\().8h,  \r1\().8h
+        trn2            \r9\().8h,  \r0\().8h,  \r1\().8h
+        trn1            \r1\().8h,  \r2\().8h,  \r3\().8h
+        trn2            \r3\().8h,  \r2\().8h,  \r3\().8h
+        trn1            \r0\().8h,  \r4\().8h,  \r5\().8h
+        trn2            \r5\().8h,  \r4\().8h,  \r5\().8h
+        trn1            \r2\().8h,  \r6\().8h,  \r7\().8h
+        trn2            \r7\().8h,  \r6\().8h,  \r7\().8h
+
+        trn1            \r4\().4s,  \r0\().4s,  \r2\().4s
+        trn2            \r2\().4s,  \r0\().4s,  \r2\().4s
+        trn1            \r6\().4s,  \r5\().4s,  \r7\().4s
+        trn2            \r7\().4s,  \r5\().4s,  \r7\().4s
+        trn1            \r5\().4s,  \r9\().4s,  \r3\().4s
+        trn2            \r9\().4s,  \r9\().4s,  \r3\().4s
+        trn1            \r3\().4s,  \r8\().4s,  \r1\().4s
+        trn2            \r8\().4s,  \r8\().4s,  \r1\().4s
+
+        trn1            \r0\().2d,  \r3\().2d,  \r4\().2d
+        trn2            \r4\().2d,  \r3\().2d,  \r4\().2d
 
-        trn1            \r1\().2D,  \r5\().2D,  \r6\().2D
-        trn2            \r5\().2D,  \r5\().2D,  \r6\().2D
+        trn1            \r1\().2d,  \r5\().2d,  \r6\().2d
+        trn2            \r5\().2d,  \r5\().2d,  \r6\().2d
 
-        trn2            \r6\().2D,  \r8\().2D,  \r2\().2D
-        trn1            \r2\().2D,  \r8\().2D,  \r2\().2D
+        trn2            \r6\().2d,  \r8\().2d,  \r2\().2d
+        trn1            \r2\().2d,  \r8\().2d,  \r2\().2d
 
-        trn1            \r3\().2D,  \r9\().2D,  \r7\().2D
-        trn2            \r7\().2D,  \r9\().2D,  \r7\().2D
+        trn1            \r3\().2d,  \r9\().2d,  \r7\().2d
+        trn2            \r7\().2d,  \r9\().2d,  \r7\().2d
 
 .endm
diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/opusdsp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/opusdsp_neon.S
--- ffmpeg-5.1.8/libavcodec/aarch64/opusdsp_neon.S	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/aarch64/opusdsp_neon.S	2026-05-05 14:21:58.000000000 +0000
@@ -33,81 +33,81 @@
 endconst
 
 function ff_opus_deemphasis_neon, export=1
-        movrel  x4, tab_st
-        ld1    {v4.4s}, [x4]
-        movrel  x4, tab_x0
-        ld1    {v5.4s}, [x4]
-        movrel  x4, tab_x1
-        ld1    {v6.4s}, [x4]
-        movrel  x4, tab_x2
-        ld1    {v7.4s}, [x4]
+        movrel          x4, tab_st
+        ld1             {v4.4s}, [x4]
+        movrel          x4, tab_x0
+        ld1             {v5.4s}, [x4]
+        movrel          x4, tab_x1
+        ld1             {v6.4s}, [x4]
+        movrel          x4, tab_x2
+        ld1             {v7.4s}, [x4]
 
-        fmul v0.4s, v4.4s, v0.s[0]
+        fmul            v0.4s, v4.4s, v0.s[0]
 
-1:      ld1  {v1.4s, v2.4s}, [x1], #32
+1:      ld1             {v1.4s, v2.4s}, [x1], #32
 
-        fmla v0.4s, v5.4s, v1.s[0]
-        fmul v3.4s, v7.4s, v2.s[2]
+        fmla            v0.4s, v5.4s, v1.s[0]
+        fmul            v3.4s, v7.4s, v2.s[2]
 
-        fmla v0.4s, v6.4s, v1.s[1]
-        fmla v3.4s, v6.4s, v2.s[1]
+        fmla            v0.4s, v6.4s, v1.s[1]
+        fmla            v3.4s, v6.4s, v2.s[1]
 
-        fmla v0.4s, v7.4s, v1.s[2]
-        fmla v3.4s, v5.4s, v2.s[0]
+        fmla            v0.4s, v7.4s, v1.s[2]
+        fmla            v3.4s, v5.4s, v2.s[0]
 
-        fadd v1.4s, v1.4s, v0.4s
-        fadd v2.4s, v2.4s, v3.4s
+        fadd            v1.4s, v1.4s, v0.4s
+        fadd            v2.4s, v2.4s, v3.4s
 
-        fmla v2.4s, v4.4s, v1.s[3]
+        fmla            v2.4s, v4.4s, v1.s[3]
 
-        st1  {v1.4s, v2.4s}, [x0], #32
-        fmul v0.4s, v4.4s, v2.s[3]
+        st1             {v1.4s, v2.4s}, [x0], #32
+        fmul            v0.4s, v4.4s, v2.s[3]
 
-        subs w2, w2, #8
-        b.gt 1b
+        subs            w2, w2, #8
+        b.gt            1b
 
-        mov s0, v2.s[3]
+        mov             s0, v2.s[3]
 
         ret
 endfunc
 
 function ff_opus_postfilter_neon, export=1
-        ld1 {v0.4s}, [x2]
-        dup v1.4s, v0.s[1]
-        dup v2.4s, v0.s[2]
-        dup v0.4s, v0.s[0]
-
-        add w1, w1, #2
-        sub x1, x0, x1, lsl #2
-
-        ld1 {v3.4s}, [x1]
-        fmul v3.4s, v3.4s, v2.4s
-
-1:      add x1, x1, #4
-        ld1 {v4.4s}, [x1]
-        add x1, x1, #4
-        ld1 {v5.4s}, [x1]
-        add x1, x1, #4
-        ld1 {v6.4s}, [x1]
-        add x1, x1, #4
-        ld1 {v7.4s}, [x1]
-
-        fmla v3.4s, v7.4s, v2.4s
-        fadd v6.4s, v6.4s, v4.4s
-
-        ld1 {v4.4s}, [x0]
-        fmla v4.4s, v5.4s, v0.4s
-
-        fmul v6.4s, v6.4s, v1.4s
-        fadd v6.4s, v6.4s, v3.4s
+        ld1             {v0.4s}, [x2]
+        dup             v1.4s, v0.s[1]
+        dup             v2.4s, v0.s[2]
+        dup             v0.4s, v0.s[0]
+
+        add             w1, w1, #2
+        sub             x1, x0, x1, lsl #2
+
+        ld1             {v3.4s}, [x1]
+        fmul            v3.4s, v3.4s, v2.4s
+
+1:      add             x1, x1, #4
+        ld1             {v4.4s}, [x1]
+        add             x1, x1, #4
+        ld1             {v5.4s}, [x1]
+        add             x1, x1, #4
+        ld1             {v6.4s}, [x1]
+        add             x1, x1, #4
+        ld1             {v7.4s}, [x1]
+
+        fmla            v3.4s, v7.4s, v2.4s
+        fadd            v6.4s, v6.4s, v4.4s
+
+        ld1             {v4.4s}, [x0]
+        fmla            v4.4s, v5.4s, v0.4s
+
+        fmul            v6.4s, v6.4s, v1.4s
+        fadd            v6.4s, v6.4s, v3.4s
 
-        fadd v4.4s, v4.4s, v6.4s
-        fmul v3.4s, v7.4s, v2.4s
+        fadd            v4.4s, v4.4s, v6.4s
+        fmul            v3.4s, v7.4s, v2.4s
 
-        st1  {v4.4s}, [x0], #16
+        st1             {v4.4s}, [x0], #16
 
-        subs w3, w3, #4
-        b.gt 1b
+        subs            w3, w3, #4
+        b.gt            1b
 
         ret
 endfunc
diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/sbrdsp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/sbrdsp_neon.S
--- ffmpeg-5.1.8/libavcodec/aarch64/sbrdsp_neon.S	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/aarch64/sbrdsp_neon.S	2026-03-16 18:10:00.000000000 +0000
@@ -46,49 +46,49 @@
         add             x3, x0, #192*4
         add             x4, x0, #256*4
         mov             x5, #64
-1:      ld1             {v0.4S}, [x0]
-        ld1             {v1.4S}, [x1], #16
-        fadd            v0.4S, v0.4S, v1.4S
-        ld1             {v2.4S}, [x2], #16
-        fadd            v0.4S, v0.4S, v2.4S
-        ld1             {v3.4S}, [x3], #16
-        fadd            v0.4S, v0.4S, v3.4S
-        ld1             {v4.4S}, [x4], #16
-        fadd            v0.4S, v0.4S, v4.4S
-        st1             {v0.4S}, [x0], #16
+1:      ld1             {v0.4s}, [x0]
+        ld1             {v1.4s}, [x1], #16
+        fadd            v0.4s, v0.4s, v1.4s
+        ld1             {v2.4s}, [x2], #16
+        fadd            v0.4s, v0.4s, v2.4s
+        ld1             {v3.4s}, [x3], #16
+        fadd            v0.4s, v0.4s, v3.4s
+        ld1             {v4.4s}, [x4], #16
+        fadd            v0.4s, v0.4s, v4.4s
+        st1             {v0.4s}, [x0], #16
         subs            x5, x5, #4
         b.gt            1b
         ret
 endfunc
 
 function ff_sbr_sum_square_neon, export=1
-        movi            v0.4S, #0
-1:      ld1             {v1.4S}, [x0], #16
-        fmla            v0.4S, v1.4S, v1.4S
+        movi            v0.4s, #0
+1:      ld1             {v1.4s}, [x0], #16
+        fmla            v0.4s, v1.4s, v1.4s
         subs            w1, w1, #2
         b.gt            1b
-        faddp           v0.4S, v0.4S, v0.4S
-        faddp           v0.4S, v0.4S, v0.4S
+        faddp           v0.4s, v0.4s, v0.4s
+        faddp           v0.4s, v0.4s, v0.4s
         ret
 endfunc
 
 function ff_sbr_neg_odd_64_neon, export=1
         mov             x1, x0
-        movi            v5.4S, #1<<7, lsl #24
-        ld2             {v0.4S, v1.4S}, [x0], #32
-        eor             v1.16B, v1.16B, v5.16B
-        ld2             {v2.4S, v3.4S}, [x0], #32
+        movi            v5.4s, #1<<7, lsl #24
+        ld2             {v0.4s, v1.4s}, [x0], #32
+        eor             v1.16b, v1.16b, v5.16b
+        ld2             {v2.4s, v3.4s}, [x0], #32
 .rept 3
-        st2             {v0.4S, v1.4S}, [x1], #32
-        eor             v3.16B, v3.16B, v5.16B
-        ld2             {v0.4S, v1.4S}, [x0], #32
-        st2             {v2.4S, v3.4S}, [x1], #32
-        eor             v1.16B, v1.16B, v5.16B
-        ld2             {v2.4S, v3.4S}, [x0], #32
+        st2             {v0.4s, v1.4s}, [x1], #32
+        eor             v3.16b, v3.16b, v5.16b
+        ld2             {v0.4s, v1.4s}, [x0], #32
+        st2             {v2.4s, v3.4s}, [x1], #32
+        eor             v1.16b, v1.16b, v5.16b
+        ld2             {v2.4s, v3.4s}, [x0], #32
 .endr
-        eor             v3.16B, v3.16B, v5.16B
-        st2             {v0.4S, v1.4S}, [x1], #32
-        st2             {v2.4S, v3.4S}, [x1], #32
+        eor             v3.16b, v3.16b, v5.16b
+        st2             {v0.4s, v1.4s}, [x1], #32
+        st2             {v2.4s, v3.4s}, [x1], #32
         ret
 endfunc
 
@@ -97,26 +97,26 @@
         add             x2, x0, #64*4
         mov             x3, #-16
         mov             x4, #-4
-        movi            v6.4S, #1<<7, lsl #24
-        ld1             {v0.2S}, [x0], #8
-        st1             {v0.2S}, [x2], #8
+        movi            v6.4s, #1<<7, lsl #24
+        ld1             {v0.2s}, [x0], #8
+        st1             {v0.2s}, [x2], #8
 .rept 7
-        ld1             {v1.4S}, [x1], x3
-        ld1             {v2.4S}, [x0], #16
-        eor             v1.16B, v1.16B, v6.16B
-        rev64           v1.4S, v1.4S
-        ext             v1.16B, v1.16B, v1.16B, #8
-        st2             {v1.4S, v2.4S}, [x2], #32
+        ld1             {v1.4s}, [x1], x3
+        ld1             {v2.4s}, [x0], #16
+        eor             v1.16b, v1.16b, v6.16b
+        rev64           v1.4s, v1.4s
+        ext             v1.16b, v1.16b, v1.16b, #8
+        st2             {v1.4s, v2.4s}, [x2], #32
 .endr
         add             x1, x1, #8
-        ld1             {v1.2S}, [x1], x4
-        ld1             {v2.2S}, [x0], #8
-        ld1             {v1.S}[3], [x1]
-        ld1             {v2.S}[2], [x0]
-        eor             v1.16B, v1.16B, v6.16B
-        rev64           v1.4S, v1.4S
-        st2             {v1.2S, v2.2S}, [x2], #16
-        st2             {v1.S, v2.S}[2], [x2]
+        ld1             {v1.2s}, [x1], x4
+        ld1             {v2.2s}, [x0], #8
+        ld1             {v1.s}[3], [x1]
+        ld1             {v2.s}[2], [x0]
+        eor             v1.16b, v1.16b, v6.16b
+        rev64           v1.4s, v1.4s
+        st2             {v1.2s, v2.2s}, [x2], #16
+        st2             {v1.s, v2.s}[2], [x2]
         ret
 endfunc
 
@@ -124,13 +124,13 @@
         add             x2, x1, #60*4
         mov             x3, #-16
         mov             x4, #32
-        movi            v6.4S, #1<<7, lsl #24
-1:      ld1             {v0.4S}, [x2], x3
-        ld1             {v1.4S}, [x1], #16
-        eor             v0.16B, v0.16B, v6.16B
-        rev64           v0.4S, v0.4S
-        ext             v0.16B, v0.16B, v0.16B, #8
-        st2             {v0.4S, v1.4S}, [x0], #32
+        movi            v6.4s, #1<<7, lsl #24
+1:      ld1             {v0.4s}, [x2], x3
+        ld1             {v1.4s}, [x1], #16
+        eor             v0.16b, v0.16b, v6.16b
+        rev64           v0.4s, v0.4s
+        ext             v0.16b, v0.16b, v0.16b, #8
+        st2             {v0.4s, v1.4s}, [x0], #32
         subs            x4, x4, #4
         b.gt            1b
         ret
@@ -141,13 +141,13 @@
         add             x2, x0, #60*4
         mov             x3, #-32
         mov             x4, #32
-        movi            v2.4S, #1<<7, lsl #24
-1:      ld2             {v0.4S, v1.4S}, [x1], x3
-        eor             v0.16B, v0.16B, v2.16B
-        rev64           v1.4S, v1.4S
-        ext             v1.16B, v1.16B, v1.16B, #8
-        st1             {v0.4S}, [x2]
-        st1             {v1.4S}, [x0], #16
+        movi            v2.4s, #1<<7, lsl #24
+1:      ld2             {v0.4s, v1.4s}, [x1], x3
+        eor             v0.16b, v0.16b, v2.16b
+        rev64           v1.4s, v1.4s
+        ext             v1.16b, v1.16b, v1.16b, #8
+        st1             {v0.4s}, [x2]
+        st1             {v1.4s}, [x0], #16
         sub             x2, x2, #16
         subs            x4, x4, #4
         b.gt            1b
@@ -159,16 +159,16 @@
         add             x3, x0, #124*4
         mov             x4, #64
         mov             x5, #-16
-1:      ld1             {v0.4S}, [x1], #16
-        ld1             {v1.4S}, [x2], x5
-        rev64           v2.4S, v0.4S
-        ext             v2.16B, v2.16B, v2.16B, #8
-        rev64           v3.4S, v1.4S
-        ext             v3.16B, v3.16B, v3.16B, #8
-        fadd            v1.4S, v1.4S, v2.4S
-        fsub            v0.4S, v0.4S, v3.4S
-        st1             {v0.4S}, [x0], #16
-        st1             {v1.4S}, [x3], x5
+1:      ld1             {v0.4s}, [x1], #16
+        ld1             {v1.4s}, [x2], x5
+        rev64           v2.4s, v0.4s
+        ext             v2.16b, v2.16b, v2.16b, #8
+        rev64           v3.4s, v1.4s
+        ext             v3.16b, v3.16b, v3.16b, #8
+        fadd            v1.4s, v1.4s, v2.4s
+        fsub            v0.4s, v0.4s, v3.4s
+        st1             {v0.4s}, [x0], #16
+        st1             {v1.4s}, [x3], x5
         subs            x4, x4, #4
         b.gt            1b
         ret
@@ -178,32 +178,32 @@
         sxtw            x4, w4
         sxtw            x5, w5
         movrel          x6, factors
-        ld1             {v7.4S}, [x6]
-        dup             v1.4S, v0.S[0]
-        mov             v2.8B, v1.8B
-        mov             v2.S[2], v7.S[0]
-        mov             v2.S[3], v7.S[0]
-        fmul            v1.4S, v1.4S, v2.4S
-        ld1             {v0.D}[0], [x3]
-        ld1             {v0.D}[1], [x2]
-        fmul            v0.4S, v0.4S, v1.4S
-        fmul            v1.4S, v0.4S, v7.4S
-        rev64           v0.4S, v0.4S
+        ld1             {v7.4s}, [x6]
+        dup             v1.4s, v0.s[0]
+        mov             v2.8b, v1.8b
+        mov             v2.s[2], v7.s[0]
+        mov             v2.s[3], v7.s[0]
+        fmul            v1.4s, v1.4s, v2.4s
+        ld1             {v0.d}[0], [x3]
+        ld1             {v0.d}[1], [x2]
+        fmul            v0.4s, v0.4s, v1.4s
+        fmul            v1.4s, v0.4s, v7.4s
+        rev64           v0.4s, v0.4s
         sub             x7, x5, x4
         add             x0, x0, x4, lsl #3
         add             x1, x1, x4, lsl #3
         sub             x1, x1, #16
-1:      ld1             {v2.4S}, [x1], #16
-        ld1             {v3.2S}, [x1]
-        fmul            v4.4S, v2.4S, v1.4S
-        fmul            v5.4S, v2.4S, v0.4S
-        faddp           v4.4S, v4.4S, v4.4S
-        faddp           v5.4S, v5.4S, v5.4S
-        faddp           v4.4S, v4.4S, v4.4S
-        faddp           v5.4S, v5.4S, v5.4S
-        mov             v4.S[1], v5.S[0]
-        fadd            v4.2S, v4.2S, v3.2S
-        st1             {v4.2S}, [x0], #8
+1:      ld1             {v2.4s}, [x1], #16
+        ld1             {v3.2s}, [x1]
+        fmul            v4.4s, v2.4s, v1.4s
+        fmul            v5.4s, v2.4s, v0.4s
+        faddp           v4.4s, v4.4s, v4.4s
+        faddp           v5.4s, v5.4s, v5.4s
+        faddp           v4.4s, v4.4s, v4.4s
+        faddp           v5.4s, v5.4s, v5.4s
+        mov             v4.s[1], v5.s[0]
+        fadd            v4.2s, v4.2s, v3.2s
+        st1             {v4.2s}, [x0], #8
         sub             x1, x1, #8
         subs            x7, x7, #1
         b.gt            1b
@@ -215,10 +215,10 @@
         sxtw            x4, w4
         mov             x5, #40*2*4
         add             x1, x1, x4, lsl #3
-1:      ld1             {v0.2S}, [x1], x5
-        ld1             {v1.S}[0], [x2], #4
-        fmul            v2.4S, v0.4S, v1.S[0]
-        st1             {v2.2S}, [x0], #8
+1:      ld1             {v0.2s}, [x1], x5
+        ld1             {v1.s}[0], [x2], #4
+        fmul            v2.4s, v0.4s, v1.s[0]
+        st1             {v2.2s}, [x0], #8
         subs            x3, x3, #1
         b.gt            1b
         ret
@@ -227,46 +227,46 @@
 function ff_sbr_autocorrelate_neon, export=1
         mov             x2, #38
         movrel          x3, factors
-        ld1             {v0.4S}, [x3]
-        movi            v1.4S, #0
-        movi            v2.4S, #0
-        movi            v3.4S, #0
-        ld1             {v4.2S}, [x0], #8
-        ld1             {v5.2S}, [x0], #8
-        fmul            v16.2S, v4.2S, v4.2S
-        fmul            v17.2S, v5.2S, v4.S[0]
-        fmul            v18.2S, v5.2S, v4.S[1]
-1:      ld1             {v5.D}[1], [x0], #8
-        fmla            v1.2S, v4.2S, v4.2S
-        fmla            v2.4S, v5.4S, v4.S[0]
-        fmla            v3.4S, v5.4S, v4.S[1]
-        mov             v4.D[0], v5.D[0]
-        mov             v5.D[0], v5.D[1]
+        ld1             {v0.4s}, [x3]
+        movi            v1.4s, #0
+        movi            v2.4s, #0
+        movi            v3.4s, #0
+        ld1             {v4.2s}, [x0], #8
+        ld1             {v5.2s}, [x0], #8
+        fmul            v16.2s, v4.2s, v4.2s
+        fmul            v17.2s, v5.2s, v4.s[0]
+        fmul            v18.2s, v5.2s, v4.s[1]
+1:      ld1             {v5.d}[1], [x0], #8
+        fmla            v1.2s, v4.2s, v4.2s
+        fmla            v2.4s, v5.4s, v4.s[0]
+        fmla            v3.4s, v5.4s, v4.s[1]
+        mov             v4.d[0], v5.d[0]
+        mov             v5.d[0], v5.d[1]
         subs            x2, x2, #1
         b.gt            1b
-        fmul            v19.2S, v4.2S, v4.2S
-        fmul            v20.2S, v5.2S, v4.S[0]
-        fmul            v21.2S, v5.2S, v4.S[1]
-        fadd            v22.4S, v2.4S, v20.4S
-        fsub            v22.4S, v22.4S, v17.4S
-        fadd            v23.4S, v3.4S, v21.4S
-        fsub            v23.4S, v23.4S, v18.4S
-        rev64           v23.4S, v23.4S
-        fmul            v23.4S, v23.4S, v0.4S
-        fadd            v22.4S, v22.4S, v23.4S
-        st1             {v22.4S}, [x1], #16
-        fadd            v23.2S, v1.2S, v19.2S
-        fsub            v23.2S, v23.2S, v16.2S
-        faddp           v23.2S, v23.2S, v23.2S
-        st1             {v23.S}[0], [x1]
+        fmul            v19.2s, v4.2s, v4.2s
+        fmul            v20.2s, v5.2s, v4.s[0]
+        fmul            v21.2s, v5.2s, v4.s[1]
+        fadd            v22.4s, v2.4s, v20.4s
+        fsub            v22.4s, v22.4s, v17.4s
+        fadd            v23.4s, v3.4s, v21.4s
+        fsub            v23.4s, v23.4s, v18.4s
+        rev64           v23.4s, v23.4s
+        fmul            v23.4s, v23.4s, v0.4s
+        fadd            v22.4s, v22.4s, v23.4s
+        st1             {v22.4s}, [x1], #16
+        fadd            v23.2s, v1.2s, v19.2s
+        fsub            v23.2s, v23.2s, v16.2s
+        faddp           v23.2s, v23.2s, v23.2s
+        st1             {v23.s}[0], [x1]
         add             x1, x1, #8
-        rev64           v3.2S, v3.2S
-        fmul            v3.2S, v3.2S, v0.2S
-        fadd            v2.2S, v2.2S, v3.2S
-        st1             {v2.2S}, [x1]
+        rev64           v3.2s, v3.2s
+        fmul            v3.2s, v3.2s, v0.2s
+        fadd            v2.2s, v2.2s, v3.2s
+        st1             {v2.2s}, [x1]
         add             x1, x1, #16
-        faddp           v1.2S, v1.2S, v1.2S
-        st1             {v1.S}[0], [x1]
+        faddp           v1.2s, v1.2s, v1.2s
+        st1             {v1.s}[0], [x1]
         ret
 endfunc
 
@@ -278,25 +278,25 @@
 1:      and             x3, x3, #0x1ff
         add             x8, x7, x3, lsl #3
         add             x3, x3, #2
-        ld1             {v2.4S}, [x0]
-        ld1             {v3.2S}, [x1], #8
-        ld1             {v4.2S}, [x2], #8
-        ld1             {v5.4S}, [x8]
-        mov             v6.16B, v2.16B
-        zip1            v3.4S, v3.4S, v3.4S
-        zip1            v4.4S, v4.4S, v4.4S
-        fmla            v6.4S, v1.4S, v3.4S
-        fmla            v2.4S, v5.4S, v4.4S
-        fcmeq           v7.4S, v3.4S, #0
-        bif             v2.16B, v6.16B, v7.16B
-        st1             {v2.4S}, [x0], #16
+        ld1             {v2.4s}, [x0]
+        ld1             {v3.2s}, [x1], #8
+        ld1             {v4.2s}, [x2], #8
+        ld1             {v5.4s}, [x8]
+        mov             v6.16b, v2.16b
+        zip1            v3.4s, v3.4s, v3.4s
+        zip1            v4.4s, v4.4s, v4.4s
+        fmla            v6.4s, v1.4s, v3.4s
+        fmla            v2.4s, v5.4s, v4.4s
+        fcmeq           v7.4s, v3.4s, #0
+        bif             v2.16b, v6.16b, v7.16b
+        st1             {v2.4s}, [x0], #16
         subs            x5, x5, #2
         b.gt            1b
 .endm
 
 function ff_sbr_hf_apply_noise_0_neon, export=1
         movrel          x9, phi_noise_0
-        ld1             {v1.4S}, [x9]
+        ld1             {v1.4s}, [x9]
         apply_noise_common
         ret
 endfunc
@@ -305,14 +305,14 @@
         movrel          x9, phi_noise_1
         and             x4, x4, #1
         add             x9, x9, x4, lsl #4
-        ld1             {v1.4S}, [x9]
+        ld1             {v1.4s}, [x9]
         apply_noise_common
         ret
 endfunc
 
 function ff_sbr_hf_apply_noise_2_neon, export=1
         movrel          x9, phi_noise_2
-        ld1             {v1.4S}, [x9]
+        ld1             {v1.4s}, [x9]
         apply_noise_common
         ret
 endfunc
@@ -321,7 +321,7 @@
         movrel          x9, phi_noise_3
         and             x4, x4, #1
         add             x9, x9, x4, lsl #4
-        ld1             {v1.4S}, [x9]
+        ld1             {v1.4s}, [x9]
         apply_noise_common
         ret
 endfunc
diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/simple_idct_neon.S ffmpeg-5.1.9/libavcodec/aarch64/simple_idct_neon.S
--- ffmpeg-5.1.8/libavcodec/aarch64/simple_idct_neon.S	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/aarch64/simple_idct_neon.S	2026-03-16 18:10:00.000000000 +0000
@@ -54,7 +54,7 @@
         prfm            pldl1keep, [\data]
         mov             x10, x30
         movrel          x3, idct_coeff_neon
-        ld1             {v0.2D}, [x3]
+        ld1             {v0.2d}, [x3]
 .endm
 
 .macro idct_end
@@ -74,146 +74,146 @@
 .endm
 
 .macro idct_col4_top y1, y2, y3, y4, i, l
-        smull\i         v7.4S,  \y3\l, z2
-        smull\i         v16.4S, \y3\l, z6
-        smull\i         v17.4S, \y2\l, z1
-        add             v19.4S, v23.4S, v7.4S
-        smull\i         v18.4S, \y2\l, z3
-        add             v20.4S, v23.4S, v16.4S
-        smull\i         v5.4S,  \y2\l, z5
-        sub             v21.4S, v23.4S, v16.4S
-        smull\i         v6.4S,  \y2\l, z7
-        sub             v22.4S, v23.4S, v7.4S
-
-        smlal\i         v17.4S, \y4\l, z3
-        smlsl\i         v18.4S, \y4\l, z7
-        smlsl\i         v5.4S,  \y4\l, z1
-        smlsl\i         v6.4S,  \y4\l, z5
+        smull\i         v7.4s,  \y3\l, z2
+        smull\i         v16.4s, \y3\l, z6
+        smull\i         v17.4s, \y2\l, z1
+        add             v19.4s, v23.4s, v7.4s
+        smull\i         v18.4s, \y2\l, z3
+        add             v20.4s, v23.4s, v16.4s
+        smull\i         v5.4s,  \y2\l, z5
+        sub             v21.4s, v23.4s, v16.4s
+        smull\i         v6.4s,  \y2\l, z7
+        sub             v22.4s, v23.4s, v7.4s
+
+        smlal\i         v17.4s, \y4\l, z3
+        smlsl\i         v18.4s, \y4\l, z7
+        smlsl\i         v5.4s,  \y4\l, z1
+        smlsl\i         v6.4s,  \y4\l, z5
 .endm
 
 .macro idct_row4_neon y1, y2, y3, y4, pass
-        ld1             {\y1\().2D,\y2\().2D}, [x2], #32
-        movi            v23.4S, #1<<2, lsl #8
-        orr             v5.16B, \y1\().16B, \y2\().16B
-        ld1             {\y3\().2D,\y4\().2D}, [x2], #32
-        orr             v6.16B, \y3\().16B, \y4\().16B
-        orr             v5.16B, v5.16B, v6.16B
-        mov             x3, v5.D[1]
-        smlal           v23.4S, \y1\().4H, z4
+        ld1             {\y1\().2d,\y2\().2d}, [x2], #32
+        movi            v23.4s, #1<<2, lsl #8
+        orr             v5.16b, \y1\().16b, \y2\().16b
+        ld1             {\y3\().2d,\y4\().2d}, [x2], #32
+        orr             v6.16b, \y3\().16b, \y4\().16b
+        orr             v5.16b, v5.16b, v6.16b
+        mov             x3, v5.d[1]
+        smlal           v23.4s, \y1\().4h, z4
 
-        idct_col4_top   \y1, \y2, \y3, \y4, 1, .4H
+        idct_col4_top   \y1, \y2, \y3, \y4, 1, .4h
 
         cmp             x3, #0
         b.eq            \pass\()f
 
-        smull2          v7.4S, \y1\().8H, z4
-        smlal2          v17.4S, \y2\().8H, z5
-        smlsl2          v18.4S, \y2\().8H, z1
-        smull2          v16.4S, \y3\().8H, z2
-        smlal2          v5.4S, \y2\().8H, z7
-        add             v19.4S, v19.4S, v7.4S
-        sub             v20.4S, v20.4S, v7.4S
-        sub             v21.4S, v21.4S, v7.4S
-        add             v22.4S, v22.4S, v7.4S
-        smlal2          v6.4S, \y2\().8H, z3
-        smull2          v7.4S, \y3\().8H, z6
-        smlal2          v17.4S, \y4\().8H, z7
-        smlsl2          v18.4S, \y4\().8H, z5
-        smlal2          v5.4S, \y4\().8H, z3
-        smlsl2          v6.4S, \y4\().8H, z1
-        add             v19.4S, v19.4S, v7.4S
-        sub             v20.4S, v20.4S, v16.4S
-        add             v21.4S, v21.4S, v16.4S
-        sub             v22.4S, v22.4S, v7.4S
+        smull2          v7.4s, \y1\().8h, z4
+        smlal2          v17.4s, \y2\().8h, z5
+        smlsl2          v18.4s, \y2\().8h, z1
+        smull2          v16.4s, \y3\().8h, z2
+        smlal2          v5.4s, \y2\().8h, z7
+        add             v19.4s, v19.4s, v7.4s
+        sub             v20.4s, v20.4s, v7.4s
+        sub             v21.4s, v21.4s, v7.4s
+        add             v22.4s, v22.4s, v7.4s
+        smlal2          v6.4s, \y2\().8h, z3
+        smull2          v7.4s, \y3\().8h, z6
+        smlal2          v17.4s, \y4\().8h, z7
+        smlsl2          v18.4s, \y4\().8h, z5
+        smlal2          v5.4s, \y4\().8h, z3
+        smlsl2          v6.4s, \y4\().8h, z1
+        add             v19.4s, v19.4s, v7.4s
+        sub             v20.4s, v20.4s, v16.4s
+        add             v21.4s, v21.4s, v16.4s
+        sub             v22.4s, v22.4s, v7.4s
 
 \pass:  add             \y3\().4S, v19.4S, v17.4S
-        add             \y4\().4S, v20.4S, v18.4S
-        shrn            \y1\().4H, \y3\().4S, #ROW_SHIFT
-        shrn            \y2\().4H, \y4\().4S, #ROW_SHIFT
-        add             v7.4S, v21.4S, v5.4S
-        add             v16.4S, v22.4S, v6.4S
-        shrn            \y3\().4H, v7.4S, #ROW_SHIFT
-        shrn            \y4\().4H, v16.4S, #ROW_SHIFT
-        sub             v22.4S, v22.4S, v6.4S
-        sub             v19.4S, v19.4S, v17.4S
-        sub             v21.4S, v21.4S, v5.4S
-        shrn2           \y1\().8H, v22.4S, #ROW_SHIFT
-        sub             v20.4S, v20.4S, v18.4S
-        shrn2           \y2\().8H, v21.4S, #ROW_SHIFT
-        shrn2           \y3\().8H, v20.4S, #ROW_SHIFT
-        shrn2           \y4\().8H, v19.4S, #ROW_SHIFT
-
-        trn1            v16.8H, \y1\().8H, \y2\().8H
-        trn2            v17.8H, \y1\().8H, \y2\().8H
-        trn1            v18.8H, \y3\().8H, \y4\().8H
-        trn2            v19.8H, \y3\().8H, \y4\().8H
-        trn1            \y1\().4S, v16.4S, v18.4S
-        trn1            \y2\().4S, v17.4S, v19.4S
-        trn2            \y3\().4S, v16.4S, v18.4S
-        trn2            \y4\().4S, v17.4S, v19.4S
+        add             \y4\().4s, v20.4s, v18.4s
+        shrn            \y1\().4h, \y3\().4s, #ROW_SHIFT
+        shrn            \y2\().4h, \y4\().4s, #ROW_SHIFT
+        add             v7.4s, v21.4s, v5.4s
+        add             v16.4s, v22.4s, v6.4s
+        shrn            \y3\().4h, v7.4s, #ROW_SHIFT
+        shrn            \y4\().4h, v16.4s, #ROW_SHIFT
+        sub             v22.4s, v22.4s, v6.4s
+        sub             v19.4s, v19.4s, v17.4s
+        sub             v21.4s, v21.4s, v5.4s
+        shrn2           \y1\().8h, v22.4s, #ROW_SHIFT
+        sub             v20.4s, v20.4s, v18.4s
+        shrn2           \y2\().8h, v21.4s, #ROW_SHIFT
+        shrn2           \y3\().8h, v20.4s, #ROW_SHIFT
+        shrn2           \y4\().8h, v19.4s, #ROW_SHIFT
+
+        trn1            v16.8h, \y1\().8h, \y2\().8h
+        trn2            v17.8h, \y1\().8h, \y2\().8h
+        trn1            v18.8h, \y3\().8h, \y4\().8h
+        trn2            v19.8h, \y3\().8h, \y4\().8h
+        trn1            \y1\().4s, v16.4s, v18.4s
+        trn1            \y2\().4s, v17.4s, v19.4s
+        trn2            \y3\().4s, v16.4s, v18.4s
+        trn2            \y4\().4s, v17.4s, v19.4s
 .endm
 
 .macro declare_idct_col4_neon i, l
 function idct_col4_neon\i
-        dup             v23.4H, z4c
+        dup             v23.4h, z4c
 .if \i == 1
-        add             v23.4H, v23.4H, v24.4H
+        add             v23.4h, v23.4h, v24.4h
 .else
-        mov             v5.D[0], v24.D[1]
-        add             v23.4H, v23.4H, v5.4H
+        mov             v5.d[0], v24.d[1]
+        add             v23.4h, v23.4h, v5.4h
 .endif
-        smull           v23.4S, v23.4H, z4
+        smull           v23.4s, v23.4h, z4
 
         idct_col4_top   v24, v25, v26, v27, \i, \l
 
-        mov             x4, v28.D[\i - 1]
-        mov             x5, v29.D[\i - 1]
+        mov             x4, v28.d[\i - 1]
+        mov             x5, v29.d[\i - 1]
         cmp             x4, #0
         b.eq            1f
 
-        smull\i         v7.4S,  v28\l,  z4
-        add             v19.4S, v19.4S, v7.4S
-        sub             v20.4S, v20.4S, v7.4S
-        sub             v21.4S, v21.4S, v7.4S
-        add             v22.4S, v22.4S, v7.4S
+        smull\i         v7.4s,  v28\l,  z4
+        add             v19.4s, v19.4s, v7.4s
+        sub             v20.4s, v20.4s, v7.4s
+        sub             v21.4s, v21.4s, v7.4s
+        add             v22.4s, v22.4s, v7.4s
 
-1:      mov             x4, v30.D[\i - 1]
+1:      mov             x4, v30.d[\i - 1]
         cmp             x5, #0
         b.eq            2f
 
-        smlal\i         v17.4S, v29\l, z5
-        smlsl\i         v18.4S, v29\l, z1
-        smlal\i         v5.4S,  v29\l, z7
-        smlal\i         v6.4S,  v29\l, z3
+        smlal\i         v17.4s, v29\l, z5
+        smlsl\i         v18.4s, v29\l, z1
+        smlal\i         v5.4s,  v29\l, z7
+        smlal\i         v6.4s,  v29\l, z3
 
-2:      mov             x5, v31.D[\i - 1]
+2:      mov             x5, v31.d[\i - 1]
         cmp             x4, #0
         b.eq            3f
 
-        smull\i         v7.4S,  v30\l, z6
-        smull\i         v16.4S, v30\l, z2
-        add             v19.4S, v19.4S, v7.4S
-        sub             v22.4S, v22.4S, v7.4S
-        sub             v20.4S, v20.4S, v16.4S
-        add             v21.4S, v21.4S, v16.4S
+        smull\i         v7.4s,  v30\l, z6
+        smull\i         v16.4s, v30\l, z2
+        add             v19.4s, v19.4s, v7.4s
+        sub             v22.4s, v22.4s, v7.4s
+        sub             v20.4s, v20.4s, v16.4s
+        add             v21.4s, v21.4s, v16.4s
 
 3:      cmp             x5, #0
         b.eq            4f
 
-        smlal\i         v17.4S, v31\l, z7
-        smlsl\i         v18.4S, v31\l, z5
-        smlal\i         v5.4S,  v31\l, z3
-        smlsl\i         v6.4S,  v31\l, z1
-
-4:      addhn           v7.4H, v19.4S, v17.4S
-        addhn2          v7.8H, v20.4S, v18.4S
-        subhn           v18.4H, v20.4S, v18.4S
-        subhn2          v18.8H, v19.4S, v17.4S
-
-        addhn           v16.4H, v21.4S, v5.4S
-        addhn2          v16.8H, v22.4S, v6.4S
-        subhn           v17.4H, v22.4S, v6.4S
-        subhn2          v17.8H, v21.4S, v5.4S
+        smlal\i         v17.4s, v31\l, z7
+        smlsl\i         v18.4s, v31\l, z5
+        smlal\i         v5.4s,  v31\l, z3
+        smlsl\i         v6.4s,  v31\l, z1
+
+4:      addhn           v7.4h, v19.4s, v17.4s
+        addhn2          v7.8h, v20.4s, v18.4s
+        subhn           v18.4h, v20.4s, v18.4s
+        subhn2          v18.8h, v19.4s, v17.4s
+
+        addhn           v16.4h, v21.4s, v5.4s
+        addhn2          v16.8h, v22.4s, v6.4s
+        subhn           v17.4h, v22.4s, v6.4s
+        subhn2          v17.8h, v21.4s, v5.4s
 
         ret
 endfunc
@@ -229,33 +229,33 @@
         idct_row4_neon  v28, v29, v30, v31, 2
         bl              idct_col4_neon1
 
-        sqshrun         v1.8B,  v7.8H, #COL_SHIFT-16
-        sqshrun2        v1.16B, v16.8H, #COL_SHIFT-16
-        sqshrun         v3.8B,  v17.8H, #COL_SHIFT-16
-        sqshrun2        v3.16B, v18.8H, #COL_SHIFT-16
+        sqshrun         v1.8b,  v7.8h, #COL_SHIFT-16
+        sqshrun2        v1.16b, v16.8h, #COL_SHIFT-16
+        sqshrun         v3.8b,  v17.8h, #COL_SHIFT-16
+        sqshrun2        v3.16b, v18.8h, #COL_SHIFT-16
 
         bl              idct_col4_neon2
 
-        sqshrun         v2.8B,  v7.8H, #COL_SHIFT-16
-        sqshrun2        v2.16B, v16.8H, #COL_SHIFT-16
-        sqshrun         v4.8B,  v17.8H, #COL_SHIFT-16
-        sqshrun2        v4.16B, v18.8H, #COL_SHIFT-16
-
-        zip1            v16.4S, v1.4S, v2.4S
-        zip2            v17.4S, v1.4S, v2.4S
-
-        st1             {v16.D}[0], [x0], x1
-        st1             {v16.D}[1], [x0], x1
-
-        zip1            v18.4S, v3.4S, v4.4S
-        zip2            v19.4S, v3.4S, v4.4S
-
-        st1             {v17.D}[0], [x0], x1
-        st1             {v17.D}[1], [x0], x1
-        st1             {v18.D}[0], [x0], x1
-        st1             {v18.D}[1], [x0], x1
-        st1             {v19.D}[0], [x0], x1
-        st1             {v19.D}[1], [x0], x1
+        sqshrun         v2.8b,  v7.8h, #COL_SHIFT-16
+        sqshrun2        v2.16b, v16.8h, #COL_SHIFT-16
+        sqshrun         v4.8b,  v17.8h, #COL_SHIFT-16
+        sqshrun2        v4.16b, v18.8h, #COL_SHIFT-16
+
+        zip1            v16.4s, v1.4s, v2.4s
+        zip2            v17.4s, v1.4s, v2.4s
+
+        st1             {v16.d}[0], [x0], x1
+        st1             {v16.d}[1], [x0], x1
+
+        zip1            v18.4s, v3.4s, v4.4s
+        zip2            v19.4s, v3.4s, v4.4s
+
+        st1             {v17.d}[0], [x0], x1
+        st1             {v17.d}[1], [x0], x1
+        st1             {v18.d}[0], [x0], x1
+        st1             {v18.d}[1], [x0], x1
+        st1             {v19.d}[0], [x0], x1
+        st1             {v19.d}[1], [x0], x1
 
         idct_end
 endfunc
@@ -267,59 +267,59 @@
         idct_row4_neon  v28, v29, v30, v31, 2
         bl              idct_col4_neon1
 
-        sshr            v1.8H, v7.8H, #COL_SHIFT-16
-        sshr            v2.8H, v16.8H, #COL_SHIFT-16
-        sshr            v3.8H, v17.8H, #COL_SHIFT-16
-        sshr            v4.8H, v18.8H, #COL_SHIFT-16
+        sshr            v1.8h, v7.8h, #COL_SHIFT-16
+        sshr            v2.8h, v16.8h, #COL_SHIFT-16
+        sshr            v3.8h, v17.8h, #COL_SHIFT-16
+        sshr            v4.8h, v18.8h, #COL_SHIFT-16
 
         bl              idct_col4_neon2
 
-        sshr            v7.8H, v7.8H, #COL_SHIFT-16
-        sshr            v16.8H, v16.8H, #COL_SHIFT-16
-        sshr            v17.8H, v17.8H, #COL_SHIFT-16
-        sshr            v18.8H, v18.8H, #COL_SHIFT-16
+        sshr            v7.8h, v7.8h, #COL_SHIFT-16
+        sshr            v16.8h, v16.8h, #COL_SHIFT-16
+        sshr            v17.8h, v17.8h, #COL_SHIFT-16
+        sshr            v18.8h, v18.8h, #COL_SHIFT-16
 
         mov             x9,  x0
-        ld1             {v19.D}[0], [x0], x1
-        zip1            v23.2D, v1.2D, v7.2D
-        zip2            v24.2D, v1.2D, v7.2D
-        ld1             {v19.D}[1], [x0], x1
-        zip1            v25.2D, v2.2D, v16.2D
-        zip2            v26.2D, v2.2D, v16.2D
-        ld1             {v20.D}[0], [x0], x1
-        zip1            v27.2D, v3.2D, v17.2D
-        zip2            v28.2D, v3.2D, v17.2D
-        ld1             {v20.D}[1], [x0], x1
-        zip1            v29.2D, v4.2D, v18.2D
-        zip2            v30.2D, v4.2D, v18.2D
-        ld1             {v21.D}[0], [x0], x1
-        uaddw           v23.8H, v23.8H, v19.8B
-        uaddw2          v24.8H, v24.8H, v19.16B
-        ld1             {v21.D}[1], [x0], x1
-        sqxtun          v23.8B, v23.8H
-        sqxtun2         v23.16B, v24.8H
-        ld1             {v22.D}[0], [x0], x1
-        uaddw           v24.8H, v25.8H, v20.8B
-        uaddw2          v25.8H, v26.8H, v20.16B
-        ld1             {v22.D}[1], [x0], x1
-        sqxtun          v24.8B, v24.8H
-        sqxtun2         v24.16B, v25.8H
-        st1             {v23.D}[0], [x9], x1
-        uaddw           v25.8H, v27.8H, v21.8B
-        uaddw2          v26.8H, v28.8H, v21.16B
-        st1             {v23.D}[1], [x9], x1
-        sqxtun          v25.8B, v25.8H
-        sqxtun2         v25.16B, v26.8H
-        st1             {v24.D}[0], [x9], x1
-        uaddw           v26.8H, v29.8H, v22.8B
-        uaddw2          v27.8H, v30.8H, v22.16B
-        st1             {v24.D}[1], [x9], x1
-        sqxtun          v26.8B, v26.8H
-        sqxtun2         v26.16B, v27.8H
-        st1             {v25.D}[0], [x9], x1
-        st1             {v25.D}[1], [x9], x1
-        st1             {v26.D}[0], [x9], x1
-        st1             {v26.D}[1], [x9], x1
+        ld1             {v19.d}[0], [x0], x1
+        zip1            v23.2d, v1.2d, v7.2d
+        zip2            v24.2d, v1.2d, v7.2d
+        ld1             {v19.d}[1], [x0], x1
+        zip1            v25.2d, v2.2d, v16.2d
+        zip2            v26.2d, v2.2d, v16.2d
+        ld1             {v20.d}[0], [x0], x1
+        zip1            v27.2d, v3.2d, v17.2d
+        zip2            v28.2d, v3.2d, v17.2d
+        ld1             {v20.d}[1], [x0], x1
+        zip1            v29.2d, v4.2d, v18.2d
+        zip2            v30.2d, v4.2d, v18.2d
+        ld1             {v21.d}[0], [x0], x1
+        uaddw           v23.8h, v23.8h, v19.8b
+        uaddw2          v24.8h, v24.8h, v19.16b
+        ld1             {v21.d}[1], [x0], x1
+        sqxtun          v23.8b, v23.8h
+        sqxtun2         v23.16b, v24.8h
+        ld1             {v22.d}[0], [x0], x1
+        uaddw           v24.8h, v25.8h, v20.8b
+        uaddw2          v25.8h, v26.8h, v20.16b
+        ld1             {v22.d}[1], [x0], x1
+        sqxtun          v24.8b, v24.8h
+        sqxtun2         v24.16b, v25.8h
+        st1             {v23.d}[0], [x9], x1
+        uaddw           v25.8h, v27.8h, v21.8b
+        uaddw2          v26.8h, v28.8h, v21.16b
+        st1             {v23.d}[1], [x9], x1
+        sqxtun          v25.8b, v25.8h
+        sqxtun2         v25.16b, v26.8h
+        st1             {v24.d}[0], [x9], x1
+        uaddw           v26.8h, v29.8h, v22.8b
+        uaddw2          v27.8h, v30.8h, v22.16b
+        st1             {v24.d}[1], [x9], x1
+        sqxtun          v26.8b, v26.8h
+        sqxtun2         v26.16b, v27.8h
+        st1             {v25.d}[0], [x9], x1
+        st1             {v25.d}[1], [x9], x1
+        st1             {v26.d}[0], [x9], x1
+        st1             {v26.d}[1], [x9], x1
 
         idct_end
 endfunc
@@ -333,30 +333,30 @@
         sub             x2, x2, #128
         bl              idct_col4_neon1
 
-        sshr            v1.8H, v7.8H, #COL_SHIFT-16
-        sshr            v2.8H, v16.8H, #COL_SHIFT-16
-        sshr            v3.8H, v17.8H, #COL_SHIFT-16
-        sshr            v4.8H, v18.8H, #COL_SHIFT-16
+        sshr            v1.8h, v7.8h, #COL_SHIFT-16
+        sshr            v2.8h, v16.8h, #COL_SHIFT-16
+        sshr            v3.8h, v17.8h, #COL_SHIFT-16
+        sshr            v4.8h, v18.8h, #COL_SHIFT-16
 
         bl              idct_col4_neon2
 
-        sshr            v7.8H, v7.8H, #COL_SHIFT-16
-        sshr            v16.8H, v16.8H, #COL_SHIFT-16
-        sshr            v17.8H, v17.8H, #COL_SHIFT-16
-        sshr            v18.8H, v18.8H, #COL_SHIFT-16
-
-        zip1            v23.2D, v1.2D, v7.2D
-        zip2            v24.2D, v1.2D, v7.2D
-        st1             {v23.2D,v24.2D}, [x2], #32
-        zip1            v25.2D, v2.2D, v16.2D
-        zip2            v26.2D, v2.2D, v16.2D
-        st1             {v25.2D,v26.2D}, [x2], #32
-        zip1            v27.2D, v3.2D, v17.2D
-        zip2            v28.2D, v3.2D, v17.2D
-        st1             {v27.2D,v28.2D}, [x2], #32
-        zip1            v29.2D, v4.2D, v18.2D
-        zip2            v30.2D, v4.2D, v18.2D
-        st1             {v29.2D,v30.2D}, [x2], #32
+        sshr            v7.8h, v7.8h, #COL_SHIFT-16
+        sshr            v16.8h, v16.8h, #COL_SHIFT-16
+        sshr            v17.8h, v17.8h, #COL_SHIFT-16
+        sshr            v18.8h, v18.8h, #COL_SHIFT-16
+
+        zip1            v23.2d, v1.2d, v7.2d
+        zip2            v24.2d, v1.2d, v7.2d
+        st1             {v23.2d,v24.2d}, [x2], #32
+        zip1            v25.2d, v2.2d, v16.2d
+        zip2            v26.2d, v2.2d, v16.2d
+        st1             {v25.2d,v26.2d}, [x2], #32
+        zip1            v27.2d, v3.2d, v17.2d
+        zip2            v28.2d, v3.2d, v17.2d
+        st1             {v27.2d,v28.2d}, [x2], #32
+        zip1            v29.2d, v4.2d, v18.2d
+        zip2            v30.2d, v4.2d, v18.2d
+        st1             {v29.2d,v30.2d}, [x2], #32
 
         idct_end
 endfunc
diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/vp8dsp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/vp8dsp_neon.S
--- ffmpeg-5.1.8/libavcodec/aarch64/vp8dsp_neon.S	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/aarch64/vp8dsp_neon.S	2026-03-16 18:10:00.000000000 +0000
@@ -330,32 +330,32 @@
         //   v17: hev
 
         // convert to signed value:
-        eor            v3.16b, v3.16b, v21.16b           // PS0 = P0 ^ 0x80
-        eor            v4.16b, v4.16b, v21.16b           // QS0 = Q0 ^ 0x80
+        eor             v3.16b, v3.16b, v21.16b           // PS0 = P0 ^ 0x80
+        eor             v4.16b, v4.16b, v21.16b           // QS0 = Q0 ^ 0x80
 
-        movi           v20.8h, #3
-        ssubl          v18.8h, v4.8b,  v3.8b             // QS0 - PS0
-        ssubl2         v19.8h, v4.16b, v3.16b            //   (widened to 16bit)
-        eor            v2.16b, v2.16b, v21.16b           // PS1 = P1 ^ 0x80
-        eor            v5.16b, v5.16b, v21.16b           // QS1 = Q1 ^ 0x80
-        mul            v18.8h, v18.8h, v20.8h            // w = 3 * (QS0 - PS0)
-        mul            v19.8h, v19.8h, v20.8h
-
-        sqsub          v20.16b, v2.16b, v5.16b           // clamp(PS1-QS1)
-        movi           v22.16b, #4
-        movi           v23.16b, #3
+        movi            v20.8h, #3
+        ssubl           v18.8h, v4.8b,  v3.8b             // QS0 - PS0
+        ssubl2          v19.8h, v4.16b, v3.16b            //   (widened to 16bit)
+        eor             v2.16b, v2.16b, v21.16b           // PS1 = P1 ^ 0x80
+        eor             v5.16b, v5.16b, v21.16b           // QS1 = Q1 ^ 0x80
+        mul             v18.8h, v18.8h, v20.8h            // w = 3 * (QS0 - PS0)
+        mul             v19.8h, v19.8h, v20.8h
+
+        sqsub           v20.16b, v2.16b, v5.16b           // clamp(PS1-QS1)
+        movi            v22.16b, #4
+        movi            v23.16b, #3
     .if \inner
-        and            v20.16b, v20.16b, v17.16b         // if(hev) w += clamp(PS1-QS1)
+        and             v20.16b, v20.16b, v17.16b         // if(hev) w += clamp(PS1-QS1)
     .endif
-        saddw          v18.8h,  v18.8h, v20.8b           // w += clamp(PS1-QS1)
-        saddw2         v19.8h,  v19.8h, v20.16b
-        sqxtn          v18.8b,  v18.8h                   // narrow result back into v18
-        sqxtn2         v18.16b, v19.8h
+        saddw           v18.8h,  v18.8h, v20.8b           // w += clamp(PS1-QS1)
+        saddw2          v19.8h,  v19.8h, v20.16b
+        sqxtn           v18.8b,  v18.8h                   // narrow result back into v18
+        sqxtn2          v18.16b, v19.8h
     .if !\inner && !\simple
-        eor            v1.16b,  v1.16b,  v21.16b         // PS2 = P2 ^ 0x80
-        eor            v6.16b,  v6.16b,  v21.16b         // QS2 = Q2 ^ 0x80
+        eor             v1.16b,  v1.16b,  v21.16b         // PS2 = P2 ^ 0x80
+        eor             v6.16b,  v6.16b,  v21.16b         // QS2 = Q2 ^ 0x80
     .endif
-        and            v18.16b, v18.16b, v16.16b         // w &= normal_limit
+        and             v18.16b, v18.16b, v16.16b         // w &= normal_limit
 
         // registers used at this point..
         //   v0 -> P3  (don't corrupt)
@@ -375,44 +375,44 @@
         //   P0 = s2u(PS0 + c2);
 
     .if \simple
-        sqadd          v19.16b, v18.16b, v22.16b           // c1 = clamp((w&hev)+4)
-        sqadd          v20.16b, v18.16b, v23.16b           // c2 = clamp((w&hev)+3)
-        sshr           v19.16b, v19.16b, #3                // c1 >>= 3
-        sshr           v20.16b, v20.16b, #3                // c2 >>= 3
-        sqsub          v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
-        sqadd          v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
-        eor            v4.16b,  v4.16b,  v21.16b           // Q0 = QS0 ^ 0x80
-        eor            v3.16b,  v3.16b,  v21.16b           // P0 = PS0 ^ 0x80
-        eor            v5.16b,  v5.16b,  v21.16b           // Q1 = QS1 ^ 0x80
-        eor            v2.16b,  v2.16b,  v21.16b           // P1 = PS1 ^ 0x80
+        sqadd           v19.16b, v18.16b, v22.16b           // c1 = clamp((w&hev)+4)
+        sqadd           v20.16b, v18.16b, v23.16b           // c2 = clamp((w&hev)+3)
+        sshr            v19.16b, v19.16b, #3                // c1 >>= 3
+        sshr            v20.16b, v20.16b, #3                // c2 >>= 3
+        sqsub           v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
+        sqadd           v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
+        eor             v4.16b,  v4.16b,  v21.16b           // Q0 = QS0 ^ 0x80
+        eor             v3.16b,  v3.16b,  v21.16b           // P0 = PS0 ^ 0x80
+        eor             v5.16b,  v5.16b,  v21.16b           // Q1 = QS1 ^ 0x80
+        eor             v2.16b,  v2.16b,  v21.16b           // P1 = PS1 ^ 0x80
     .elseif \inner
         // the !is4tap case of filter_common, only used for inner blocks
         //   c3 = ((c1&~hev) + 1) >> 1;
         //   Q1 = s2u(QS1 - c3);
         //   P1 = s2u(PS1 + c3);
-        sqadd          v19.16b, v18.16b, v22.16b           // c1 = clamp((w&hev)+4)
-        sqadd          v20.16b, v18.16b, v23.16b           // c2 = clamp((w&hev)+3)
-        sshr           v19.16b, v19.16b, #3                // c1 >>= 3
-        sshr           v20.16b, v20.16b, #3                // c2 >>= 3
-        sqsub          v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
-        sqadd          v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
-        bic            v19.16b, v19.16b, v17.16b           // c1 & ~hev
-        eor            v4.16b,  v4.16b,  v21.16b           // Q0 = QS0 ^ 0x80
-        srshr          v19.16b, v19.16b, #1                // c3 >>= 1
-        eor            v3.16b,  v3.16b,  v21.16b           // P0 = PS0 ^ 0x80
-        sqsub          v5.16b,  v5.16b,  v19.16b           // QS1 = clamp(QS1-c3)
-        sqadd          v2.16b,  v2.16b,  v19.16b           // PS1 = clamp(PS1+c3)
-        eor            v5.16b,  v5.16b,  v21.16b           // Q1 = QS1 ^ 0x80
-        eor            v2.16b,  v2.16b,  v21.16b           // P1 = PS1 ^ 0x80
+        sqadd           v19.16b, v18.16b, v22.16b           // c1 = clamp((w&hev)+4)
+        sqadd           v20.16b, v18.16b, v23.16b           // c2 = clamp((w&hev)+3)
+        sshr            v19.16b, v19.16b, #3                // c1 >>= 3
+        sshr            v20.16b, v20.16b, #3                // c2 >>= 3
+        sqsub           v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
+        sqadd           v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
+        bic             v19.16b, v19.16b, v17.16b           // c1 & ~hev
+        eor             v4.16b,  v4.16b,  v21.16b           // Q0 = QS0 ^ 0x80
+        srshr           v19.16b, v19.16b, #1                // c3 >>= 1
+        eor             v3.16b,  v3.16b,  v21.16b           // P0 = PS0 ^ 0x80
+        sqsub           v5.16b,  v5.16b,  v19.16b           // QS1 = clamp(QS1-c3)
+        sqadd           v2.16b,  v2.16b,  v19.16b           // PS1 = clamp(PS1+c3)
+        eor             v5.16b,  v5.16b,  v21.16b           // Q1 = QS1 ^ 0x80
+        eor             v2.16b,  v2.16b,  v21.16b           // P1 = PS1 ^ 0x80
     .else
-        and            v20.16b, v18.16b, v17.16b           // w & hev
-        sqadd          v19.16b, v20.16b, v22.16b           // c1 = clamp((w&hev)+4)
-        sqadd          v20.16b, v20.16b, v23.16b           // c2 = clamp((w&hev)+3)
-        sshr           v19.16b, v19.16b, #3                // c1 >>= 3
-        sshr           v20.16b, v20.16b, #3                // c2 >>= 3
-        bic            v18.16b, v18.16b, v17.16b           // w &= ~hev
-        sqsub          v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
-        sqadd          v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
+        and             v20.16b, v18.16b, v17.16b           // w & hev
+        sqadd           v19.16b, v20.16b, v22.16b           // c1 = clamp((w&hev)+4)
+        sqadd           v20.16b, v20.16b, v23.16b           // c2 = clamp((w&hev)+3)
+        sshr            v19.16b, v19.16b, #3                // c1 >>= 3
+        sshr            v20.16b, v20.16b, #3                // c2 >>= 3
+        bic             v18.16b, v18.16b, v17.16b           // w &= ~hev
+        sqsub           v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
+        sqadd           v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
 
         // filter_mbedge:
         //   a = clamp((27*w + 63) >> 7);
@@ -424,35 +424,35 @@
         //   a = clamp((9*w + 63) >> 7);
         //   Q2 = s2u(QS2 - a);
         //   P2 = s2u(PS2 + a);
-        movi           v17.8h,  #63
-        sshll          v22.8h,  v18.8b, #3
-        sshll2         v23.8h,  v18.16b, #3
-        saddw          v22.8h,  v22.8h, v18.8b
-        saddw2         v23.8h,  v23.8h, v18.16b
-        add            v16.8h,  v17.8h, v22.8h
-        add            v17.8h,  v17.8h, v23.8h           //  9*w + 63
-        add            v19.8h,  v16.8h, v22.8h
-        add            v20.8h,  v17.8h, v23.8h           // 18*w + 63
-        add            v22.8h,  v19.8h, v22.8h
-        add            v23.8h,  v20.8h, v23.8h           // 27*w + 63
-        sqshrn         v16.8b,  v16.8h,  #7
-        sqshrn2        v16.16b, v17.8h, #7              // clamp(( 9*w + 63)>>7)
-        sqshrn         v19.8b,  v19.8h, #7
-        sqshrn2        v19.16b, v20.8h, #7              // clamp((18*w + 63)>>7)
-        sqshrn         v22.8b,  v22.8h, #7
-        sqshrn2        v22.16b, v23.8h, #7              // clamp((27*w + 63)>>7)
-        sqadd          v1.16b,  v1.16b,  v16.16b        // PS2 = clamp(PS2+a)
-        sqsub          v6.16b,  v6.16b,  v16.16b        // QS2 = clamp(QS2-a)
-        sqadd          v2.16b,  v2.16b,  v19.16b        // PS1 = clamp(PS1+a)
-        sqsub          v5.16b,  v5.16b,  v19.16b        // QS1 = clamp(QS1-a)
-        sqadd          v3.16b,  v3.16b,  v22.16b        // PS0 = clamp(PS0+a)
-        sqsub          v4.16b,  v4.16b,  v22.16b        // QS0 = clamp(QS0-a)
-        eor            v3.16b,  v3.16b,  v21.16b        // P0 = PS0 ^ 0x80
-        eor            v4.16b,  v4.16b,  v21.16b        // Q0 = QS0 ^ 0x80
-        eor            v2.16b,  v2.16b,  v21.16b        // P1 = PS1 ^ 0x80
-        eor            v5.16b,  v5.16b,  v21.16b        // Q1 = QS1 ^ 0x80
-        eor            v1.16b,  v1.16b,  v21.16b        // P2 = PS2 ^ 0x80
-        eor            v6.16b,  v6.16b,  v21.16b        // Q2 = QS2 ^ 0x80
+        movi            v17.8h,  #63
+        sshll           v22.8h,  v18.8b, #3
+        sshll2          v23.8h,  v18.16b, #3
+        saddw           v22.8h,  v22.8h, v18.8b
+        saddw2          v23.8h,  v23.8h, v18.16b
+        add             v16.8h,  v17.8h, v22.8h
+        add             v17.8h,  v17.8h, v23.8h           //  9*w + 63
+        add             v19.8h,  v16.8h, v22.8h
+        add             v20.8h,  v17.8h, v23.8h           // 18*w + 63
+        add             v22.8h,  v19.8h, v22.8h
+        add             v23.8h,  v20.8h, v23.8h           // 27*w + 63
+        sqshrn          v16.8b,  v16.8h,  #7
+        sqshrn2         v16.16b, v17.8h, #7              // clamp(( 9*w + 63)>>7)
+        sqshrn          v19.8b,  v19.8h, #7
+        sqshrn2         v19.16b, v20.8h, #7              // clamp((18*w + 63)>>7)
+        sqshrn          v22.8b,  v22.8h, #7
+        sqshrn2         v22.16b, v23.8h, #7              // clamp((27*w + 63)>>7)
+        sqadd           v1.16b,  v1.16b,  v16.16b        // PS2 = clamp(PS2+a)
+        sqsub           v6.16b,  v6.16b,  v16.16b        // QS2 = clamp(QS2-a)
+        sqadd           v2.16b,  v2.16b,  v19.16b        // PS1 = clamp(PS1+a)
+        sqsub           v5.16b,  v5.16b,  v19.16b        // QS1 = clamp(QS1-a)
+        sqadd           v3.16b,  v3.16b,  v22.16b        // PS0 = clamp(PS0+a)
+        sqsub           v4.16b,  v4.16b,  v22.16b        // QS0 = clamp(QS0-a)
+        eor             v3.16b,  v3.16b,  v21.16b        // P0 = PS0 ^ 0x80
+        eor             v4.16b,  v4.16b,  v21.16b        // Q0 = QS0 ^ 0x80
+        eor             v2.16b,  v2.16b,  v21.16b        // P1 = PS1 ^ 0x80
+        eor             v5.16b,  v5.16b,  v21.16b        // Q1 = QS1 ^ 0x80
+        eor             v1.16b,  v1.16b,  v21.16b        // P2 = PS2 ^ 0x80
+        eor             v6.16b,  v6.16b,  v21.16b        // Q2 = QS2 ^ 0x80
     .endif
 .endm
 
@@ -507,48 +507,48 @@
         sub             x0,  x0,  x2,  lsl #2
         sub             x1,  x1,  x2,  lsl #2
         // Load pixels:
-        ld1          {v0.d}[0],     [x0], x2  // P3
-        ld1          {v0.d}[1],     [x1], x2  // P3
-        ld1          {v1.d}[0],     [x0], x2  // P2
-        ld1          {v1.d}[1],     [x1], x2  // P2
-        ld1          {v2.d}[0],     [x0], x2  // P1
-        ld1          {v2.d}[1],     [x1], x2  // P1
-        ld1          {v3.d}[0],     [x0], x2  // P0
-        ld1          {v3.d}[1],     [x1], x2  // P0
-        ld1          {v4.d}[0],     [x0], x2  // Q0
-        ld1          {v4.d}[1],     [x1], x2  // Q0
-        ld1          {v5.d}[0],     [x0], x2  // Q1
-        ld1          {v5.d}[1],     [x1], x2  // Q1
-        ld1          {v6.d}[0],     [x0], x2  // Q2
-        ld1          {v6.d}[1],     [x1], x2  // Q2
-        ld1          {v7.d}[0],     [x0]      // Q3
-        ld1          {v7.d}[1],     [x1]      // Q3
+        ld1             {v0.d}[0],     [x0], x2  // P3
+        ld1             {v0.d}[1],     [x1], x2  // P3
+        ld1             {v1.d}[0],     [x0], x2  // P2
+        ld1             {v1.d}[1],     [x1], x2  // P2
+        ld1             {v2.d}[0],     [x0], x2  // P1
+        ld1             {v2.d}[1],     [x1], x2  // P1
+        ld1             {v3.d}[0],     [x0], x2  // P0
+        ld1             {v3.d}[1],     [x1], x2  // P0
+        ld1             {v4.d}[0],     [x0], x2  // Q0
+        ld1             {v4.d}[1],     [x1], x2  // Q0
+        ld1             {v5.d}[0],     [x0], x2  // Q1
+        ld1             {v5.d}[1],     [x1], x2  // Q1
+        ld1             {v6.d}[0],     [x0], x2  // Q2
+        ld1             {v6.d}[1],     [x1], x2  // Q2
+        ld1             {v7.d}[0],     [x0]      // Q3
+        ld1             {v7.d}[1],     [x1]      // Q3
 
-        dup          v22.16b, w3                 // flim_E
-        dup          v23.16b, w4                 // flim_I
+        dup             v22.16b, w3                 // flim_E
+        dup             v23.16b, w4                 // flim_I
 
         vp8_loop_filter inner=\inner, hev_thresh=w5
 
         // back up to P2:  u,v -= stride * 6
-        sub          x0,  x0,  x2,  lsl #2
-        sub          x1,  x1,  x2,  lsl #2
-        sub          x0,  x0,  x2,  lsl #1
-        sub          x1,  x1,  x2,  lsl #1
+        sub             x0,  x0,  x2,  lsl #2
+        sub             x1,  x1,  x2,  lsl #2
+        sub             x0,  x0,  x2,  lsl #1
+        sub             x1,  x1,  x2,  lsl #1
 
         // Store pixels:
 
-        st1          {v1.d}[0],     [x0], x2  // P2
-        st1          {v1.d}[1],     [x1], x2  // P2
-        st1          {v2.d}[0],     [x0], x2  // P1
-        st1          {v2.d}[1],     [x1], x2  // P1
-        st1          {v3.d}[0],     [x0], x2  // P0
-        st1          {v3.d}[1],     [x1], x2  // P0
-        st1          {v4.d}[0],     [x0], x2  // Q0
-        st1          {v4.d}[1],     [x1], x2  // Q0
-        st1          {v5.d}[0],     [x0], x2  // Q1
-        st1          {v5.d}[1],     [x1], x2  // Q1
-        st1          {v6.d}[0],     [x0]      // Q2
-        st1          {v6.d}[1],     [x1]      // Q2
+        st1             {v1.d}[0],     [x0], x2  // P2
+        st1             {v1.d}[1],     [x1], x2  // P2
+        st1             {v2.d}[0],     [x0], x2  // P1
+        st1             {v2.d}[1],     [x1], x2  // P1
+        st1             {v3.d}[0],     [x0], x2  // P0
+        st1             {v3.d}[1],     [x1], x2  // P0
+        st1             {v4.d}[0],     [x0], x2  // Q0
+        st1             {v4.d}[1],     [x1], x2  // Q0
+        st1             {v5.d}[0],     [x0], x2  // Q1
+        st1             {v5.d}[1],     [x1], x2  // Q1
+        st1             {v6.d}[0],     [x0]      // Q2
+        st1             {v6.d}[1],     [x1]      // Q2
 
         ret
 endfunc
@@ -579,7 +579,7 @@
         ld1             {v6.d}[1], [x0], x1
         ld1             {v7.d}[1], [x0], x1
 
-        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
+        transpose_8x16B v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
 
         dup             v22.16b, w2                 // flim_E
     .if !\simple
@@ -590,7 +590,7 @@
 
         sub             x0,  x0,  x1, lsl #4    // backup 16 rows
 
-        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
+        transpose_8x16B v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
 
         // Store pixels:
         st1             {v0.d}[0], [x0], x1
@@ -624,24 +624,24 @@
         sub             x1,  x1,  #4
 
         // Load pixels:
-        ld1          {v0.d}[0],     [x0], x2 // load u
-        ld1          {v0.d}[1],     [x1], x2 // load v
-        ld1          {v1.d}[0],     [x0], x2
-        ld1          {v1.d}[1],     [x1], x2
-        ld1          {v2.d}[0],     [x0], x2
-        ld1          {v2.d}[1],     [x1], x2
-        ld1          {v3.d}[0],     [x0], x2
-        ld1          {v3.d}[1],     [x1], x2
-        ld1          {v4.d}[0],     [x0], x2
-        ld1          {v4.d}[1],     [x1], x2
-        ld1          {v5.d}[0],     [x0], x2
-        ld1          {v5.d}[1],     [x1], x2
-        ld1          {v6.d}[0],     [x0], x2
-        ld1          {v6.d}[1],     [x1], x2
-        ld1          {v7.d}[0],     [x0], x2
-        ld1          {v7.d}[1],     [x1], x2
+        ld1             {v0.d}[0],     [x0], x2 // load u
+        ld1             {v0.d}[1],     [x1], x2 // load v
+        ld1             {v1.d}[0],     [x0], x2
+        ld1             {v1.d}[1],     [x1], x2
+        ld1             {v2.d}[0],     [x0], x2
+        ld1             {v2.d}[1],     [x1], x2
+        ld1             {v3.d}[0],     [x0], x2
+        ld1             {v3.d}[1],     [x1], x2
+        ld1             {v4.d}[0],     [x0], x2
+        ld1             {v4.d}[1],     [x1], x2
+        ld1             {v5.d}[0],     [x0], x2
+        ld1             {v5.d}[1],     [x1], x2
+        ld1             {v6.d}[0],     [x0], x2
+        ld1             {v6.d}[1],     [x1], x2
+        ld1             {v7.d}[0],     [x0], x2
+        ld1             {v7.d}[1],     [x1], x2
 
-        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
+        transpose_8x16B v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
 
         dup             v22.16b, w3                 // flim_E
         dup             v23.16b, w4                 // flim_I
@@ -651,25 +651,25 @@
         sub             x0,  x0,  x2, lsl #3    // backup u 8 rows
         sub             x1,  x1,  x2, lsl #3    // backup v 8 rows
 
-        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
+        transpose_8x16B v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
 
         // Store pixels:
-        st1          {v0.d}[0],     [x0], x2 // load u
-        st1          {v0.d}[1],     [x1], x2 // load v
-        st1          {v1.d}[0],     [x0], x2
-        st1          {v1.d}[1],     [x1], x2
-        st1          {v2.d}[0],     [x0], x2
-        st1          {v2.d}[1],     [x1], x2
-        st1          {v3.d}[0],     [x0], x2
-        st1          {v3.d}[1],     [x1], x2
-        st1          {v4.d}[0],     [x0], x2
-        st1          {v4.d}[1],     [x1], x2
-        st1          {v5.d}[0],     [x0], x2
-        st1          {v5.d}[1],     [x1], x2
-        st1          {v6.d}[0],     [x0], x2
-        st1          {v6.d}[1],     [x1], x2
-        st1          {v7.d}[0],     [x0]
-        st1          {v7.d}[1],     [x1]
+        st1             {v0.d}[0],     [x0], x2 // load u
+        st1             {v0.d}[1],     [x1], x2 // load v
+        st1             {v1.d}[0],     [x0], x2
+        st1             {v1.d}[1],     [x1], x2
+        st1             {v2.d}[0],     [x0], x2
+        st1             {v2.d}[1],     [x1], x2
+        st1             {v3.d}[0],     [x0], x2
+        st1             {v3.d}[1],     [x1], x2
+        st1             {v4.d}[0],     [x0], x2
+        st1             {v4.d}[1],     [x1], x2
+        st1             {v5.d}[0],     [x0], x2
+        st1             {v5.d}[1],     [x1], x2
+        st1             {v6.d}[0],     [x0], x2
+        st1             {v6.d}[1],     [x1], x2
+        st1             {v7.d}[0],     [x0]
+        st1             {v7.d}[1],     [x1]
 
         ret
 
diff -Nru ffmpeg-5.1.8/libavcodec/adpcm.c ffmpeg-5.1.9/libavcodec/adpcm.c
--- ffmpeg-5.1.8/libavcodec/adpcm.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/adpcm.c	2026-05-05 15:50:55.000000000 +0000
@@ -857,6 +857,8 @@
 
     if(ch <= 0)
         return 0;
+    if (buf_size > INT_MAX / 2)
+        return 0;
 
     switch (avctx->codec->id) {
     /* constant, only check buf_size */
diff -Nru ffmpeg-5.1.8/libavcodec/alsdec.c ffmpeg-5.1.9/libavcodec/alsdec.c
--- ffmpeg-5.1.8/libavcodec/alsdec.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/alsdec.c	2026-05-05 15:50:55.000000000 +0000
@@ -1539,8 +1539,12 @@
                     return AVERROR_INVALIDDATA;
                 }
 
+                j = 0;
                 for (i = 0; i < frame_length; ++i) {
-                    ctx->raw_mantissa[c][i] = AV_RB32(larray);
+                    if (ctx->raw_samples[c][i] == 0) {
+                        ctx->raw_mantissa[c][i] = AV_RB32(larray + j);
+                        j += 4;
+                    }
                 }
             }
         }
@@ -1551,7 +1555,10 @@
                 if (ctx->raw_samples[c][i] != 0) {
                     //The following logic is taken from Tabel 14.45 and 14.46 from the ISO spec
                     if (av_cmp_sf_ieee754(acf[c], FLOAT_1)) {
-                        nbits[i] = 23 - av_log2(abs(ctx->raw_samples[c][i]));
+                        int nbit = av_log2(FFABSU(ctx->raw_samples[c][i]));
+                        if (nbit > 23)
+                            return AVERROR_INVALIDDATA;
+                        nbits[i] = 23 - nbit;
                     } else {
                         nbits[i] = 23;
                     }
@@ -1625,7 +1632,7 @@
                 tmp_32 = (sign << 31) | ((e + EXP_BIAS) << 23) | (mantissa);
                 ctx->raw_samples[c][i] = tmp_32;
             } else {
-                ctx->raw_samples[c][i] = raw_mantissa[c][i] & 0x007fffffUL;
+                ctx->raw_samples[c][i] = raw_mantissa[c][i];
             }
         }
         align_get_bits(gb);
@@ -1781,7 +1788,9 @@
     }
 
     if (sconf->floating) {
-        read_diff_float_data(ctx, ra_frame);
+        ret = read_diff_float_data(ctx, ra_frame);
+        if (ret < 0)
+            return ret;
     }
 
     if (get_bits_left(gb) < 0) {
diff -Nru ffmpeg-5.1.8/libavcodec/arm/int_neon.S ffmpeg-5.1.9/libavcodec/arm/int_neon.S
--- ffmpeg-5.1.8/libavcodec/arm/int_neon.S	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/arm/int_neon.S	2026-05-05 14:22:01.000000000 +0000
@@ -48,4 +48,3 @@
         vmov.32         r0,  d3[0]
         bx              lr
 endfunc
-
diff -Nru ffmpeg-5.1.8/libavcodec/av1dec.c ffmpeg-5.1.9/libavcodec/av1dec.c
--- ffmpeg-5.1.8/libavcodec/av1dec.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/av1dec.c	2026-05-05 15:50:55.000000000 +0000
@@ -89,12 +89,11 @@
 
 static void read_global_param(AV1DecContext *s, int type, int ref, int idx)
 {
-    uint8_t primary_frame, prev_frame;
+    int primary_frame;
     uint32_t abs_bits, prec_bits, round, prec_diff, sub, mx;
     int32_t r, prev_gm_param;
 
     primary_frame = s->raw_frame_header->primary_ref_frame;
-    prev_frame = s->raw_frame_header->ref_frame_idx[primary_frame];
     abs_bits = AV1_GM_ABS_ALPHA_BITS;
     prec_bits = AV1_GM_ALPHA_PREC_BITS;
 
@@ -104,8 +103,10 @@
      */
     if (s->raw_frame_header->primary_ref_frame == AV1_PRIMARY_REF_NONE)
         prev_gm_param = s->cur_frame.gm_params[ref][idx];
-    else
+    else {
+        int prev_frame = s->raw_frame_header->ref_frame_idx[primary_frame];
         prev_gm_param = s->ref[prev_frame].gm_params[ref][idx];
+    }
 
     if (idx < 2) {
         if (type == AV1_WARP_MODEL_TRANSLATION) {
@@ -1042,6 +1043,8 @@
             }
 
             s->raw_seq = &obu->obu.sequence_header;
+            s->raw_frame_header = NULL;
+            raw_tile_group      = NULL;
 
             ret = set_context_with_sequence(avctx, s->raw_seq);
             if (ret < 0) {
@@ -1091,6 +1094,8 @@
                 goto end;
             }
 
+            raw_tile_group      = NULL;
+
             if (unit->type == AV1_OBU_FRAME)
                 s->raw_frame_header = &obu->obu.frame.header;
             else
@@ -1170,8 +1175,11 @@
                 }
             }
             break;
-        case AV1_OBU_TILE_LIST:
         case AV1_OBU_TEMPORAL_DELIMITER:
+            s->raw_frame_header = NULL;
+            raw_tile_group      = NULL;
+        // fall-through
+        case AV1_OBU_TILE_LIST:
         case AV1_OBU_PADDING:
         case AV1_OBU_METADATA:
             break;
diff -Nru ffmpeg-5.1.8/libavcodec/bmp.c ffmpeg-5.1.9/libavcodec/bmp.c
--- ffmpeg-5.1.8/libavcodec/bmp.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/bmp.c	2026-05-05 15:50:55.000000000 +0000
@@ -129,7 +129,7 @@
         rgb[1] = bytestream_get_le32(&buf);
         rgb[2] = bytestream_get_le32(&buf);
         if (ihsize > 40)
-        alpha = bytestream_get_le32(&buf);
+            alpha = bytestream_get_le32(&buf);
     }
 
     ret = ff_set_dimensions(avctx, width, height > 0 ? height : -(unsigned)height);
diff -Nru ffmpeg-5.1.8/libavcodec/cfhd.c ffmpeg-5.1.9/libavcodec/cfhd.c
--- ffmpeg-5.1.8/libavcodec/cfhd.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/cfhd.c	2026-05-05 15:50:55.000000000 +0000
@@ -638,7 +638,7 @@
         } else
             av_log(avctx, AV_LOG_DEBUG,  "Unknown tag %i data %x\n", tag, data);
 
-        if (tag == BitstreamMarker && data == 0xf0f &&
+        if (tag == BitstreamMarker && data == CoefficientSegment &&
             s->coded_format != AV_PIX_FMT_NONE) {
             int lowpass_height = s->plane[s->channel_num].band[0][0].height;
             int lowpass_width  = s->plane[s->channel_num].band[0][0].width;
@@ -705,10 +705,15 @@
 
         if (s->subband_num_actual == 255)
             goto finish;
+
+        if (tag == BitstreamMarker && data == CoefficientSegment || tag == BandHeader || tag == BandSecondPass || s->peak.level)
+            if (s->transform_type != s->a_transform_type)
+                return AVERROR_PATCHWELCOME;
+
         coeff_data = s->plane[s->channel_num].subband[s->subband_num_actual];
 
         /* Lowpass coefficients */
-        if (tag == BitstreamMarker && data == 0xf0f) {
+        if (tag == BitstreamMarker && data == CoefficientSegment) {
             int lowpass_height, lowpass_width, lowpass_a_height, lowpass_a_width;
 
             if (!s->a_width || !s->a_height) {
diff -Nru ffmpeg-5.1.8/libavcodec/cfhd.h ffmpeg-5.1.9/libavcodec/cfhd.h
--- ffmpeg-5.1.8/libavcodec/cfhd.h	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/cfhd.h	2026-05-05 15:50:55.000000000 +0000
@@ -93,6 +93,15 @@
     ChannelHeight    = 105,
 };
 
+enum CFHDSegment {
+    LowPassSegment      = 0x1a4a,
+    LowPassEndSegment   = 0x1b4b,
+    HighPassSegment     = 0x0d0d,
+    BandSegment         = 0x0e0e,
+    HighPassEndSegment  = 0x0c0c,
+    CoefficientSegment  = 0x0f0f,
+};
+
 #define VLC_BITS       9
 #define SUBBAND_COUNT 10
 #define SUBBAND_COUNT_3D 17
diff -Nru ffmpeg-5.1.8/libavcodec/cfhdenc.c ffmpeg-5.1.9/libavcodec/cfhdenc.c
--- ffmpeg-5.1.8/libavcodec/cfhdenc.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/cfhdenc.c	2026-05-05 15:50:55.000000000 +0000
@@ -624,7 +624,7 @@
         }
 
         bytestream2_put_be16(pby, BitstreamMarker);
-        bytestream2_put_be16(pby, 0x1a4a);
+        bytestream2_put_be16(pby, LowPassSegment);
 
         pos = bytestream2_tell_p(pby);
 
@@ -650,7 +650,7 @@
         bytestream2_put_be16(pby, 16);
 
         bytestream2_put_be16(pby, BitstreamMarker);
-        bytestream2_put_be16(pby, 0x0f0f);
+        bytestream2_put_be16(pby, CoefficientSegment);
 
         for (int i = 0; i < height; i++) {
             for (int j = 0; j < width; j++)
@@ -659,7 +659,7 @@
         }
 
         bytestream2_put_be16(pby, BitstreamMarker);
-        bytestream2_put_be16(pby, 0x1b4b);
+        bytestream2_put_be16(pby, LowPassEndSegment);
 
         for (int l = 0; l < 3; l++) {
             for (int i = 0; i < 3; i++) {
@@ -674,7 +674,7 @@
             int height = s->plane[p].band[l][0].height;
 
             bytestream2_put_be16(pby, BitstreamMarker);
-            bytestream2_put_be16(pby, 0x0d0d);
+            bytestream2_put_be16(pby, HighPassSegment);
 
             bytestream2_put_be16(pby, WaveletType);
             bytestream2_put_be16(pby, 3 + 2 * (l == 2));
@@ -711,7 +711,7 @@
                 int count = 0, padd = 0;
 
                 bytestream2_put_be16(pby, BitstreamMarker);
-                bytestream2_put_be16(pby, 0x0e0e);
+                bytestream2_put_be16(pby, BandSegment);
 
                 bytestream2_put_be16(pby, SubbandNumber);
                 bytestream2_put_be16(pby, i + 1);
@@ -781,7 +781,7 @@
             }
 
             bytestream2_put_be16(pby, BitstreamMarker);
-            bytestream2_put_be16(pby, 0x0c0c);
+            bytestream2_put_be16(pby, HighPassEndSegment);
         }
 
         s->plane[p].size = bytestream2_tell_p(pby) - pos;
diff -Nru ffmpeg-5.1.8/libavcodec/cljrdec.c ffmpeg-5.1.9/libavcodec/cljrdec.c
--- ffmpeg-5.1.8/libavcodec/cljrdec.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/cljrdec.c	2026-05-05 15:50:52.000000000 +0000
@@ -91,4 +91,3 @@
     .p.capabilities = AV_CODEC_CAP_DR1,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
 };
-
diff -Nru ffmpeg-5.1.8/libavcodec/dca_xll.c ffmpeg-5.1.9/libavcodec/dca_xll.c
--- ffmpeg-5.1.8/libavcodec/dca_xll.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/dca_xll.c	2026-05-05 15:50:55.000000000 +0000
@@ -62,12 +62,16 @@
         array[i] = get_linear(gb, n);
 }
 
-static void get_rice_array(GetBitContext *gb, int32_t *array, int size, int k)
+static int get_rice_array(GetBitContext *gb, int32_t *array, int size, int k)
 {
     int i;
 
-    for (i = 0; i < size; i++)
+    for (i = 0; i < size && get_bits_left(gb) > k; i++)
         array[i] = get_rice(gb, k);
+
+    if (i < size)
+        return AVERROR_INVALIDDATA;
+    return 0;
 }
 
 static int parse_dmix_coeffs(DCAXllDecoder *s, DCAXllChSet *c)
@@ -527,8 +531,10 @@
         } else {
             // Rice codes
             // Unpack all residuals of part A of segment 0
-            get_rice_array(&s->gb, part_a, c->nsamples_part_a[k],
-                           c->bitalloc_part_a[k]);
+            int ret = get_rice_array(&s->gb, part_a, c->nsamples_part_a[k],
+                                     c->bitalloc_part_a[k]);
+            if (ret < 0)
+                return ret;
 
             if (c->bitalloc_hybrid_linear[k]) {
                 // Hybrid Rice codes
@@ -558,7 +564,9 @@
             } else {
                 // Rice codes
                 // Unpack all residuals of part B of segment 0 and others
-                get_rice_array(&s->gb, part_b, nsamples_part_b, c->bitalloc_part_b[k]);
+                ret = get_rice_array(&s->gb, part_b, nsamples_part_b, c->bitalloc_part_b[k]);
+                if (ret < 0)
+                    return ret;
             }
         }
     }
@@ -1076,6 +1084,7 @@
         return AVERROR(ENOMEM);
 
     memcpy(s->pbr_buffer, data, size);
+    memset(s->pbr_buffer + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
     s->pbr_length = size;
     s->pbr_delay = delay;
     return 0;
@@ -1130,6 +1139,7 @@
 
     memcpy(s->pbr_buffer + s->pbr_length, data, size);
     s->pbr_length += size;
+    memset(s->pbr_buffer + s->pbr_length, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
     // Respect decoding delay after synchronization error
     if (s->pbr_delay > 0 && --s->pbr_delay)
diff -Nru ffmpeg-5.1.8/libavcodec/dfpwmdec.c ffmpeg-5.1.9/libavcodec/dfpwmdec.c
--- ffmpeg-5.1.8/libavcodec/dfpwmdec.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/dfpwmdec.c	2026-05-05 15:50:55.000000000 +0000
@@ -106,15 +106,16 @@
 {
     DFPWMState *state = ctx->priv_data;
     int ret;
+    uint64_t nb_samples = packet->size * 8LL / ctx->ch_layout.nb_channels;
 
     if (packet->size * 8LL % ctx->ch_layout.nb_channels)
         return AVERROR_PATCHWELCOME;
 
-    frame->nb_samples = packet->size * 8LL / ctx->ch_layout.nb_channels;
-    if (frame->nb_samples <= 0) {
+    if (nb_samples > INT_MAX || !nb_samples) {
         av_log(ctx, AV_LOG_ERROR, "invalid number of samples in packet\n");
         return AVERROR_INVALIDDATA;
     }
+    frame->nb_samples = nb_samples;
 
     if ((ret = ff_get_buffer(ctx, frame, 0)) < 0)
         return ret;
diff -Nru ffmpeg-5.1.8/libavcodec/dv_profile.c ffmpeg-5.1.9/libavcodec/dv_profile.c
--- ffmpeg-5.1.8/libavcodec/dv_profile.c	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/dv_profile.c	2026-05-05 14:22:01.000000000 +0000
@@ -337,4 +337,3 @@
 
     return p;
 }
-
diff -Nru ffmpeg-5.1.8/libavcodec/dvdsub_parser.c ffmpeg-5.1.9/libavcodec/dvdsub_parser.c
--- ffmpeg-5.1.8/libavcodec/dvdsub_parser.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/dvdsub_parser.c	2026-05-05 15:50:55.000000000 +0000
@@ -59,7 +59,7 @@
         pc->packet = av_malloc(pc->packet_len + AV_INPUT_BUFFER_PADDING_SIZE);
     }
     if (pc->packet) {
-        if (pc->packet_index + buf_size <= pc->packet_len) {
+        if (buf_size <= pc->packet_len - pc->packet_index) {
             memcpy(pc->packet + pc->packet_index, buf, buf_size);
             pc->packet_index += buf_size;
             if (pc->packet_index >= pc->packet_len) {
diff -Nru ffmpeg-5.1.8/libavcodec/escape130.c ffmpeg-5.1.9/libavcodec/escape130.c
--- ffmpeg-5.1.8/libavcodec/escape130.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/escape130.c	2026-05-05 15:50:55.000000000 +0000
@@ -125,7 +125,7 @@
         return AVERROR_INVALIDDATA;
     }
 
-    s->old_y_avg = av_malloc(avctx->width * avctx->height / 4);
+    s->old_y_avg = av_mallocz(avctx->width * avctx->height / 4);
     s->buf1      = av_malloc(avctx->width * avctx->height * 3 / 2);
     s->buf2      = av_malloc(avctx->width * avctx->height * 3 / 2);
     if (!s->old_y_avg || !s->buf1 || !s->buf2) {
diff -Nru ffmpeg-5.1.8/libavcodec/exr.c ffmpeg-5.1.9/libavcodec/exr.c
--- ffmpeg-5.1.8/libavcodec/exr.c	2025-11-26 02:41:35.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/exr.c	2026-05-05 15:50:55.000000000 +0000
@@ -631,6 +631,9 @@
                                max_non_zero - min_non_zero + 1);
     memset(td->bitmap + max_non_zero + 1, 0, BITMAP_SIZE - max_non_zero - 1);
 
+    if (bytestream2_get_bytes_left(&gb) < 4)
+        return AVERROR_INVALIDDATA;
+
     maxval = reverse_lut(td->bitmap, td->lut);
 
     bytestream2_skip(&gb, 4);
@@ -1471,7 +1474,8 @@
                 }
 
                 // Zero out the end if xmax+1 is not w
-                memset(ptr_x, 0, axmax);
+                if (s->desc->flags & AV_PIX_FMT_FLAG_PLANAR || !c)
+                    memset(ptr_x, 0, axmax);
                 channel_buffer[c] += td->channel_line_size;
             }
         }
@@ -1793,12 +1797,17 @@
                     }
                 }
 
-                s->channels = av_realloc(s->channels,
-                                         ++s->nb_channels * sizeof(EXRChannel));
-                if (!s->channels) {
+                av_assert0(s->nb_channels < INT_MAX); // Impossible due to size of the bitstream
+                EXRChannel *new_channels = av_realloc_array(s->channels,
+                                                            s->nb_channels + 1,
+                                                            sizeof(EXRChannel));
+                if (!new_channels) {
                     ret = AVERROR(ENOMEM);
                     goto fail;
                 }
+                s->nb_channels ++;
+                s->channels = new_channels;
+
                 channel             = &s->channels[s->nb_channels - 1];
                 channel->pixel_type = current_pixel_type;
                 channel->xsub       = xsub;
@@ -1821,7 +1830,7 @@
                 s->is_luma = 1;
             } else {
                 avpriv_request_sample(s->avctx, "Uncommon channel combination");
-                ret = AVERROR(AVERROR_PATCHWELCOME);
+                ret = AVERROR_PATCHWELCOME;
                 goto fail;
             }
 
@@ -2214,6 +2223,8 @@
     }
 
     if (s->is_tile) {
+        if (s->tile_attr.ySize <= 0 || s->tile_attr.xSize <= 0)
+            return AVERROR_INVALIDDATA;
         nb_blocks = ((s->xdelta + s->tile_attr.xSize - 1) / s->tile_attr.xSize) *
         ((s->ydelta + s->tile_attr.ySize - 1) / s->tile_attr.ySize);
     } else { /* scanline */
diff -Nru ffmpeg-5.1.8/libavcodec/ffv1_template.c ffmpeg-5.1.9/libavcodec/ffv1_template.c
--- ffmpeg-5.1.8/libavcodec/ffv1_template.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/ffv1_template.c	2026-05-05 14:22:01.000000000 +0000
@@ -50,4 +50,3 @@
                p->quant_table[1][(LT - T) & 0xFF] +
                p->quant_table[2][(T - RT) & 0xFF];
 }
-
diff -Nru ffmpeg-5.1.8/libavcodec/ffv1enc_template.c ffmpeg-5.1.9/libavcodec/ffv1enc_template.c
--- ffmpeg-5.1.8/libavcodec/ffv1enc_template.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/ffv1enc_template.c	2026-05-05 15:50:52.000000000 +0000
@@ -199,4 +199,3 @@
     }
     return 0;
 }
-
diff -Nru ffmpeg-5.1.8/libavcodec/flashsv.c ffmpeg-5.1.9/libavcodec/flashsv.c
--- ffmpeg-5.1.8/libavcodec/flashsv.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/flashsv.c	2026-05-05 15:50:55.000000000 +0000
@@ -314,6 +314,9 @@
     v_blocks = s->image_height / s->block_height;
     v_part   = s->image_height % s->block_height;
 
+    if (h_blocks * v_blocks * 16 > get_bits_left(&gb))
+        return AVERROR_INVALIDDATA;
+
     /* the block size could change between frames, make sure the buffer
      * is large enough, if not, get a larger one */
     if (s->block_size < s->block_width * s->block_height) {
diff -Nru ffmpeg-5.1.8/libavcodec/golomb.h ffmpeg-5.1.9/libavcodec/golomb.h
--- ffmpeg-5.1.8/libavcodec/golomb.h	2025-08-05 00:22:34.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/golomb.h	2026-05-05 15:50:55.000000000 +0000
@@ -455,7 +455,7 @@
             buf = get_bits_long(gb, k);
 
             return buf + (i << k);
-        } else if (i == limit - 1) {
+        } else if (esc_len && i == limit - 1) {
             buf = get_bits_long(gb, esc_len);
 
             return buf + 1;
@@ -512,7 +512,7 @@
             }
 
             buf += ((SUINT)i << k);
-        } else if (i == limit - 1) {
+        } else if (esc_len && i == limit - 1) {
             buf = SHOW_UBITS(re, gb, esc_len);
             LAST_SKIP_BITS(re, gb, esc_len);
 
diff -Nru ffmpeg-5.1.8/libavcodec/h264_direct.c ffmpeg-5.1.9/libavcodec/h264_direct.c
--- ffmpeg-5.1.8/libavcodec/h264_direct.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/h264_direct.c	2026-05-05 15:50:55.000000000 +0000
@@ -121,26 +121,30 @@
 {
     H264Ref *const ref1 = &sl->ref_list[1][0];
     H264Picture *const cur = h->cur_pic_ptr;
-    int list, j, field;
+    int list, field;
     int sidx     = (h->picture_structure & 1) ^ 1;
     int ref1sidx = (ref1->reference      & 1) ^ 1;
 
-    for (list = 0; list < sl->list_count; list++) {
-        cur->ref_count[sidx][list] = sl->ref_count[list];
-        for (j = 0; j < sl->ref_count[list]; j++)
-            cur->ref_poc[sidx][list][j] = 4 * sl->ref_list[list][j].parent->frame_num +
-                                          (sl->ref_list[list][j].reference & 3);
-    }
+    /* Updates to cur_pic are not safe once ff_thread_finish_setup() has been
+     * called (other threads may already be reading these fields). */
+    if (!h->setup_finished) {
+        for (list = 0; list < sl->list_count; list++) {
+            cur->ref_count[sidx][list] = sl->ref_count[list];
+            for (int j = 0; j < sl->ref_count[list]; j++)
+                cur->ref_poc[sidx][list][j] = 4 * sl->ref_list[list][j].parent->frame_num +
+                                                 (sl->ref_list[list][j].reference & 3);
+        }
 
-    if (h->picture_structure == PICT_FRAME) {
-        memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
-        memcpy(cur->ref_poc[1],   cur->ref_poc[0],   sizeof(cur->ref_poc[0]));
-    }
+        if (h->picture_structure == PICT_FRAME) {
+            memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
+            memcpy(cur->ref_poc[1],   cur->ref_poc[0],   sizeof(cur->ref_poc[0]));
+        }
 
-    if (h->current_slice == 0) {
-        cur->mbaff = FRAME_MBAFF(h);
-    } else {
-        av_assert0(cur->mbaff == FRAME_MBAFF(h));
+        if (h->current_slice == 0) {
+            cur->mbaff = FRAME_MBAFF(h);
+        } else {
+            av_assert0(cur->mbaff == FRAME_MBAFF(h));
+        }
     }
 
     sl->col_fieldoff = 0;
diff -Nru ffmpeg-5.1.8/libavcodec/h264_mc_template.c ffmpeg-5.1.9/libavcodec/h264_mc_template.c
--- ffmpeg-5.1.8/libavcodec/h264_mc_template.c	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/h264_mc_template.c	2026-05-05 14:22:01.000000000 +0000
@@ -162,4 +162,3 @@
     if (USES_LIST(mb_type, 1))
         prefetch_motion(h, sl, 1, PIXEL_SHIFT, CHROMA_IDC);
 }
-
diff -Nru ffmpeg-5.1.8/libavcodec/h264_parser.c ffmpeg-5.1.9/libavcodec/h264_parser.c
--- ffmpeg-5.1.8/libavcodec/h264_parser.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/h264_parser.c	2026-05-05 15:50:55.000000000 +0000
@@ -222,6 +222,9 @@
     if (get_bits1(gb)) { // adaptive_ref_pic_marking_mode_flag
         int i;
         for (i = 0; i < H264_MAX_MMCO_COUNT; i++) {
+            if (get_bits_left(gb) < 1)
+                return AVERROR_INVALIDDATA;
+
             MMCOOpcode opcode = get_ue_golomb_31(gb);
             if (opcode > (unsigned) MMCO_LONG) {
                 av_log(logctx, AV_LOG_ERROR,
@@ -651,8 +654,12 @@
                 s->dts = av_sat_add64(p->reference_dts, av_rescale(s->dts_ref_dts_delta, num, den));
             }
 
-            if (p->reference_dts != AV_NOPTS_VALUE && s->pts == AV_NOPTS_VALUE)
-                s->pts = s->dts + av_rescale(s->pts_dts_delta, num, den);
+            if (p->reference_dts != AV_NOPTS_VALUE && s->pts == AV_NOPTS_VALUE) {
+                int64_t pts_dts_delta = av_rescale(s->pts_dts_delta, num, den);
+                uint64_t pts = (uint64_t)s->dts + pts_dts_delta;
+                if (pts == av_sat_add64(s->dts, pts_dts_delta))
+                    s->pts = pts;
+            }
 
             if (s->dts_sync_point > 0)
                 p->reference_dts = s->dts; // new reference
diff -Nru ffmpeg-5.1.8/libavcodec/h264_refs.c ffmpeg-5.1.9/libavcodec/h264_refs.c
--- ffmpeg-5.1.8/libavcodec/h264_refs.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/h264_refs.c	2026-05-05 15:50:55.000000000 +0000
@@ -158,8 +158,7 @@
                                   h->long_ref, 16, 1, h->picture_structure);
             av_assert0(len <= 32);
 
-            if (len < sl->ref_count[list])
-                memset(&sl->ref_list[list][len], 0, sizeof(H264Ref) * (sl->ref_count[list] - len));
+            memset(&sl->ref_list[list][len], 0, sizeof(H264Ref) * (32 - len));
             lens[list] = len;
         }
 
@@ -179,8 +178,7 @@
                               h-> long_ref, 16, 1, h->picture_structure);
         av_assert0(len <= 32);
 
-        if (len < sl->ref_count[0])
-            memset(&sl->ref_list[0][len], 0, sizeof(H264Ref) * (sl->ref_count[0] - len));
+        memset(&sl->ref_list[0][len], 0, sizeof(H264Ref) * (32 - len));
     }
 #ifdef TRACE
     for (i = 0; i < sl->ref_count[0]; i++) {
diff -Nru ffmpeg-5.1.8/libavcodec/h264_slice.c ffmpeg-5.1.9/libavcodec/h264_slice.c
--- ffmpeg-5.1.8/libavcodec/h264_slice.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/h264_slice.c	2026-05-05 15:50:55.000000000 +0000
@@ -2086,8 +2086,7 @@
 
     if (sl->slice_type_nos == AV_PICTURE_TYPE_B && !sl->direct_spatial_mv_pred)
         ff_h264_direct_dist_scale_factor(h, sl);
-    if (!h->setup_finished)
-        ff_h264_direct_ref_list_init(h, sl);
+    ff_h264_direct_ref_list_init(h, sl);
 
     if (h->avctx->skip_loop_filter >= AVDISCARD_ALL ||
         (h->avctx->skip_loop_filter >= AVDISCARD_NONKEY &&
@@ -2116,6 +2115,12 @@
                           h->ps.pps->chroma_qp_index_offset[1]) +
                    6 * (h->ps.sps->bit_depth_luma - 8);
 
+    // slice_table is uint16_t initialized to 0xFFFF as a sentinel.
+    if (h->current_slice >= 0xFFFE) {
+        av_log(h->avctx, AV_LOG_ERROR, "Too many slices (%d)\n", h->current_slice + 1);
+        return AVERROR_PATCHWELCOME;
+    }
+
     sl->slice_num       = ++h->current_slice;
 
     if (sl->slice_num)
diff -Nru ffmpeg-5.1.8/libavcodec/hevc_cabac.c ffmpeg-5.1.9/libavcodec/hevc_cabac.c
--- ffmpeg-5.1.8/libavcodec/hevc_cabac.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/hevc_cabac.c	2026-05-05 15:50:52.000000000 +0000
@@ -1561,4 +1561,3 @@
     case 0: lc->pu.mvd.y = 0;                       break;
     }
 }
-
diff -Nru ffmpeg-5.1.8/libavcodec/imgconvert.c ffmpeg-5.1.9/libavcodec/imgconvert.c
--- ffmpeg-5.1.8/libavcodec/imgconvert.c	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/imgconvert.c	2026-05-05 14:22:01.000000000 +0000
@@ -45,4 +45,3 @@
         *loss_ptr = loss;
     return best;
 }
-
diff -Nru ffmpeg-5.1.8/libavcodec/imm5.c ffmpeg-5.1.9/libavcodec/imm5.c
--- ffmpeg-5.1.8/libavcodec/imm5.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/imm5.c	2026-05-05 15:50:55.000000000 +0000
@@ -139,6 +139,8 @@
     }
 
     ret = avcodec_receive_frame(codec_avctx, frame);
+    if (ret == AVERROR(EAGAIN))
+        return avpkt->size;
     if (ret < 0)
         return ret;
 
diff -Nru ffmpeg-5.1.8/libavcodec/interplayacm.c ffmpeg-5.1.9/libavcodec/interplayacm.c
--- ffmpeg-5.1.8/libavcodec/interplayacm.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/interplayacm.c	2026-05-05 15:50:55.000000000 +0000
@@ -434,6 +434,9 @@
     unsigned i, ind;
     int ret;
 
+    if (get_bits_left(gb) < s->cols * 5)
+        return AVERROR_INVALIDDATA;
+
     for (i = 0; i < s->cols; i++) {
         ind = get_bits(gb, 5);
         ret = filler_list[ind](s, ind, i);
diff -Nru ffmpeg-5.1.8/libavcodec/jpeg2000dec.c ffmpeg-5.1.9/libavcodec/jpeg2000dec.c
--- ffmpeg-5.1.8/libavcodec/jpeg2000dec.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/jpeg2000dec.c	2026-05-05 15:50:55.000000000 +0000
@@ -1801,7 +1801,7 @@
 
     while (passno--) {
         if (bpno < 0 || bpno > 29) {
-            av_log(s->avctx, AV_LOG_ERROR, "bpno became invalid\n");
+            av_log(s->avctx, AV_LOG_ERROR, "bpno (%d) became invalid\n", bpno);
             return AVERROR_INVALIDDATA;
         }
         switch(pass_t) {
@@ -2060,9 +2060,12 @@
             int h            = tile->comp[compno].coord[1][1] -                                   \
                                ff_jpeg2000_ceildiv(s->image_offset_y, s->cdy[compno]);            \
             int plane        = 0;                                                                 \
+            ptrdiff_t dstoffset = 0;                                                              \
                                                                                                   \
             if (planar)                                                                           \
                 plane = s->cdef[compno] ? s->cdef[compno]-1 : (s->ncomponents-1);                 \
+            else                                                                                  \
+                dstoffset = s->cdef[compno] ? s->cdef[compno] - 1 : compno;                       \
                                                                                                   \
             y    = tile->comp[compno].coord[1][0] -                                               \
                    ff_jpeg2000_ceildiv(s->image_offset_y, s->cdy[compno]);                        \
@@ -2072,7 +2075,7 @@
                                                                                                   \
                 x   = tile->comp[compno].coord[0][0] -                                            \
                       ff_jpeg2000_ceildiv(s->image_offset_x, s->cdx[compno]);                     \
-                dst = line + x * pixelsize + compno*!planar;                                      \
+                dst = line + x * pixelsize + dstoffset;                                           \
                                                                                                   \
                 if (codsty->transform == FF_DWT97) {                                              \
                     for (; x < w; x++) {                                                          \
diff -Nru ffmpeg-5.1.8/libavcodec/lcldec.c ffmpeg-5.1.9/libavcodec/lcldec.c
--- ffmpeg-5.1.8/libavcodec/lcldec.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/lcldec.c	2026-05-05 15:50:55.000000000 +0000
@@ -175,7 +175,7 @@
     int height = avctx->height; // Real image height
     unsigned int mszh_dlen;
     unsigned char yq, y1q, uq, vq;
-    int uqvq, ret;
+    int ret;
     unsigned int mthread_inlen, mthread_outlen;
     unsigned int len = buf_size;
     int linesize, offset;
@@ -304,7 +304,7 @@
             for (row = 0; row < height; row++) {
                 pixel_ptr = row * width * 3;
                 yq = encoded[pixel_ptr++];
-                uqvq = AV_RL16(encoded+pixel_ptr);
+                unsigned uqvq = AV_RL16(encoded+pixel_ptr);
                 pixel_ptr += 2;
                 for (col = 1; col < width; col++) {
                     encoded[pixel_ptr] = yq -= encoded[pixel_ptr];
diff -Nru ffmpeg-5.1.8/libavcodec/magicyuv.c ffmpeg-5.1.9/libavcodec/magicyuv.c
--- ffmpeg-5.1.8/libavcodec/magicyuv.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/magicyuv.c	2026-05-05 15:50:55.000000000 +0000
@@ -343,7 +343,8 @@
                 s->llviddsp.add_left_pred(dst, dst, width, 0);
                 dst += stride;
             }
-            lefttop = left = dst[0];
+            if (1 + interlaced < height)
+                lefttop = left = dst[0];
             for (k = 1 + interlaced; k < height; k++) {
                 s->llviddsp.add_median_pred(dst, dst - fake_stride,
                                              dst, width, &left, &lefttop);
diff -Nru ffmpeg-5.1.8/libavcodec/mdec.c ffmpeg-5.1.9/libavcodec/mdec.c
--- ffmpeg-5.1.8/libavcodec/mdec.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/mdec.c	2026-05-05 15:50:55.000000000 +0000
@@ -175,6 +175,9 @@
     int buf_size          = avpkt->size;
     int ret;
 
+    if (a->mb_width * a->mb_height * 3 > buf_size)
+        return AVERROR_INVALIDDATA;
+
     if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0)
         return ret;
     frame->pict_type = AV_PICTURE_TYPE_I;
diff -Nru ffmpeg-5.1.8/libavcodec/mjpegdec.c ffmpeg-5.1.9/libavcodec/mjpegdec.c
--- ffmpeg-5.1.8/libavcodec/mjpegdec.c	2025-11-26 02:41:35.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/mjpegdec.c	2026-05-05 15:50:55.000000000 +0000
@@ -150,7 +150,7 @@
     if ((ret = init_default_huffman_tables(s)) < 0)
         return ret;
 
-    if (s->extern_huff) {
+    if (s->extern_huff && avctx->extradata) {
         av_log(avctx, AV_LOG_INFO, "using external huffman table\n");
         if ((ret = init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size * 8)) < 0)
             return ret;
@@ -344,9 +344,11 @@
     if (av_image_check_size(width, height, 0, s->avctx) < 0)
         return AVERROR_INVALIDDATA;
 
-    // A valid frame requires at least 1 bit for DC + 1 bit for AC for each 8x8 block.
-    if (s->buf_size && (width + 7) / 8 * ((height + 7) / 8) > s->buf_size * 4LL)
-        return AVERROR_INVALIDDATA;
+    if (!s->progressive && !s->ls) {
+        // A valid frame requires at least 1 bit for DC + 1 bit for AC for each 8x8 block.
+        if (s->buf_size && (width + 7) / 8 * ((height + 7) / 8) > s->buf_size * 4LL)
+            return AVERROR_INVALIDDATA;
+    }
 
     nb_components = get_bits(&s->gb, 8);
     if (nb_components <= 0 ||
diff -Nru ffmpeg-5.1.8/libavcodec/mpegaudiodsp_template.c ffmpeg-5.1.9/libavcodec/mpegaudiodsp_template.c
--- ffmpeg-5.1.8/libavcodec/mpegaudiodsp_template.c	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/mpegaudiodsp_template.c	2026-05-05 14:22:01.000000000 +0000
@@ -369,4 +369,3 @@
         out++;
     }
 }
-
diff -Nru ffmpeg-5.1.8/libavcodec/mpegaudioenc_template.c ffmpeg-5.1.9/libavcodec/mpegaudioenc_template.c
--- ffmpeg-5.1.8/libavcodec/mpegaudioenc_template.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/mpegaudioenc_template.c	2026-05-05 15:50:52.000000000 +0000
@@ -783,4 +783,3 @@
     { "b", "0" },
     { NULL },
 };
-
diff -Nru ffmpeg-5.1.8/libavcodec/mpegvideo_enc.c ffmpeg-5.1.9/libavcodec/mpegvideo_enc.c
--- ffmpeg-5.1.8/libavcodec/mpegvideo_enc.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/mpegvideo_enc.c	2026-05-05 15:50:55.000000000 +0000
@@ -2706,7 +2706,7 @@
     bytestream_put_byte(&ptr, 0); /* vmv2 */
 }
 
-static void update_mb_info(MpegEncContext *s, int startcode)
+static void update_mb_info(MpegEncContext *s)
 {
     if (!s->mb_info)
         return;
@@ -2714,14 +2714,6 @@
         s->mb_info_size += 12;
         s->prev_mb_info = s->last_mb_info;
     }
-    if (startcode) {
-        s->prev_mb_info = put_bytes_count(&s->pb, 0);
-        /* This might have incremented mb_info_size above, and we return without
-         * actually writing any info into that slot yet. But in that case,
-         * this will be called again at the start of the after writing the
-         * start code, actually writing the mb info. */
-        return;
-    }
 
     s->last_mb_info = put_bytes_count(&s->pb, 0);
     if (!s->mb_info_size)
@@ -2938,8 +2930,11 @@
                     case AV_CODEC_ID_H263:
                     case AV_CODEC_ID_H263P:
                         if (CONFIG_H263_ENCODER) {
-                            update_mb_info(s, 1);
+                            if (s->mb_info && put_bytes_count(&s->pb, 0) - s->prev_mb_info >= s->mb_info)
+                                s->mb_info_size += 12;
+
                             ff_h263_encode_gob_header(s, mb_y);
+                            s->prev_mb_info = put_bits_count(&s->pb)/8;
                         }
                     break;
                     }
@@ -2965,7 +2960,7 @@
             s->mb_skipped=0;
             s->dquant=0; //only for QP_RD
 
-            update_mb_info(s, 0);
+            update_mb_info(s);
 
             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
                 int next_block=0;
diff -Nru ffmpeg-5.1.8/libavcodec/msmpeg4.c ffmpeg-5.1.9/libavcodec/msmpeg4.c
--- ffmpeg-5.1.8/libavcodec/msmpeg4.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/msmpeg4.c	2026-05-05 15:50:52.000000000 +0000
@@ -344,4 +344,3 @@
     *dc_val_ptr = &dc_val[0];
     return pred;
 }
-
diff -Nru ffmpeg-5.1.8/libavcodec/notchlc.c ffmpeg-5.1.9/libavcodec/notchlc.c
--- ffmpeg-5.1.8/libavcodec/notchlc.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/notchlc.c	2026-05-05 15:50:55.000000000 +0000
@@ -79,7 +79,7 @@
                           PutByteContext *pb)
 {
     unsigned reference_pos, match_length, delta, pos = 0;
-    uint8_t history[64 * 1024];
+    uint8_t history[64 * 1024] = { 0 };
 
     while (bytestream2_get_bytes_left(gb) > 0) {
         uint8_t token = bytestream2_get_byte(gb);
@@ -89,6 +89,8 @@
             unsigned char current;
             do {
                 current = bytestream2_get_byte(gb);
+                if (current > INT_MAX - num_literals)
+                    return AVERROR_INVALIDDATA;
                 num_literals += current;
             } while (current == 255);
         }
@@ -121,6 +123,8 @@
 
             do {
                 current = bytestream2_get_byte(gb);
+                if (current > INT_MAX - match_length)
+                    return AVERROR_INVALIDDATA;
                 match_length += current;
             } while (current == 255);
         }
diff -Nru ffmpeg-5.1.8/libavcodec/omx.c ffmpeg-5.1.9/libavcodec/omx.c
--- ffmpeg-5.1.8/libavcodec/omx.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/omx.c	2026-05-05 15:50:55.000000000 +0000
@@ -683,6 +683,11 @@
             buffer = get_buffer(&s->output_mutex, &s->output_cond,
                                 &s->num_done_out_buffers, s->done_out_buffers, 1);
             if (buffer->nFlags & OMX_BUFFERFLAG_CODECCONFIG) {
+                if (buffer->nFilledLen > INT32_MAX - AV_INPUT_BUFFER_PADDING_SIZE - avctx->extradata_size) {
+                    ret = AVERROR(ENOMEM);
+                    goto fail;
+                }
+
                 if ((ret = av_reallocp(&avctx->extradata, avctx->extradata_size + buffer->nFilledLen + AV_INPUT_BUFFER_PADDING_SIZE)) < 0) {
                     avctx->extradata_size = 0;
                     goto fail;
diff -Nru ffmpeg-5.1.8/libavcodec/qdm2.c ffmpeg-5.1.9/libavcodec/qdm2.c
--- ffmpeg-5.1.8/libavcodec/qdm2.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/qdm2.c	2026-05-05 15:50:55.000000000 +0000
@@ -1852,6 +1852,8 @@
     if(buf_size < s->checksum_size)
         return -1;
 
+    s->sub_packet = 0;
+
     /* get output buffer */
     frame->nb_samples = 16 * s->frame_size;
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
diff -Nru ffmpeg-5.1.8/libavcodec/ralf.c ffmpeg-5.1.9/libavcodec/ralf.c
--- ffmpeg-5.1.8/libavcodec/ralf.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/ralf.c	2026-05-05 15:50:55.000000000 +0000
@@ -158,6 +158,7 @@
     if (ctx->max_frame_size > (1 << 20) || !ctx->max_frame_size) {
         av_log(avctx, AV_LOG_ERROR, "invalid frame size %d\n",
                ctx->max_frame_size);
+        return AVERROR_INVALIDDATA;
     }
     ctx->max_frame_size = FFMAX(ctx->max_frame_size, avctx->sample_rate);
 
diff -Nru ffmpeg-5.1.8/libavcodec/rasc.c ffmpeg-5.1.9/libavcodec/rasc.c
--- ffmpeg-5.1.8/libavcodec/rasc.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/rasc.c	2026-05-05 15:50:55.000000000 +0000
@@ -52,6 +52,8 @@
     GetByteContext  gb;
     uint8_t        *delta;
     int             delta_size;
+    uint8_t        *mv_scratch;
+    unsigned int    mv_scratch_size;
     uint8_t        *cursor;
     int             cursor_size;
     unsigned        cursor_w;
@@ -295,10 +297,8 @@
                 b2 -= s->frame2->linesize[0];
             }
         } else if (type == 0) {
-            uint8_t *buffer;
-
-            av_fast_padded_malloc(&s->delta, &s->delta_size, w * h * s->bpp);
-            buffer = s->delta;
+            av_fast_padded_malloc(&s->mv_scratch, &s->mv_scratch_size, w * h * s->bpp);
+            uint8_t *buffer = s->mv_scratch;
             if (!buffer)
                 return AVERROR(ENOMEM);
 
@@ -770,6 +770,8 @@
     s->cursor_size = 0;
     av_freep(&s->delta);
     s->delta_size = 0;
+    av_freep(&s->mv_scratch);
+    s->mv_scratch_size = 0;
     av_frame_free(&s->frame1);
     av_frame_free(&s->frame2);
     ff_inflate_end(&s->zstream);
diff -Nru ffmpeg-5.1.8/libavcodec/snow_dwt.c ffmpeg-5.1.9/libavcodec/snow_dwt.c
--- ffmpeg-5.1.8/libavcodec/snow_dwt.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/snow_dwt.c	2026-05-05 15:50:52.000000000 +0000
@@ -857,5 +857,3 @@
     ff_dwt_init_x86(c);
 #endif
 }
-
-
diff -Nru ffmpeg-5.1.8/libavcodec/svq1dec.c ffmpeg-5.1.9/libavcodec/svq1dec.c
--- ffmpeg-5.1.8/libavcodec/svq1dec.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/svq1dec.c	2026-05-05 15:50:55.000000000 +0000
@@ -680,6 +680,11 @@
         avctx->skip_frame >= AVDISCARD_ALL)
         return buf_size;
 
+    // Reject obviously too-small packets early: require at least one remaining bit per aligned luma macroblock.
+    // FFALIGN(s->width,  16) * FFALIGN(s->height, 16) / 256 represent the number of Macroblocks
+    if (get_bits_left(&s->gb) < FFALIGN(s->width,  16) * FFALIGN(s->height, 16) / 256)
+        return AVERROR_INVALIDDATA;
+
     result = ff_get_buffer(avctx, cur, s->nonref ? 0 : AV_GET_BUFFER_FLAG_REF);
     if (result < 0)
         return result;
diff -Nru ffmpeg-5.1.8/libavcodec/tdsc.c ffmpeg-5.1.9/libavcodec/tdsc.c
--- ffmpeg-5.1.8/libavcodec/tdsc.c	2025-11-26 02:41:31.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/tdsc.c	2026-05-05 15:50:55.000000000 +0000
@@ -242,7 +242,6 @@
                     bits <<= 1;
                 }
             }
-            dst += ctx->cursor_stride - ctx->cursor_w * 4;
         }
 
         dst = ctx->cursor;
@@ -274,7 +273,6 @@
                     bits <<= 1;
                 }
             }
-            dst += ctx->cursor_stride - ctx->cursor_w * 4;
         }
         break;
     case CUR_FMT_BGRA:
@@ -360,7 +358,8 @@
     }
 
     ret = avcodec_receive_frame(ctx->jpeg_avctx, ctx->jpgframe);
-    if (ret < 0 || ctx->jpgframe->format != AV_PIX_FMT_YUVJ420P) {
+    if (ret < 0 || ctx->jpgframe->format != AV_PIX_FMT_YUVJ420P ||
+        w > ctx->jpgframe->width || h > ctx->jpgframe->height) {
         av_log(avctx, AV_LOG_ERROR,
                "JPEG decoding error (%d).\n", ret);
 
@@ -404,7 +403,7 @@
         }
 
         tile_size = bytestream2_get_le32(&ctx->gbc);
-        if (bytestream2_get_bytes_left(&ctx->gbc) < tile_size)
+        if (bytestream2_get_bytes_left(&ctx->gbc) < tile_size + 24LL)
             return AVERROR_INVALIDDATA;
 
         tile_mode = bytestream2_get_le32(&ctx->gbc);
@@ -437,6 +436,9 @@
             if (ret < 0)
                 return ret;
         } else if (tile_mode == MKTAG(' ','W','A','R')) {
+            if (3LL * w * h > tile_size)
+                return AVERROR_INVALIDDATA;
+
             /* Just copy the buffer to output */
             av_image_copy_plane(ctx->refframe->data[0] + x * 3 +
                                 ctx->refframe->linesize[0] * y,
diff -Nru ffmpeg-5.1.8/libavcodec/vp3.c ffmpeg-5.1.9/libavcodec/vp3.c
--- ffmpeg-5.1.8/libavcodec/vp3.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/vp3.c	2026-05-05 15:50:55.000000000 +0000
@@ -2921,6 +2921,8 @@
     if (av_image_check_size(visible_width, visible_height, 0, avctx) < 0 ||
         visible_width  + offset_x > s->width ||
         visible_height + offset_y > s->height ||
+        visible_width  + 512 < s->width  ||
+        visible_height + 512 < s->height ||
         visible_width < 18
     ) {
         av_log(avctx, AV_LOG_ERROR,
diff -Nru ffmpeg-5.1.8/libavcodec/vp9.c ffmpeg-5.1.9/libavcodec/vp9.c
--- ffmpeg-5.1.8/libavcodec/vp9.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/vp9.c	2026-05-05 15:50:55.000000000 +0000
@@ -192,10 +192,12 @@
     uint8_t *p;
     int bytesperpixel = s->bytesperpixel, ret, cols, rows;
     int lflvl_len, i;
+    int changed = 0;
 
     av_assert0(w > 0 && h > 0);
 
     if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
+        changed = 1;
         if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
             return ret;
 
@@ -239,8 +241,10 @@
         *fmtp = AV_PIX_FMT_NONE;
 
         ret = ff_thread_get_format(avctx, pix_fmts);
-        if (ret < 0)
+        if (ret < 0) {
+            ff_set_dimensions(avctx, s->w, s->h);
             return ret;
+        }
 
         avctx->pix_fmt = ret;
         s->gf_fmt  = s->pix_fmt;
@@ -252,7 +256,7 @@
     rows = (h + 7) >> 3;
 
     if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
-        return 0;
+        return changed;
 
     s->last_fmt  = s->pix_fmt;
     s->sb_cols   = (w + 63) >> 6;
@@ -297,9 +301,10 @@
         ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
         ff_videodsp_init(&s->vdsp, s->s.h.bpp);
         s->last_bpp = s->s.h.bpp;
+        changed = 1;
     }
 
-    return 0;
+    return changed;
 }
 
 static int update_block_buffers(AVCodecContext *avctx)
@@ -506,6 +511,7 @@
     int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
     int last_invisible;
     const uint8_t *data2;
+    int changed;
 
     /* general header */
     if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
@@ -770,10 +776,10 @@
     }
 
     /* tiling info */
-    if ((ret = update_size(avctx, w, h)) < 0) {
+    if ((changed = update_size(avctx, w, h)) < 0) {
         av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
                w, h, s->pix_fmt);
-        return ret;
+        return changed;
     }
     for (s->s.h.tiling.log2_tile_cols = 0;
          s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
@@ -788,7 +794,7 @@
     }
     s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
     s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
-    if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
+    if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols) || changed) {
         int n_range_coders;
         VP56RangeCoder *rc;
 
diff -Nru ffmpeg-5.1.8/libavcodec/wmaenc.c ffmpeg-5.1.9/libavcodec/wmaenc.c
--- ffmpeg-5.1.8/libavcodec/wmaenc.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/wmaenc.c	2026-05-05 15:50:55.000000000 +0000
@@ -65,14 +65,14 @@
     flags1 = 0;
     flags2 = 1;
     if (avctx->codec->id == AV_CODEC_ID_WMAV1) {
-        extradata             = av_malloc(4);
+        extradata             = av_mallocz(4 + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!extradata)
             return AVERROR(ENOMEM);
         avctx->extradata_size = 4;
         AV_WL16(extradata, flags1);
         AV_WL16(extradata + 2, flags2);
     } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) {
-        extradata             = av_mallocz(10);
+        extradata             = av_mallocz(10 + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!extradata)
             return AVERROR(ENOMEM);
         avctx->extradata_size = 10;
diff -Nru ffmpeg-5.1.8/libavcodec/x86/fmtconvert.asm ffmpeg-5.1.9/libavcodec/x86/fmtconvert.asm
--- ffmpeg-5.1.8/libavcodec/x86/fmtconvert.asm	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/x86/fmtconvert.asm	2026-05-05 14:22:01.000000000 +0000
@@ -85,4 +85,3 @@
 
 INIT_XMM sse2
 INT32_TO_FLOAT_FMUL_ARRAY8
-
diff -Nru ffmpeg-5.1.8/libavcodec/x86/mpegvideoencdsp.asm ffmpeg-5.1.9/libavcodec/x86/mpegvideoencdsp.asm
--- ffmpeg-5.1.8/libavcodec/x86/mpegvideoencdsp.asm	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/x86/mpegvideoencdsp.asm	2026-05-05 15:50:52.000000000 +0000
@@ -106,4 +106,3 @@
 
 INIT_XMM sse2
 PIX_NORM1 6, 8
-
diff -Nru ffmpeg-5.1.8/libavcodec/xxan.c ffmpeg-5.1.9/libavcodec/xxan.c
--- ffmpeg-5.1.8/libavcodec/xxan.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/xxan.c	2026-05-05 15:50:55.000000000 +0000
@@ -68,7 +68,7 @@
     }
 
     s->buffer_size = avctx->width * avctx->height;
-    s->y_buffer = av_malloc(s->buffer_size);
+    s->y_buffer = av_mallocz(s->buffer_size);
     if (!s->y_buffer)
         return AVERROR(ENOMEM);
     s->scratch_buffer = av_malloc(s->buffer_size + 130);
diff -Nru ffmpeg-5.1.8/libavcodec/zmbv.c ffmpeg-5.1.9/libavcodec/zmbv.c
--- ffmpeg-5.1.8/libavcodec/zmbv.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavcodec/zmbv.c	2026-05-05 15:50:55.000000000 +0000
@@ -138,6 +138,8 @@
             }
 
             if (d) { /* apply XOR'ed difference */
+                if (c->decomp_len - (src - c->decomp_buf) < bw2 * bh2)
+                    return AVERROR_INVALIDDATA;
                 out = output + x;
                 for (j = 0; j < bh2; j++) {
                     for (i = 0; i < bw2; i++)
@@ -212,6 +214,8 @@
             }
 
             if (d) { /* apply XOR'ed difference */
+                if (c->decomp_len - (src - c->decomp_buf) < bw2 * bh2 * 2)
+                    return AVERROR_INVALIDDATA;
                 out = output + x;
                 for (j = 0; j < bh2; j++){
                     for (i = 0; i < bw2; i++) {
@@ -296,6 +300,8 @@
             }
 
             if (d) { /* apply XOR'ed difference */
+                if (c->decomp_len - (src - c->decomp_buf) < bw2 * bh2 * 3)
+                    return AVERROR_INVALIDDATA;
                 out = output + x * 3;
                 for (j = 0; j < bh2; j++) {
                     for (i = 0; i < bw2; i++) {
@@ -374,6 +380,8 @@
             }
 
             if (d) { /* apply XOR'ed difference */
+                if (c->decomp_len - (src - c->decomp_buf) < bw2 * bh2 * 4)
+                    return AVERROR_INVALIDDATA;
                 out = output + x;
                 for (j = 0; j < bh2; j++){
                     for (i = 0; i < bw2; i++) {
@@ -568,8 +576,10 @@
         frame->pict_type = AV_PICTURE_TYPE_P;
         if (c->decomp_len < 2LL * ((c->width + c->bw - 1) / c->bw) * ((c->height + c->bh - 1) / c->bh))
             return AVERROR_INVALIDDATA;
-        if (c->decomp_len)
-            c->decode_xor(c);
+        if (c->decomp_len) {
+            if ((ret = c->decode_xor(c)) < 0)
+                return ret;
+        }
     }
 
     /* update frames */
diff -Nru ffmpeg-5.1.8/libavfilter/aarch64/vf_nlmeans_neon.S ffmpeg-5.1.9/libavfilter/aarch64/vf_nlmeans_neon.S
--- ffmpeg-5.1.8/libavfilter/aarch64/vf_nlmeans_neon.S	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/aarch64/vf_nlmeans_neon.S	2026-05-03 23:09:50.000000000 +0000
@@ -22,52 +22,52 @@
 
 // acc_sum_store(ABCD) = {X+A, X+A+B, X+A+B+C, X+A+B+C+D}
 .macro acc_sum_store x, xb
-        dup             v24.4S, v24.S[3]                                // ...X -> XXXX
-        ext             v25.16B, v26.16B, \xb, #12                      // ext(0000,ABCD,12)=0ABC
-        add             v24.4S, v24.4S, \x                              // XXXX+ABCD={X+A,X+B,X+C,X+D}
-        add             v24.4S, v24.4S, v25.4S                          // {X+A,X+B+A,X+C+B,X+D+C}       (+0ABC)
-        ext             v25.16B, v26.16B, v25.16B, #12                  // ext(0000,0ABC,12)=00AB
-        add             v24.4S, v24.4S, v25.4S                          // {X+A,X+B+A,X+C+B+A,X+D+C+B}   (+00AB)
-        ext             v25.16B, v26.16B, v25.16B, #12                  // ext(0000,00AB,12)=000A
-        add             v24.4S, v24.4S, v25.4S                          // {X+A,X+B+A,X+C+B+A,X+D+C+B+A} (+000A)
-        st1             {v24.4S}, [x0], #16                             // write 4x32-bit final values
+        dup             v24.4s, v24.s[3]                                // ...X -> XXXX
+        ext             v25.16b, v26.16b, \xb, #12                      // ext(0000,ABCD,12)=0ABC
+        add             v24.4s, v24.4s, \x                              // XXXX+ABCD={X+A,X+B,X+C,X+D}
+        add             v24.4s, v24.4s, v25.4s                          // {X+A,X+B+A,X+C+B,X+D+C}       (+0ABC)
+        ext             v25.16b, v26.16b, v25.16b, #12                  // ext(0000,0ABC,12)=00AB
+        add             v24.4s, v24.4s, v25.4s                          // {X+A,X+B+A,X+C+B+A,X+D+C+B}   (+00AB)
+        ext             v25.16b, v26.16b, v25.16b, #12                  // ext(0000,00AB,12)=000A
+        add             v24.4s, v24.4s, v25.4s                          // {X+A,X+B+A,X+C+B+A,X+D+C+B+A} (+000A)
+        st1             {v24.4s}, [x0], #16                             // write 4x32-bit final values
 .endm
 
 function ff_compute_safe_ssd_integral_image_neon, export=1
-        movi            v26.4S, #0                                      // used as zero for the "rotations" in acc_sum_store
-        sub             x3, x3, w6, UXTW                                // s1 padding (s1_linesize - w)
-        sub             x5, x5, w6, UXTW                                // s2 padding (s2_linesize - w)
-        sub             x9, x0, w1, UXTW #2                             // dst_top
-        sub             x1, x1, w6, UXTW                                // dst padding (dst_linesize_32 - w)
+        movi            v26.4s, #0                                      // used as zero for the "rotations" in acc_sum_store
+        sub             x3, x3, w6, uxtw                                // s1 padding (s1_linesize - w)
+        sub             x5, x5, w6, uxtw                                // s2 padding (s2_linesize - w)
+        sub             x9, x0, w1, uxtw #2                             // dst_top
+        sub             x1, x1, w6, uxtw                                // dst padding (dst_linesize_32 - w)
         lsl             x1, x1, #2                                      // dst padding expressed in bytes
 1:      mov             w10, w6                                         // width copy for each line
         sub             x0, x0, #16                                     // beginning of the dst line minus 4 sums
         sub             x8, x9, #4                                      // dst_top-1
-        ld1             {v24.4S}, [x0], #16                             // load ...X (contextual last sums)
-2:      ld1             {v0.16B}, [x2], #16                             // s1[x + 0..15]
-        ld1             {v1.16B}, [x4], #16                             // s2[x + 0..15]
-        ld1             {v16.4S,v17.4S}, [x8], #32                      // dst_top[x + 0..7 - 1]
-        usubl           v2.8H, v0.8B,  v1.8B                            // d[x + 0..7]  = s1[x + 0..7]  - s2[x + 0..7]
-        usubl2          v3.8H, v0.16B, v1.16B                           // d[x + 8..15] = s1[x + 8..15] - s2[x + 8..15]
-        ld1             {v18.4S,v19.4S}, [x8], #32                      // dst_top[x + 8..15 - 1]
-        smull           v4.4S, v2.4H, v2.4H                             // d[x + 0..3]^2
-        smull2          v5.4S, v2.8H, v2.8H                             // d[x + 4..7]^2
-        ld1             {v20.4S,v21.4S}, [x9], #32                      // dst_top[x + 0..7]
-        smull           v6.4S, v3.4H, v3.4H                             // d[x + 8..11]^2
-        smull2          v7.4S, v3.8H, v3.8H                             // d[x + 12..15]^2
-        ld1             {v22.4S,v23.4S}, [x9], #32                      // dst_top[x + 8..15]
-        sub             v0.4S, v20.4S, v16.4S                           // dst_top[x + 0..3] - dst_top[x + 0..3 - 1]
-        sub             v1.4S, v21.4S, v17.4S                           // dst_top[x + 4..7] - dst_top[x + 4..7 - 1]
-        add             v0.4S, v0.4S, v4.4S                             // + d[x + 0..3]^2
-        add             v1.4S, v1.4S, v5.4S                             // + d[x + 4..7]^2
-        sub             v2.4S, v22.4S, v18.4S                           // dst_top[x +  8..11] - dst_top[x +  8..11 - 1]
-        sub             v3.4S, v23.4S, v19.4S                           // dst_top[x + 12..15] - dst_top[x + 12..15 - 1]
-        add             v2.4S, v2.4S, v6.4S                             // + d[x +  8..11]^2
-        add             v3.4S, v3.4S, v7.4S                             // + d[x + 12..15]^2
-        acc_sum_store   v0.4S, v0.16B                                   // accumulate and store dst[ 0..3]
-        acc_sum_store   v1.4S, v1.16B                                   // accumulate and store dst[ 4..7]
-        acc_sum_store   v2.4S, v2.16B                                   // accumulate and store dst[ 8..11]
-        acc_sum_store   v3.4S, v3.16B                                   // accumulate and store dst[12..15]
+        ld1             {v24.4s}, [x0], #16                             // load ...X (contextual last sums)
+2:      ld1             {v0.16b}, [x2], #16                             // s1[x + 0..15]
+        ld1             {v1.16b}, [x4], #16                             // s2[x + 0..15]
+        ld1             {v16.4s,v17.4s}, [x8], #32                      // dst_top[x + 0..7 - 1]
+        usubl           v2.8h, v0.8b,  v1.8b                            // d[x + 0..7]  = s1[x + 0..7]  - s2[x + 0..7]
+        usubl2          v3.8h, v0.16b, v1.16b                           // d[x + 8..15] = s1[x + 8..15] - s2[x + 8..15]
+        ld1             {v18.4s,v19.4s}, [x8], #32                      // dst_top[x + 8..15 - 1]
+        smull           v4.4s, v2.4h, v2.4h                             // d[x + 0..3]^2
+        smull2          v5.4s, v2.8h, v2.8h                             // d[x + 4..7]^2
+        ld1             {v20.4s,v21.4s}, [x9], #32                      // dst_top[x + 0..7]
+        smull           v6.4s, v3.4h, v3.4h                             // d[x + 8..11]^2
+        smull2          v7.4s, v3.8h, v3.8h                             // d[x + 12..15]^2
+        ld1             {v22.4s,v23.4s}, [x9], #32                      // dst_top[x + 8..15]
+        sub             v0.4s, v20.4s, v16.4s                           // dst_top[x + 0..3] - dst_top[x + 0..3 - 1]
+        sub             v1.4s, v21.4s, v17.4s                           // dst_top[x + 4..7] - dst_top[x + 4..7 - 1]
+        add             v0.4s, v0.4s, v4.4s                             // + d[x + 0..3]^2
+        add             v1.4s, v1.4s, v5.4s                             // + d[x + 4..7]^2
+        sub             v2.4s, v22.4s, v18.4s                           // dst_top[x +  8..11] - dst_top[x +  8..11 - 1]
+        sub             v3.4s, v23.4s, v19.4s                           // dst_top[x + 12..15] - dst_top[x + 12..15 - 1]
+        add             v2.4s, v2.4s, v6.4s                             // + d[x +  8..11]^2
+        add             v3.4s, v3.4s, v7.4s                             // + d[x + 12..15]^2
+        acc_sum_store   v0.4s, v0.16b                                   // accumulate and store dst[ 0..3]
+        acc_sum_store   v1.4s, v1.16b                                   // accumulate and store dst[ 4..7]
+        acc_sum_store   v2.4s, v2.16b                                   // accumulate and store dst[ 8..11]
+        acc_sum_store   v3.4s, v3.16b                                   // accumulate and store dst[12..15]
         subs            w10, w10, #16                                   // width dec
         b.ne            2b                                              // loop til next line
         add             x2, x2, x3                                      // skip to next line (s1)
diff -Nru ffmpeg-5.1.8/libavfilter/af_amerge.c ffmpeg-5.1.9/libavfilter/af_amerge.c
--- ffmpeg-5.1.8/libavfilter/af_amerge.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/af_amerge.c	2026-05-05 15:50:52.000000000 +0000
@@ -76,7 +76,7 @@
     AVChannelLayout *inlayout[SWR_CH_MAX] = { NULL }, outlayout = { 0 };
     uint64_t outmask = 0;
     AVFilterChannelLayouts *layouts;
-    int i, ret, overlap = 0, nb_ch = 0;
+    int i, ret, nb_ch = 0;
 
     for (i = 0; i < s->nb_inputs; i++) {
         if (!ctx->inputs[i]->incfg.channel_layouts ||
@@ -91,15 +91,11 @@
             av_channel_layout_describe(inlayout[i], buf, sizeof(buf));
             av_log(ctx, AV_LOG_INFO, "Using \"%s\" for input %d\n", buf, i + 1);
         }
-        s->in[i].nb_ch = FF_LAYOUT2COUNT(inlayout[i]);
-        if (s->in[i].nb_ch) {
-            overlap++;
-        } else {
-            s->in[i].nb_ch = inlayout[i]->nb_channels;
-            if (av_channel_layout_subset(inlayout[i], outmask))
-                overlap++;
-            outmask |= inlayout[i]->order == AV_CHANNEL_ORDER_NATIVE ?
-                       inlayout[i]->u.mask : 0;
+        s->in[i].nb_ch = inlayout[i]->nb_channels;
+        for (int j = 0; j < s->in[i].nb_ch; j++) {
+            enum AVChannel id = av_channel_layout_channel_from_index(inlayout[i], j);
+            if (id >= 0 && id < 64)
+                outmask |= (1ULL << id);
         }
         nb_ch += s->in[i].nb_ch;
     }
@@ -107,7 +103,7 @@
         av_log(ctx, AV_LOG_ERROR, "Too many channels (max %d)\n", SWR_CH_MAX);
         return AVERROR(EINVAL);
     }
-    if (overlap) {
+    if (av_popcount64(outmask) != nb_ch) {
         av_log(ctx, AV_LOG_WARNING,
                "Input channel layouts overlap: "
                "output layout will be determined by the number of distinct input channels\n");
diff -Nru ffmpeg-5.1.8/libavfilter/af_lv2.c ffmpeg-5.1.9/libavfilter/af_lv2.c
--- ffmpeg-5.1.8/libavfilter/af_lv2.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/af_lv2.c	2026-05-05 15:50:55.000000000 +0000
@@ -72,6 +72,7 @@
     float *controls;
 
     LilvInstance *instance;
+    int           instance_activated;
 
     LilvNode  *atom_AtomPort;
     LilvNode  *atom_Sequence;
@@ -389,6 +390,9 @@
         inlink->min_samples = inlink->max_samples = 4096;
     }
 
+    lilv_instance_activate(s->instance);
+    s->instance_activated = 1;
+
     return 0;
 }
 
@@ -568,6 +572,8 @@
 {
     LV2Context *s = ctx->priv;
 
+    if (s->instance_activated)
+        lilv_instance_deactivate(s->instance);
     lilv_node_free(s->powerOf2BlockLength);
     lilv_node_free(s->fixedBlockLength);
     lilv_node_free(s->boundedBlockLength);
diff -Nru ffmpeg-5.1.8/libavfilter/af_pan.c ffmpeg-5.1.9/libavfilter/af_pan.c
--- ffmpeg-5.1.8/libavfilter/af_pan.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/af_pan.c	2026-05-05 15:50:55.000000000 +0000
@@ -69,7 +69,7 @@
 
     skip_spaces(arg);
     /* try to parse a channel name, e.g. "FL" */
-    if (sscanf(*arg, "%7[A-Z]%n", buf, &len)) {
+    if (sscanf(*arg, "%7[A-Z]%n", buf, &len) >= 1) {
         channel_id = av_channel_from_string(buf);
         if (channel_id < 0)
             return channel_id;
@@ -80,7 +80,7 @@
         return 0;
     }
     /* try to parse a channel number, e.g. "c2" */
-    if (sscanf(*arg, "c%d%n", &channel_id, &len) &&
+    if (sscanf(*arg, "c%d%n", &channel_id, &len) >= 1 &&
         channel_id >= 0 && channel_id < MAX_CHANNELS) {
         *rchannel = channel_id;
         *rnamed = 0;
diff -Nru ffmpeg-5.1.8/libavfilter/afir_template.c ffmpeg-5.1.9/libavfilter/afir_template.c
--- ffmpeg-5.1.8/libavfilter/afir_template.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/afir_template.c	2026-05-05 15:50:52.000000000 +0000
@@ -388,5 +388,3 @@
 
     return 0;
 }
-
-
diff -Nru ffmpeg-5.1.8/libavfilter/convolution.h ffmpeg-5.1.9/libavfilter/convolution.h
--- ffmpeg-5.1.8/libavfilter/convolution.h	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/convolution.h	2026-05-05 15:50:55.000000000 +0000
@@ -21,6 +21,7 @@
 #ifndef AVFILTER_CONVOLUTION_H
 #define AVFILTER_CONVOLUTION_H
 #include "avfilter.h"
+#include "libavutil/internal.h"
 
 enum MatrixMode {
     MATRIX_SQUARE,
diff -Nru ffmpeg-5.1.8/libavfilter/qp_table.c ffmpeg-5.1.9/libavfilter/qp_table.c
--- ffmpeg-5.1.8/libavfilter/qp_table.c	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/qp_table.c	2026-05-05 14:22:01.000000000 +0000
@@ -66,4 +66,3 @@
 
     return 0;
 }
-
diff -Nru ffmpeg-5.1.8/libavfilter/scale_eval.c ffmpeg-5.1.9/libavfilter/scale_eval.c
--- ffmpeg-5.1.8/libavfilter/scale_eval.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/scale_eval.c	2026-05-05 15:50:55.000000000 +0000
@@ -83,18 +83,27 @@
     av_expr_parse_and_eval(&res, (expr = w_expr),
                            var_names, var_values,
                            NULL, NULL, NULL, NULL, NULL, 0, log_ctx);
-    eval_w = var_values[VAR_OUT_W] = var_values[VAR_OW] = (int) res == 0 ? inlink->w : (int) res;
+    var_values[VAR_OUT_W] = var_values[VAR_OW] = res == 0 ? inlink->w : trunc(res);
 
     if ((ret = av_expr_parse_and_eval(&res, (expr = h_expr),
                                       var_names, var_values,
                                       NULL, NULL, NULL, NULL, NULL, 0, log_ctx)) < 0)
         goto fail;
+    if (!(res >= INT32_MIN && res <= INT32_MAX)) {
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
     eval_h = var_values[VAR_OUT_H] = var_values[VAR_OH] = (int) res == 0 ? inlink->h : (int) res;
     /* evaluate again the width, as it may depend on the output height */
     if ((ret = av_expr_parse_and_eval(&res, (expr = w_expr),
                                       var_names, var_values,
                                       NULL, NULL, NULL, NULL, NULL, 0, log_ctx)) < 0)
         goto fail;
+    if (!(res >= INT32_MIN && res <= INT32_MAX)) {
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
     eval_w = (int) res == 0 ? inlink->w : (int) res;
 
     *ret_w = eval_w;
@@ -115,7 +124,7 @@
     int force_original_aspect_ratio, int force_divisible_by)
 {
     int64_t w, h;
-    int factor_w, factor_h;
+    int64_t factor_w, factor_h;
 
     w = *ret_w;
     h = *ret_h;
diff -Nru ffmpeg-5.1.8/libavfilter/scene_sad.c ffmpeg-5.1.9/libavfilter/scene_sad.c
--- ffmpeg-5.1.8/libavfilter/scene_sad.c	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/scene_sad.c	2026-05-05 14:22:01.000000000 +0000
@@ -70,4 +70,3 @@
     }
     return sad;
 }
-
diff -Nru ffmpeg-5.1.8/libavfilter/vf_codecview.c ffmpeg-5.1.9/libavfilter/vf_codecview.c
--- ffmpeg-5.1.8/libavfilter/vf_codecview.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/vf_codecview.c	2026-05-05 15:50:55.000000000 +0000
@@ -264,9 +264,22 @@
             if (par->nb_blocks) {
                 for (int block_idx = 0; block_idx < par->nb_blocks; block_idx++) {
                     AVVideoBlockParams *b = av_video_enc_params_block(par, block_idx);
-                    uint8_t *buf = frame->data[0] + b->src_y * stride;
 
-                    draw_block_rectangle(buf, b->src_x, b->src_y, b->w, b->h, stride, 100);
+                    int64_t x0 = b->src_x;
+                    int64_t y0 = b->src_y;
+                    int64_t x1 = x0 + b->w;
+                    int64_t y1 = y0 + b->h;
+
+                    x0 = FFMAX(x0, 0);
+                    y0 = FFMAX(y0, 0);
+                    x1 = FFMIN(x1, frame->width);
+                    y1 = FFMIN(y1, frame->height);
+
+                    if (x1 <= x0 || y1 <= y0)
+                        continue;
+
+                    uint8_t *buf = frame->data[0] + y0 * stride;
+                    draw_block_rectangle(buf, x0, y0, x1-x0, y1-y0, stride, 100);
                 }
             }
         }
diff -Nru ffmpeg-5.1.8/libavfilter/vf_convolution.c ffmpeg-5.1.9/libavfilter/vf_convolution.c
--- ffmpeg-5.1.8/libavfilter/vf_convolution.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/vf_convolution.c	2026-05-05 15:50:55.000000000 +0000
@@ -574,11 +574,8 @@
     int i;
 
     for (i = 0; i < 25; i++) {
-        int xoff = FFABS(x + ((i % 5) - 2));
-        int yoff = FFABS(y + (i / 5) - 2);
-
-        xoff = xoff >= w ? 2 * w - 1 - xoff : xoff;
-        yoff = yoff >= h ? 2 * h - 1 - yoff : yoff;
+        int xoff = avpriv_mirror(x + (i % 5) - 2, w - 1);
+        int yoff = avpriv_mirror(y + (i / 5) - 2, h - 1);
 
         c[i] = src + xoff * bpc + yoff * stride;
     }
@@ -590,11 +587,8 @@
     int i;
 
     for (i = 0; i < 49; i++) {
-        int xoff = FFABS(x + ((i % 7) - 3));
-        int yoff = FFABS(y + (i / 7) - 3);
-
-        xoff = xoff >= w ? 2 * w - 1 - xoff : xoff;
-        yoff = yoff >= h ? 2 * h - 1 - yoff : yoff;
+        int xoff = avpriv_mirror(x + (i % 7) - 3, w - 1);
+        int yoff = avpriv_mirror(y + (i / 7) - 3, h - 1);
 
         c[i] = src + xoff * bpc + yoff * stride;
     }
@@ -606,9 +600,7 @@
     int i;
 
     for (i = 0; i < radius * 2 + 1; i++) {
-        int xoff = FFABS(x + i - radius);
-
-        xoff = xoff >= w ? 2 * w - 1 - xoff : xoff;
+        int xoff = avpriv_mirror(x + i - radius, w - 1);
 
         c[i] = src + xoff * bpc + y * stride;
     }
@@ -620,9 +612,7 @@
     int i;
 
     for (i = 0; i < radius * 2 + 1; i++) {
-        int xoff = FFABS(x + i - radius);
-
-        xoff = xoff >= h ? 2 * h - 1 - xoff : xoff;
+        int xoff = avpriv_mirror(x + i - radius, h - 1);
 
         c[i] = src + y * bpc + xoff * stride;
     }
@@ -668,10 +658,12 @@
             continue;
         }
         for (y = slice_start; y < slice_end; y += step) {
-            const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : radius * bpc;
-            const int yoff = mode == MATRIX_COLUMN ? radius * dstride : 0;
+            const int left = FFMIN(radius, sizew);
+            const int right = FFMAX(left, sizew - radius);
+            const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : left * bpc;
+            const int yoff = mode == MATRIX_COLUMN ? left * dstride : 0;
 
-            for (x = 0; x < radius; x++) {
+            for (x = 0; x < left; x++) {
                 const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : x * bpc;
                 const int yoff = mode == MATRIX_COLUMN ? x * dstride : 0;
 
@@ -680,11 +672,11 @@
                                  bias, matrix, c, s->max, radius,
                                  dstride, stride, slice_end - step);
             }
-            s->setup[plane](radius, c, src, stride, radius, width, y, height, bpc);
-            s->filter[plane](dst + yoff + xoff, sizew - 2 * radius,
+            s->setup[plane](radius, c, src, stride, left, width, y, height, bpc);
+            s->filter[plane](dst + yoff + xoff, right - left,
                              rdiv, bias, matrix, c, s->max, radius,
                              dstride, stride, slice_end - step);
-            for (x = sizew - radius; x < sizew; x++) {
+            for (x = right; x < sizew; x++) {
                 const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : x * bpc;
                 const int yoff = mode == MATRIX_COLUMN ? x * dstride : 0;
 
diff -Nru ffmpeg-5.1.8/libavfilter/vf_find_rect.c ffmpeg-5.1.9/libavfilter/vf_find_rect.c
--- ffmpeg-5.1.8/libavfilter/vf_find_rect.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/vf_find_rect.c	2026-05-05 15:50:55.000000000 +0000
@@ -51,8 +51,8 @@
     { "mipmaps", "set mipmaps", OFFSET(mipmaps), AV_OPT_TYPE_INT, {.i64 = 3}, 1, MAX_MIPMAPS, FLAGS },
     { "xmin", "", OFFSET(xmin), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
     { "ymin", "", OFFSET(ymin), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
-    { "xmax", "", OFFSET(xmax), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
-    { "ymax", "", OFFSET(ymax), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
+    { "xmax", "", OFFSET(xmax), AV_OPT_TYPE_INT, {.i64 = INT_MAX}, 0, INT_MAX, FLAGS },
+    { "ymax", "", OFFSET(ymax), AV_OPT_TYPE_INT, {.i64 = INT_MAX}, 0, INT_MAX, FLAGS },
     { "discard", "", OFFSET(discard), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
     { NULL }
 };
@@ -78,8 +78,10 @@
     src = in   ->data[0];
     dst = frame->data[0];
 
-    for(y = 0; y < frame->height; y++) {
-        for(x = 0; x < frame->width; x++) {
+    int w2 = in->width/2;
+    int h2 = in->height/2;
+    for(y = 0; y < h2; y++) {
+        for(x = 0; x < w2; x++) {
             dst[x] = (  src[2*x+0]
                       + src[2*x+1]
                       + src[2*x+0 + in->linesize[0]]
@@ -89,6 +91,22 @@
         src += 2*in->linesize[0];
         dst += frame->linesize[0];
     }
+    src = in   ->data[0];
+    dst = frame->data[0];
+    for(y = 0; y < frame->height; y++) {
+        int yd = y < h2 ? in->linesize[0] : 0;
+        x = yd ? w2 : 0;
+        for(; x < frame->width; x++) {
+            dst[x] = (  src[2*x+0]
+                      + src[FFMIN(2*x+1, w2)]
+                      + src[2*x+0            + yd]
+                      + src[FFMIN(2*x+1, w2) + yd]
+                      + 2) >> 2;
+        }
+        src += 2*in->linesize[0];
+        dst += frame->linesize[0];
+    }
+
     return frame;
 }
 
@@ -130,19 +148,6 @@
     return 1 - fabs(c);
 }
 
-static int config_input(AVFilterLink *inlink)
-{
-    AVFilterContext *ctx = inlink->dst;
-    FOCContext *foc = ctx->priv;
-
-    if (foc->xmax <= 0)
-        foc->xmax = inlink->w - foc->obj_frame->width;
-    if (foc->ymax <= 0)
-        foc->ymax = inlink->h - foc->obj_frame->height;
-
-    return 0;
-}
-
 static float search(FOCContext *foc, int pass, int maxpass, int xmin, int xmax, int ymin, int ymax, int *best_x, int *best_y, float best_score)
 {
     int x, y;
@@ -178,19 +183,24 @@
     int i;
     char buf[32];
 
+    int xmin = FFMAX(foc->xmin, 0);
+    int ymin = FFMAX(foc->ymin, 0);
+    int xmax = FFMIN(foc->xmax, inlink->w - foc->obj_frame->width );
+    int ymax = FFMIN(foc->ymax, inlink->h - foc->obj_frame->height);
+
     foc->haystack_frame[0] = av_frame_clone(in);
     for (i=1; i<foc->mipmaps; i++) {
         foc->haystack_frame[i] = downscale(foc->haystack_frame[i-1]);
     }
 
     best_score = search(foc, 0, 0,
-                        FFMAX(foc->xmin, foc->last_x - 8),
-                        FFMIN(foc->xmax, foc->last_x + 8),
-                        FFMAX(foc->ymin, foc->last_y - 8),
-                        FFMIN(foc->ymax, foc->last_y + 8),
+                        FFMAX(xmin, foc->last_x - 8),
+                        FFMIN(xmax, foc->last_x + 8),
+                        FFMAX(ymin, foc->last_y - 8),
+                        FFMIN(ymax, foc->last_y + 8),
                         &best_x, &best_y, 2.0);
 
-    best_score = search(foc, 0, foc->mipmaps - 1, foc->xmin, foc->xmax, foc->ymin, foc->ymax,
+    best_score = search(foc, 0, foc->mipmaps - 1, xmin, xmax, ymin, ymax,
                         &best_x, &best_y, best_score);
 
     for (i=0; i<MAX_MIPMAPS; i++) {
@@ -276,7 +286,6 @@
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
-        .config_props = config_input,
         .filter_frame = filter_frame,
     },
 };
diff -Nru ffmpeg-5.1.8/libavfilter/vf_kerndeint.c ffmpeg-5.1.9/libavfilter/vf_kerndeint.c
--- ffmpeg-5.1.8/libavfilter/vf_kerndeint.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/vf_kerndeint.c	2026-05-05 15:50:55.000000000 +0000
@@ -84,6 +84,12 @@
 
     kerndeint->is_packed_rgb = av_pix_fmt_desc_get(inlink->format)->flags & AV_PIX_FMT_FLAG_RGB;
     kerndeint->vsub = desc->log2_chroma_h;
+    if (AV_CEIL_RSHIFT(inlink->h, kerndeint->vsub) < 4) {
+        av_log(inlink->dst, AV_LOG_ERROR,
+               "Input height %d is too small; minimum chroma plane height is 4\n",
+               inlink->h);
+        return AVERROR(EINVAL);
+    }
 
     ret = av_image_alloc(kerndeint->tmp_data, kerndeint->tmp_linesize,
                          inlink->w, inlink->h, inlink->format, 16);
diff -Nru ffmpeg-5.1.8/libavfilter/vf_libopencv.c ffmpeg-5.1.9/libavfilter/vf_libopencv.c
--- ffmpeg-5.1.8/libavfilter/vf_libopencv.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/vf_libopencv.c	2026-05-05 15:50:55.000000000 +0000
@@ -209,7 +209,7 @@
     int cols = 0, rows = 0, anchor_x = 0, anchor_y = 0, shape = CV_SHAPE_RECT;
     int *values = NULL, ret = 0;
 
-    sscanf(buf, "%dx%d+%dx%d/%32[^=]=%127s", &cols, &rows, &anchor_x, &anchor_y, shape_str, shape_filename);
+    sscanf(buf, "%dx%d+%dx%d/%31[^=]=%127s", &cols, &rows, &anchor_x, &anchor_y, shape_str, shape_filename);
 
     if      (!strcmp(shape_str, "rect"   )) shape = CV_SHAPE_RECT;
     else if (!strcmp(shape_str, "cross"  )) shape = CV_SHAPE_CROSS;
diff -Nru ffmpeg-5.1.8/libavfilter/vf_neighbor_opencl.c ffmpeg-5.1.9/libavfilter/vf_neighbor_opencl.c
--- ffmpeg-5.1.8/libavfilter/vf_neighbor_opencl.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/vf_neighbor_opencl.c	2026-05-05 15:50:55.000000000 +0000
@@ -69,6 +69,9 @@
         kernel_name = "erosion_global";
     } else if (!strcmp(avctx->filter->name, "dilation_opencl")){
         kernel_name = "dilation_global";
+    } else {
+        err = AVERROR_BUG;
+        goto fail;
     }
     ctx->kernel = clCreateKernel(ctx->ocf.program, kernel_name, &cle);
     CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
diff -Nru ffmpeg-5.1.8/libavfilter/vf_overlay_cuda.cu ffmpeg-5.1.9/libavfilter/vf_overlay_cuda.cu
--- ffmpeg-5.1.8/libavfilter/vf_overlay_cuda.cu	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/vf_overlay_cuda.cu	2026-05-05 14:22:01.000000000 +0000
@@ -51,4 +51,3 @@
 }
 
 }
-
diff -Nru ffmpeg-5.1.8/libavfilter/vf_scale.c ffmpeg-5.1.9/libavfilter/vf_scale.c
--- ffmpeg-5.1.8/libavfilter/vf_scale.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/vf_scale.c	2026-05-05 15:50:55.000000000 +0000
@@ -503,8 +503,8 @@
 
     if (outlink->w > INT_MAX ||
         outlink->h > INT_MAX ||
-        (outlink->h * inlink->w) > INT_MAX ||
-        (outlink->w * inlink->h) > INT_MAX)
+        (outlink->h * (uint64_t)inlink->w) > INT_MAX ||
+        (outlink->w * (uint64_t)inlink->h) > INT_MAX)
         av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");
 
     /* TODO: make algorithm configurable */
diff -Nru ffmpeg-5.1.8/libavfilter/vf_stack.c ffmpeg-5.1.9/libavfilter/vf_stack.c
--- ffmpeg-5.1.8/libavfilter/vf_stack.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/vf_stack.c	2026-05-05 15:50:55.000000000 +0000
@@ -227,6 +227,8 @@
                 item->y[1] = item->y[2] = AV_CEIL_RSHIFT(height, s->desc->log2_chroma_h);
                 item->y[0] = item->y[3] = height;
 
+                if (height > INT_MAX - ctx->inputs[i]->h)
+                    return AVERROR(EINVAL);
                 height += ctx->inputs[i]->h;
             }
         }
@@ -252,6 +254,8 @@
                     return ret;
                 }
 
+                if (width > INT_MAX - ctx->inputs[i]->w)
+                    return AVERROR(EINVAL);
                 width += ctx->inputs[i]->w;
             }
         }
@@ -287,8 +291,13 @@
 
                 item->y[1] = item->y[2] = AV_CEIL_RSHIFT(inh, s->desc->log2_chroma_h);
                 item->y[0] = item->y[3] = inh;
+
+                if (inw > INT_MAX - ctx->inputs[k]->w)
+                    return AVERROR(EINVAL);
                 inw += ctx->inputs[k]->w;
             }
+            if (height > INT_MAX - row_height)
+                return AVERROR(EINVAL);
             height += row_height;
             if (!i)
                 width = inw;
@@ -339,26 +348,41 @@
                         if (size == i || size < 0 || size >= s->nb_inputs)
                             return AVERROR(EINVAL);
 
-                        if (!j)
+                        if (!j) {
+                            if (inw > INT_MAX - ctx->inputs[size]->w)
+                                return AVERROR(EINVAL);
                             inw += ctx->inputs[size]->w;
-                        else
+                        } else {
+                            if (inh > INT_MAX - ctx->inputs[size]->w)
+                                return AVERROR(EINVAL);
                             inh += ctx->inputs[size]->w;
+                        }
                     } else if (sscanf(arg3, "h%d", &size) == 1) {
                         if (size == i || size < 0 || size >= s->nb_inputs)
                             return AVERROR(EINVAL);
 
-                        if (!j)
+                        if (!j) {
+                            if (inw > INT_MAX - ctx->inputs[size]->h)
+                                return AVERROR(EINVAL);
                             inw += ctx->inputs[size]->h;
-                        else
+                        } else {
+                            if (inh > INT_MAX - ctx->inputs[size]->h)
+                                return AVERROR(EINVAL);
                             inh += ctx->inputs[size]->h;
+                        }
                     } else if (sscanf(arg3, "%d", &size) == 1) {
                         if (size < 0)
                             return AVERROR(EINVAL);
 
-                        if (!j)
+                        if (!j) {
+                            if (inw > INT_MAX - size)
+                                return AVERROR(EINVAL);
                             inw += size;
-                        else
+                        } else {
+                            if (inh > INT_MAX - size)
+                                return AVERROR(EINVAL);
                             inh += size;
+                        }
                     } else {
                         return AVERROR(EINVAL);
                     }
@@ -372,6 +396,8 @@
             item->y[1] = item->y[2] = AV_CEIL_RSHIFT(inh, s->desc->log2_chroma_h);
             item->y[0] = item->y[3] = inh;
 
+            if (inlink->w > INT_MAX - inw || inlink->h > INT_MAX - inh)
+                return AVERROR(EINVAL);
             width  = FFMAX(width,  inlink->w + inw);
             height = FFMAX(height, inlink->h + inh);
         }
diff -Nru ffmpeg-5.1.8/libavfilter/vf_v360.c ffmpeg-5.1.9/libavfilter/vf_v360.c
--- ffmpeg-5.1.8/libavfilter/vf_v360.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/vf_v360.c	2026-05-05 15:50:55.000000000 +0000
@@ -286,7 +286,8 @@
     const AVFrame *in = td->in;                                                                            \
     AVFrame *out = td->out;                                                                                \
                                                                                                            \
-    for (int stereo = 0; stereo < 1 + s->out_stereo > STEREO_2D; stereo++) {                               \
+                                                                                                           \
+    for (int stereo = 0; stereo < 1 + (s->out_stereo > STEREO_2D); stereo++) {                               \
         for (int plane = 0; plane < s->nb_planes; plane++) {                                               \
             const unsigned map = s->map[plane];                                                            \
             const int in_linesize  = in->linesize[plane];                                                  \
diff -Nru ffmpeg-5.1.8/libavfilter/vf_zscale.c ffmpeg-5.1.9/libavfilter/vf_zscale.c
--- ffmpeg-5.1.8/libavfilter/vf_zscale.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavfilter/vf_zscale.c	2026-05-05 15:50:55.000000000 +0000
@@ -251,7 +251,7 @@
     double var_values[VARS_NB], res;
     char *expr;
     int ret;
-    int factor_w, factor_h;
+    int64_t factor_w, factor_h;
 
     var_values[VAR_IN_W]  = var_values[VAR_IW] = inlink->w;
     var_values[VAR_IN_H]  = var_values[VAR_IH] = inlink->h;
@@ -270,17 +270,26 @@
     av_expr_parse_and_eval(&res, (expr = s->w_expr),
                            var_names, var_values,
                            NULL, NULL, NULL, NULL, NULL, 0, ctx);
-    s->w = var_values[VAR_OUT_W] = var_values[VAR_OW] = res;
+    var_values[VAR_OUT_W] = var_values[VAR_OW] = trunc(res);
     if ((ret = av_expr_parse_and_eval(&res, (expr = s->h_expr),
                                       var_names, var_values,
                                       NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
         goto fail;
+    if (!(res >= INT32_MIN && res <= INT32_MAX)) {
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
     s->h = var_values[VAR_OUT_H] = var_values[VAR_OH] = res;
     /* evaluate again the width, as it may depend on the output height */
     if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr),
                                       var_names, var_values,
                                       NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
         goto fail;
+    if (!(res >= INT32_MIN && res <= INT32_MAX)) {
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
     s->w = res;
 
     w = s->w;
diff -Nru ffmpeg-5.1.8/libavformat/avidec.c ffmpeg-5.1.9/libavformat/avidec.c
--- ffmpeg-5.1.8/libavformat/avidec.c	2025-11-26 02:41:35.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/avidec.c	2026-05-05 15:50:55.000000000 +0000
@@ -549,9 +549,11 @@
                     avi->movi_end = avi->fsize;
                 av_log(s, AV_LOG_TRACE, "movi end=%"PRIx64"\n", avi->movi_end);
                 goto end_of_header;
-            } else if (tag1 == MKTAG('I', 'N', 'F', 'O'))
+            } else if (tag1 == MKTAG('I', 'N', 'F', 'O')) {
+                if (size < 4)
+                    return AVERROR_INVALIDDATA;
                 ff_read_riff_info(s, size - 4);
-            else if (tag1 == MKTAG('n', 'c', 'd', 't'))
+            } else if (tag1 == MKTAG('n', 'c', 'd', 't'))
                 avi_read_nikon(s, list_end);
 
             break;
@@ -1820,6 +1822,10 @@
             avi->index_loaded=2;
             ret = 0;
         }else if (tag == MKTAG('L', 'I', 'S', 'T')) {
+            if (size < 4) {
+                av_log(s, AV_LOG_WARNING, "Invalid size (%u) LIST in index\n", size);
+                break;
+            }
             uint32_t tag1 = avio_rl32(pb);
 
             if (tag1 == MKTAG('I', 'N', 'F', 'O'))
diff -Nru ffmpeg-5.1.8/libavformat/cafdec.c ffmpeg-5.1.9/libavformat/cafdec.c
--- ffmpeg-5.1.8/libavformat/cafdec.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/cafdec.c	2026-05-05 15:50:55.000000000 +0000
@@ -281,6 +281,10 @@
     AVIOContext *pb = s->pb;
     unsigned int i;
     unsigned int nb_entries = avio_rb32(pb);
+
+    if (3LL * nb_entries > size)
+        return;
+
     for (i = 0; i < nb_entries && !avio_feof(pb); i++) {
         char key[32];
         char value[1024];
@@ -494,6 +498,8 @@
         frame_cnt  = caf->frames_per_packet * packet_cnt;
     } else if (sti->nb_index_entries) {
         packet_cnt = av_index_search_timestamp(st, timestamp, flags);
+        if (packet_cnt < 0)
+            return -1;
         frame_cnt  = sti->index_entries[packet_cnt].timestamp;
         pos        = sti->index_entries[packet_cnt].pos;
     } else {
diff -Nru ffmpeg-5.1.8/libavformat/concat.c ffmpeg-5.1.9/libavformat/concat.c
--- ffmpeg-5.1.8/libavformat/concat.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/concat.c	2026-05-05 15:50:55.000000000 +0000
@@ -111,6 +111,12 @@
             break;
         }
 
+        if (total_size > INT64_MAX - size) {
+            ffurl_close(uc);
+            err = AVERROR_INVALIDDATA;
+            break;
+        }
+
         /* assembling */
         nodes[i].uc   = uc;
         nodes[i].size = size;
@@ -280,6 +286,12 @@
             break;
         }
 
+        if (total_size > INT64_MAX - size) {
+            ffurl_close(uc);
+            err = AVERROR_INVALIDDATA;
+            break;
+        }
+
         nodes = av_fast_realloc(data->nodes, &nodes_size, sizeof(*nodes) * len);
         if (!nodes) {
             ffurl_close(uc);
diff -Nru ffmpeg-5.1.8/libavformat/dash.c ffmpeg-5.1.9/libavformat/dash.c
--- ffmpeg-5.1.8/libavformat/dash.c	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/dash.c	2026-05-05 14:22:01.000000000 +0000
@@ -152,5 +152,3 @@
         t_cur = t_next;
     }
 }
-
-
diff -Nru ffmpeg-5.1.8/libavformat/dashdec.c ffmpeg-5.1.9/libavformat/dashdec.c
--- ffmpeg-5.1.8/libavformat/dashdec.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/dashdec.c	2026-05-05 15:50:55.000000000 +0000
@@ -828,6 +828,43 @@
 
 }
 
+#define SET_REPRESENTATION_SEQUENCE_BASE_INFO(arg, cnt) { \
+        val = get_val_from_nodes_tab((arg), (cnt), "duration"); \
+        if (val) { \
+            int64_t fragment_duration = (int64_t) strtoll(val, NULL, 10); \
+            if (fragment_duration < 0) { \
+                av_log(s, AV_LOG_WARNING, "duration invalid, autochanged to 0.\n"); \
+                fragment_duration = 0; \
+            } \
+            rep->fragment_duration = fragment_duration; \
+            av_log(s, AV_LOG_TRACE, "rep->fragment_duration = [%"PRId64"]\n", rep->fragment_duration); \
+            xmlFree(val); \
+        } \
+        val = get_val_from_nodes_tab((arg), (cnt), "timescale"); \
+        if (val) { \
+            int64_t fragment_timescale = (int64_t) strtoll(val, NULL, 10); \
+            if (fragment_timescale < 0) { \
+                av_log(s, AV_LOG_WARNING, "timescale invalid, autochanged to 0.\n"); \
+                fragment_timescale = 0; \
+            } \
+            rep->fragment_timescale = fragment_timescale; \
+            av_log(s, AV_LOG_TRACE, "rep->fragment_timescale = [%"PRId64"]\n", rep->fragment_timescale); \
+            xmlFree(val); \
+        } \
+        val = get_val_from_nodes_tab((arg), (cnt), "startNumber"); \
+        if (val) { \
+            int64_t start_number = (int64_t) strtoll(val, NULL, 10); \
+            if (start_number < 0) { \
+                av_log(s, AV_LOG_WARNING, "startNumber invalid, autochanged to 0.\n"); \
+                start_number = 0; \
+            } \
+            rep->start_number = rep->first_seq_no = start_number; \
+            av_log(s, AV_LOG_TRACE, "rep->first_seq_no = [%"PRId64"]\n", rep->first_seq_no); \
+            xmlFree(val); \
+        } \
+    }
+
+
 static int parse_manifest_representation(AVFormatContext *s, const char *url,
                                          xmlNodePtr node,
                                          xmlNodePtr adaptionset_node,
@@ -942,28 +979,17 @@
         }
         val = get_val_from_nodes_tab(fragment_templates_tab, 4, "presentationTimeOffset");
         if (val) {
-            rep->presentation_timeoffset = (int64_t) strtoll(val, NULL, 10);
+            int64_t presentation_timeoffset = (int64_t) strtoll(val, NULL, 10);
+            if (presentation_timeoffset < 0) {
+                av_log(s, AV_LOG_WARNING, "presentationTimeOffset invalid, autochanged to 0.\n");
+                presentation_timeoffset = 0;
+            }
+            rep->presentation_timeoffset = presentation_timeoffset;
             av_log(s, AV_LOG_TRACE, "rep->presentation_timeoffset = [%"PRId64"]\n", rep->presentation_timeoffset);
             xmlFree(val);
         }
-        val = get_val_from_nodes_tab(fragment_templates_tab, 4, "duration");
-        if (val) {
-            rep->fragment_duration = (int64_t) strtoll(val, NULL, 10);
-            av_log(s, AV_LOG_TRACE, "rep->fragment_duration = [%"PRId64"]\n", rep->fragment_duration);
-            xmlFree(val);
-        }
-        val = get_val_from_nodes_tab(fragment_templates_tab, 4, "timescale");
-        if (val) {
-            rep->fragment_timescale = (int64_t) strtoll(val, NULL, 10);
-            av_log(s, AV_LOG_TRACE, "rep->fragment_timescale = [%"PRId64"]\n", rep->fragment_timescale);
-            xmlFree(val);
-        }
-        val = get_val_from_nodes_tab(fragment_templates_tab, 4, "startNumber");
-        if (val) {
-            rep->start_number = rep->first_seq_no = (int64_t) strtoll(val, NULL, 10);
-            av_log(s, AV_LOG_TRACE, "rep->first_seq_no = [%"PRId64"]\n", rep->first_seq_no);
-            xmlFree(val);
-        }
+
+        SET_REPRESENTATION_SEQUENCE_BASE_INFO(fragment_templates_tab, 4);
         if (adaptionset_supplementalproperty_node) {
             char *scheme_id_uri = xmlGetProp(adaptionset_supplementalproperty_node, "schemeIdUri");
             if (scheme_id_uri) {
@@ -1020,25 +1046,7 @@
         segmentlists_tab[1] = adaptionset_segmentlist_node;
         segmentlists_tab[2] = period_segmentlist_node;
 
-        val = get_val_from_nodes_tab(segmentlists_tab, 3, "duration");
-        if (val) {
-            rep->fragment_duration = (int64_t) strtoll(val, NULL, 10);
-            av_log(s, AV_LOG_TRACE, "rep->fragment_duration = [%"PRId64"]\n", rep->fragment_duration);
-            xmlFree(val);
-        }
-        val = get_val_from_nodes_tab(segmentlists_tab, 3, "timescale");
-        if (val) {
-            rep->fragment_timescale = (int64_t) strtoll(val, NULL, 10);
-            av_log(s, AV_LOG_TRACE, "rep->fragment_timescale = [%"PRId64"]\n", rep->fragment_timescale);
-            xmlFree(val);
-        }
-        val = get_val_from_nodes_tab(segmentlists_tab, 3, "startNumber");
-        if (val) {
-            rep->start_number = rep->first_seq_no = (int64_t) strtoll(val, NULL, 10);
-            av_log(s, AV_LOG_TRACE, "rep->first_seq_no = [%"PRId64"]\n", rep->first_seq_no);
-            xmlFree(val);
-        }
-
+        SET_REPRESENTATION_SEQUENCE_BASE_INFO(segmentlists_tab, 3)
         fragmenturl_node = xmlFirstElementChild(representation_segmentlist_node);
         while (fragmenturl_node) {
             ret = parse_manifest_segmenturlnode(s, rep, fragmenturl_node,
diff -Nru ffmpeg-5.1.8/libavformat/demux.c ffmpeg-5.1.9/libavformat/demux.c
--- ffmpeg-5.1.8/libavformat/demux.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/demux.c	2026-05-05 15:50:55.000000000 +0000
@@ -761,9 +761,14 @@
         } else {
             for (int i = 0; i < delay; i++) {
                 if (pts_buffer[i] != AV_NOPTS_VALUE) {
-                    int64_t diff = FFABS(pts_buffer[i] - dts)
-                                   + (uint64_t)sti->pts_reorder_error[i];
-                    diff = FFMAX(diff, sti->pts_reorder_error[i]);
+#define ABSDIFF(a,b) (((a) < (b)) ? (b) - (uint64_t)(a) : ((a) - (uint64_t)(b)))
+                    uint64_t diff = ABSDIFF(pts_buffer[i], dts);
+
+                    if (diff > INT64_MAX - sti->pts_reorder_error[i]) {
+                        diff = INT64_MAX;
+                    } else
+                        diff += sti->pts_reorder_error[i];
+
                     sti->pts_reorder_error[i] = diff;
                     sti->pts_reorder_error_count[i]++;
                     if (sti->pts_reorder_error_count[i] > 250) {
diff -Nru ffmpeg-5.1.8/libavformat/dhav.c ffmpeg-5.1.9/libavformat/dhav.c
--- ffmpeg-5.1.8/libavformat/dhav.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/dhav.c	2026-05-05 15:50:55.000000000 +0000
@@ -288,7 +288,9 @@
                 if (seek_back < 9)
                     break;
                 dhav->last_good_pos = avio_tell(s->pb);
-                avio_seek(s->pb, -seek_back, SEEK_CUR);
+                int64_t ret64 = avio_seek(s->pb, -seek_back, SEEK_CUR);
+                if (ret64 < 0)
+                    return ret64;
             }
             avio_seek(s->pb, dhav->last_good_pos, SEEK_SET);
         }
diff -Nru ffmpeg-5.1.8/libavformat/dss.c ffmpeg-5.1.9/libavformat/dss.c
--- ffmpeg-5.1.8/libavformat/dss.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/dss.c	2026-05-05 15:50:55.000000000 +0000
@@ -24,6 +24,7 @@
 
 #include "avformat.h"
 #include "internal.h"
+#include "avio_internal.h"
 
 #define DSS_HEAD_OFFSET_AUTHOR        0xc
 #define DSS_AUTHOR_SIZE               16
@@ -336,7 +337,9 @@
     if (ret < 0)
         return ret;
 
-    avio_read(s->pb, header, DSS_AUDIO_BLOCK_HEADER_SIZE);
+    ret = ffio_read_size(s->pb, header, DSS_AUDIO_BLOCK_HEADER_SIZE);
+    if (ret < 0)
+        return ret;
     ctx->swap = !!(header[0] & 0x80);
     offset = 2*header[1] + 2*ctx->swap;
     if (offset < DSS_AUDIO_BLOCK_HEADER_SIZE)
diff -Nru ffmpeg-5.1.8/libavformat/dtshddec.c ffmpeg-5.1.9/libavformat/dtshddec.c
--- ffmpeg-5.1.8/libavformat/dtshddec.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/dtshddec.c	2026-05-05 15:50:55.000000000 +0000
@@ -24,6 +24,7 @@
 #include "libavcodec/dca.h"
 #include "avformat.h"
 #include "internal.h"
+#include "avio_internal.h"
 
 #define AUPR_HDR 0x415550522D484452
 #define AUPRINFO 0x41555052494E464F
@@ -114,7 +115,11 @@
             value = av_malloc(chunk_size);
             if (!value)
                 goto skip;
-            avio_read(pb, value, chunk_size);
+            ret = ffio_read_size(pb, value, chunk_size);
+            if (ret < 0) {
+                av_free(value);
+                goto skip;
+            }
             value[chunk_size - 1] = 0;
             av_dict_set(&s->metadata, "fileinfo", value,
                         AV_DICT_DONT_STRDUP_VAL);
diff -Nru ffmpeg-5.1.8/libavformat/fifo_test.c ffmpeg-5.1.9/libavformat/fifo_test.c
--- ffmpeg-5.1.8/libavformat/fifo_test.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/fifo_test.c	2026-05-05 15:50:52.000000000 +0000
@@ -148,4 +148,3 @@
     .priv_class     = &failing_muxer_class,
     .flags          = AVFMT_NOFILE | AVFMT_ALLOW_FLUSH,
 };
-
diff -Nru ffmpeg-5.1.8/libavformat/flac_picture.c ffmpeg-5.1.9/libavformat/flac_picture.c
--- ffmpeg-5.1.8/libavformat/flac_picture.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/flac_picture.c	2026-05-05 15:50:55.000000000 +0000
@@ -23,6 +23,7 @@
 #include "libavcodec/bytestream.h"
 #include "libavcodec/png.h"
 #include "avformat.h"
+#include "avio_internal.h"
 #include "demux.h"
 #include "flac_picture.h"
 #include "id3v2.h"
@@ -160,8 +161,9 @@
             // If truncation was detected copy all data from block and
             // read missing bytes not included in the block size.
             bytestream2_get_bufferu(&g, data->data, left);
-            if (avio_read(s->pb, data->data + len - trunclen, trunclen) < trunclen)
-                RETURN_ERROR(AVERROR_INVALIDDATA);
+            ret = ffio_read_size(s->pb, data->data + len - trunclen, trunclen);
+            if (ret < 0)
+                goto fail;
         }
     }
     memset(data->data + len, 0, AV_INPUT_BUFFER_PADDING_SIZE);
diff -Nru ffmpeg-5.1.8/libavformat/g726.c ffmpeg-5.1.9/libavformat/g726.c
--- ffmpeg-5.1.8/libavformat/g726.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/g726.c	2026-05-05 14:22:01.000000000 +0000
@@ -97,4 +97,3 @@
     .raw_codec_id   = AV_CODEC_ID_ADPCM_G726LE,
 };
 #endif
-
diff -Nru ffmpeg-5.1.8/libavformat/hls.c ffmpeg-5.1.9/libavformat/hls.c
--- ffmpeg-5.1.8/libavformat/hls.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/hls.c	2026-05-05 15:50:55.000000000 +0000
@@ -958,13 +958,22 @@
             if (pls)
                 pls->finished = 1;
         } else if (av_strstart(line, "#EXTINF:", &ptr)) {
+            double d = atof(ptr) * AV_TIME_BASE;
+            if (d < 0 || d > INT64_MAX || isnan(d)) {
+                av_log(c->ctx, AV_LOG_WARNING, "EXTINF %f unsupported\n", d / AV_TIME_BASE);
+                d = 0;
+            }
+            duration = d;
             is_segment = 1;
-            duration   = atof(ptr) * AV_TIME_BASE;
         } else if (av_strstart(line, "#EXT-X-BYTERANGE:", &ptr)) {
             seg_size = strtoll(ptr, NULL, 10);
             ptr = strchr(ptr, '@');
             if (ptr)
                 seg_offset = strtoll(ptr+1, NULL, 10);
+            if (seg_size < 0 || seg_offset > INT64_MAX - seg_size) {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
         } else if (av_strstart(line, "#", NULL)) {
             av_log(c->ctx, AV_LOG_INFO, "Skip ('%s')\n", line);
             continue;
diff -Nru ffmpeg-5.1.8/libavformat/hls_sample_encryption.c ffmpeg-5.1.9/libavformat/hls_sample_encryption.c
--- ffmpeg-5.1.8/libavformat/hls_sample_encryption.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/hls_sample_encryption.c	2026-05-05 15:50:55.000000000 +0000
@@ -86,6 +86,7 @@
         return;
 
     memcpy(info->setup_data, buf, info->setup_data_length);
+    memset(info->setup_data + info->setup_data_length, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 }
 
 int ff_hls_senc_parse_audio_setup_info(AVStream *st, HLSAudioSetupInfo *info)
diff -Nru ffmpeg-5.1.8/libavformat/hls_sample_encryption.h ffmpeg-5.1.9/libavformat/hls_sample_encryption.h
--- ffmpeg-5.1.8/libavformat/hls_sample_encryption.h	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/hls_sample_encryption.h	2026-05-05 15:50:55.000000000 +0000
@@ -52,7 +52,7 @@
     uint16_t            priming;
     uint8_t             version;
     uint8_t             setup_data_length;
-    uint8_t             setup_data[HLS_MAX_AUDIO_SETUP_DATA_LEN];
+    uint8_t             setup_data[HLS_MAX_AUDIO_SETUP_DATA_LEN + AV_INPUT_BUFFER_PADDING_SIZE];
 } HLSAudioSetupInfo;
 
 
@@ -63,4 +63,3 @@
 int ff_hls_senc_decrypt_frame(enum AVCodecID codec_id, HLSCryptoContext *crypto_ctx, AVPacket *pkt);
 
 #endif /* AVFORMAT_HLS_SAMPLE_ENCRYPTION_H */
-
diff -Nru ffmpeg-5.1.8/libavformat/hlsplaylist.c ffmpeg-5.1.9/libavformat/hlsplaylist.c
--- ffmpeg-5.1.8/libavformat/hlsplaylist.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/hlsplaylist.c	2026-05-05 15:50:52.000000000 +0000
@@ -192,4 +192,3 @@
         return;
     avio_printf(out, "#EXT-X-ENDLIST\n");
 }
-
diff -Nru ffmpeg-5.1.8/libavformat/http.c ffmpeg-5.1.9/libavformat/http.c
--- ffmpeg-5.1.8/libavformat/http.c	2025-11-26 02:41:35.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/http.c	2026-05-05 15:50:55.000000000 +0000
@@ -136,6 +136,7 @@
     char *new_location;
     AVDictionary *redirect_cache;
     uint64_t filesize_from_content_range;
+    int max_redirects;
 } HTTPContext;
 
 #define OFFSET(x) offsetof(HTTPContext, x)
@@ -178,6 +179,7 @@
     { "resource", "The resource requested by a client", OFFSET(resource), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E },
     { "reply_code", "The http status code to return to a client", OFFSET(reply_code), AV_OPT_TYPE_INT, { .i64 = 200}, INT_MIN, 599, E},
     { "short_seek_size", "Threshold to favor readahead over seek.", OFFSET(short_seek_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, D },
+    { "max_redirects", "Maximum number of redirects", OFFSET(max_redirects), AV_OPT_TYPE_INT, { .i64 = MAX_REDIRECTS }, 0, INT_MAX, D },
     { NULL }
 };
 
@@ -233,7 +235,11 @@
             if (err < 0)
                 goto end;
         }
+    } else if (strcmp(proto, "http")) {
+        err = AVERROR(EINVAL);
+        goto end;
     }
+
     if (port < 0)
         port = 80;
 
@@ -362,6 +368,9 @@
 
     cached = redirect_cache_get(s);
     if (cached) {
+        if (redirects++ >= s->max_redirects)
+            return AVERROR(EIO);
+
         av_free(s->location);
         s->location = av_strdup(cached);
         if (!s->location) {
@@ -418,7 +427,7 @@
         s->new_location) {
         /* url moved, get next */
         ffurl_closep(&s->hd);
-        if (redirects++ >= MAX_REDIRECTS)
+        if (redirects++ >= s->max_redirects)
             return AVERROR(EIO);
 
         if (!s->expires) {
diff -Nru ffmpeg-5.1.8/libavformat/icodec.c ffmpeg-5.1.9/libavformat/icodec.c
--- ffmpeg-5.1.8/libavformat/icodec.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/icodec.c	2026-05-05 15:50:55.000000000 +0000
@@ -111,7 +111,7 @@
         avio_skip(pb, 5);
 
         ico->images[i].size   = avio_rl32(pb);
-        if (ico->images[i].size <= 0) {
+        if (ico->images[i].size <= 0 || ico->images[i].size > INT_MAX - 14) {
             av_log(s, AV_LOG_ERROR, "Invalid image size %d\n", ico->images[i].size);
             return AVERROR_INVALIDDATA;
         }
diff -Nru ffmpeg-5.1.8/libavformat/iff.c ffmpeg-5.1.9/libavformat/iff.c
--- ffmpeg-5.1.8/libavformat/iff.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/iff.c	2026-05-05 15:50:55.000000000 +0000
@@ -338,8 +338,10 @@
             if (config != 0xFFFF) {
                 if (config < FF_ARRAY_ELEMS(dsd_loudspeaker_config))
                     st->codecpar->ch_layout = dsd_loudspeaker_config[config];
-                if (!st->codecpar->ch_layout.nb_channels)
+                if (!st->codecpar->ch_layout.nb_channels) {
                     avpriv_request_sample(s, "loudspeaker configuration %d", config);
+                    return AVERROR_PATCHWELCOME;
+                }
             }
             break;
         }
diff -Nru ffmpeg-5.1.8/libavformat/img2dec.c ffmpeg-5.1.9/libavformat/img2dec.c
--- ffmpeg-5.1.8/libavformat/img2dec.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/img2dec.c	2026-05-05 15:50:55.000000000 +0000
@@ -413,8 +413,9 @@
     char filename_bytes[1024];
     char *filename = filename_bytes;
     int i, res;
-    int size[3]           = { 0 }, ret[3] = { 0 };
-    AVIOContext *f[3]     = { NULL };
+    int ret[3] = { 0 };
+    int64_t size[3] = { 0 };
+    AVIOContext *f[3] = { NULL };
     AVCodecParameters *par = s1->streams[0]->codecpar;
 
     if (!s->is_pipe) {
@@ -494,7 +495,15 @@
         }
     }
 
-    res = av_new_packet(pkt, size[0] + size[1] + size[2]);
+    int total_size = 0;
+    for (int i = 0; i < 3; i++) {
+        if ((uint64_t)size[i] > INT_MAX - total_size)
+            return AVERROR_INVALIDDATA;
+
+        total_size += size[i];
+    }
+
+    res = av_new_packet(pkt, total_size);
     if (res < 0) {
         goto fail;
     }
diff -Nru ffmpeg-5.1.8/libavformat/img2enc.c ffmpeg-5.1.9/libavformat/img2enc.c
--- ffmpeg-5.1.8/libavformat/img2enc.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/img2enc.c	2026-05-05 15:50:55.000000000 +0000
@@ -204,6 +204,11 @@
             ysize *= 2;
             usize *= 2;
         }
+        if (ysize + 2*usize + (desc->nb_components > 3) * ysize > pkt->size) {
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+
         if ((ret = write_and_close(s, &pb[0], pkt->data                , ysize)) < 0 ||
             (ret = write_and_close(s, &pb[1], pkt->data + ysize        , usize)) < 0 ||
             (ret = write_and_close(s, &pb[2], pkt->data + ysize + usize, usize)) < 0)
diff -Nru ffmpeg-5.1.8/libavformat/lrcdec.c ffmpeg-5.1.9/libavformat/lrcdec.c
--- ffmpeg-5.1.8/libavformat/lrcdec.c	2025-11-26 02:41:35.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/lrcdec.c	2026-05-05 15:50:55.000000000 +0000
@@ -88,7 +88,7 @@
         return 0;
     }
     int ret = sscanf(p, "%2[[-]%"SCNu32":%lf]", prefix, &mm, &ss);
-    if (ret != 3 || prefix[0] != '[' || ss < 0 || ss > 60) {
+    if (ret != 3 || prefix[0] != '[' || ss < 0 || ss > 60 || !isfinite(ss)) {
         return 0;
     }
     *start = llrint((mm * 60 + ss) * AV_TIME_BASE);
diff -Nru ffmpeg-5.1.8/libavformat/matroskadec.c ffmpeg-5.1.9/libavformat/matroskadec.c
--- ffmpeg-5.1.8/libavformat/matroskadec.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/matroskadec.c	2026-05-05 15:50:55.000000000 +0000
@@ -4043,6 +4043,10 @@
         // Clusters.
         cue_desc.end_offset = cues_start - matroska->segment_start;
     }
+
+    if (cue_desc.end_time_ns < cue_desc.start_time_ns)
+        return (CueDesc) {-1, -1, -1, -1};
+
     return cue_desc;
 }
 
diff -Nru ffmpeg-5.1.8/libavformat/mlvdec.c ffmpeg-5.1.9/libavformat/mlvdec.c
--- ffmpeg-5.1.8/libavformat/mlvdec.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/mlvdec.c	2026-05-05 15:50:55.000000000 +0000
@@ -30,6 +30,7 @@
 #include "avformat.h"
 #include "demux.h"
 #include "internal.h"
+#include "avio_internal.h"
 #include "riff.h"
 
 #define MLV_VERSION "v2.0"
@@ -64,12 +65,15 @@
 {
     unsigned int size;
     uint8_t version[8];
+    int ret;
 
     avio_skip(pb, 4);
     size = avio_rl32(pb);
     if (size < 52)
         return AVERROR_INVALIDDATA;
-    avio_read(pb, version, 8);
+    ret = ffio_read_size(pb, version, 8);
+    if (ret < 0)
+        return ret;
     if (memcmp(version, MLV_VERSION, 5) || avio_rl64(pb) != guid)
         return AVERROR_INVALIDDATA;
     avio_skip(pb, size - 24);
@@ -87,7 +91,7 @@
     }
 
     ret = avio_read(pb, value, size);
-    if (ret != size || !value[0]) {
+    if (ret != size || !size || !value[0]) {
         av_free(value);
         return;
     }
diff -Nru ffmpeg-5.1.8/libavformat/mov.c ffmpeg-5.1.9/libavformat/mov.c
--- ffmpeg-5.1.8/libavformat/mov.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/mov.c	2026-05-05 15:50:55.000000000 +0000
@@ -2705,6 +2705,7 @@
             av_freep(&sc->extradata[j]);
     }
 
+    sc->stsd_count = 0;
     av_freep(&sc->extradata);
     av_freep(&sc->extradata_size);
     return ret;
@@ -3103,6 +3104,9 @@
     av_freep(&sc->sdtp_data);
     sc->sdtp_count = 0;
 
+    if (entries < 0 || entries > SIZE_MAX)
+        return AVERROR(ERANGE);
+
     sc->sdtp_data = av_malloc(entries);
     if (!sc->sdtp_data)
         return AVERROR(ENOMEM);
@@ -3722,7 +3726,12 @@
                st->index, edit_list_index, edit_list_media_time, edit_list_duration);
         edit_list_index++;
         edit_list_dts_counter = edit_list_dts_entry_end;
-        edit_list_dts_entry_end += edit_list_duration;
+        edit_list_dts_entry_end = av_sat_add64(edit_list_dts_entry_end, edit_list_duration);
+        if (edit_list_dts_entry_end == INT64_MAX) {
+            av_log(mov->fc, AV_LOG_ERROR, "Cannot calculate dts entry length with duration %"PRId64"\n",
+                   edit_list_duration);
+            break;
+        }
         num_discarded_begin = 0;
         if (!found_non_empty_edit && edit_list_media_time == -1) {
             empty_edits_sum_duration += edit_list_duration;
@@ -6897,7 +6906,7 @@
     }
 
     for (i = 0; i < sample->subsample_count; i++) {
-        if (sample->subsamples[i].bytes_of_clear_data + sample->subsamples[i].bytes_of_protected_data > size) {
+        if (sample->subsamples[i].bytes_of_clear_data + (int64_t)sample->subsamples[i].bytes_of_protected_data > size) {
             av_log(c->fc, AV_LOG_ERROR, "subsample size exceeds the packet size left\n");
             return AVERROR_INVALIDDATA;
         }
@@ -6952,7 +6961,7 @@
     }
 
     for (i = 0; i < sample->subsample_count; i++) {
-        if (sample->subsamples[i].bytes_of_clear_data + sample->subsamples[i].bytes_of_protected_data > size) {
+        if (sample->subsamples[i].bytes_of_clear_data + (int64_t)sample->subsamples[i].bytes_of_protected_data > size) {
             av_log(c->fc, AV_LOG_ERROR, "subsample size exceeds the packet size left\n");
             return AVERROR_INVALIDDATA;
         }
@@ -7014,7 +7023,7 @@
     }
 
     for (i = 0; i < sample->subsample_count; i++) {
-        if (sample->subsamples[i].bytes_of_clear_data + sample->subsamples[i].bytes_of_protected_data > size) {
+        if (sample->subsamples[i].bytes_of_clear_data + (int64_t)sample->subsamples[i].bytes_of_protected_data > size) {
             av_log(c->fc, AV_LOG_ERROR, "subsample size exceeds the packet size left\n");
             return AVERROR_INVALIDDATA;
         }
@@ -7079,7 +7088,7 @@
     }
 
     for (i = 0; i < sample->subsample_count; i++) {
-        if (sample->subsamples[i].bytes_of_clear_data + sample->subsamples[i].bytes_of_protected_data > size) {
+        if (sample->subsamples[i].bytes_of_clear_data + (int64_t)sample->subsamples[i].bytes_of_protected_data > size) {
             av_log(c->fc, AV_LOG_ERROR, "subsample size exceeds the packet size left\n");
             return AVERROR_INVALIDDATA;
         }
@@ -7208,7 +7217,7 @@
         return 0;
     st = c->fc->streams[c->fc->nb_streams-1];
 
-    if ((uint64_t)atom.size > (1<<30) || atom.size < 11)
+    if ((uint64_t)atom.size > (1<<30) || atom.size < 11 || st->codecpar->extradata)
         return AVERROR_INVALIDDATA;
 
     /* Check OpusSpecificBox version. */
@@ -7226,7 +7235,11 @@
     AV_WL32(st->codecpar->extradata, MKTAG('O','p','u','s'));
     AV_WL32(st->codecpar->extradata + 4, MKTAG('H','e','a','d'));
     AV_WB8(st->codecpar->extradata + 8, 1); /* OpusHead version */
-    avio_read(pb, st->codecpar->extradata + 9, size - 9);
+    if ((ret = ffio_read_size(pb, st->codecpar->extradata + 9, size - 9)) < 0) {
+        av_freep(&st->codecpar->extradata);
+        st->codecpar->extradata_size = 0;
+        return ret;
+    }
 
     /* OpusSpecificBox is stored in big-endian, but OpusHead is
        little-endian; aside from the preceeding magic and version they're
diff -Nru ffmpeg-5.1.8/libavformat/mpegts.c ffmpeg-5.1.9/libavformat/mpegts.c
--- ffmpeg-5.1.8/libavformat/mpegts.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/mpegts.c	2026-05-05 15:50:55.000000000 +0000
@@ -1675,7 +1675,7 @@
 
     ret = parse_mp4_descr(&d, avio_tell(&d.pb.pub), size, MP4IODescrTag);
 
-    *descr_count = d.descr_count;
+    *descr_count += d.descr_count;
     return ret;
 }
 
@@ -2373,7 +2373,8 @@
     av_log(ts->stream, AV_LOG_TRACE, "pcr_pid=0x%x\n", pcr_pid);
 
     program_info_length = get16(&p, p_end);
-    if (program_info_length < 0)
+
+    if (program_info_length < 0 || (program_info_length & 0xFFF) > p_end - p)
         return;
     program_info_length &= 0xfff;
     while (program_info_length >= 2) {
@@ -2388,12 +2389,12 @@
             // something else is broken, exit the program_descriptors_loop
             break;
         program_info_length -= len;
-        if (tag == IOD_DESCRIPTOR) {
+        if (tag == IOD_DESCRIPTOR && len >= 2) {
             get8(&p, p_end); // scope
             get8(&p, p_end); // label
             len -= 2;
             mp4_read_iods(ts->stream, p, len, mp4_descr + mp4_descr_count,
-                          &mp4_descr_count, MAX_MP4_DESCR_COUNT);
+                          &mp4_descr_count, MAX_MP4_DESCR_COUNT - mp4_descr_count);
         } else if (tag == REGISTRATION_DESCRIPTOR && len >= 4) {
             prog_reg_desc = bytestream_get_le32(&p);
             len -= 4;
diff -Nru ffmpeg-5.1.8/libavformat/mpegtsenc.c ffmpeg-5.1.9/libavformat/mpegtsenc.c
--- ffmpeg-5.1.8/libavformat/mpegtsenc.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/mpegtsenc.c	2026-05-05 15:50:55.000000000 +0000
@@ -51,6 +51,7 @@
     int discontinuity;
     void (*write_packet)(struct MpegTSSection *s, const uint8_t *packet);
     void *opaque;
+    int remaining;
 } MpegTSSection;
 
 typedef struct MpegTSService {
@@ -1001,6 +1002,10 @@
         av_log(s, AV_LOG_ERROR, "Too long service or provider name\n");
         goto fail;
     }
+    ts->sdt.remaining -= 10 + service->provider_name[0] + service->name[0];
+    if (ts->sdt.remaining < 0)
+        goto fail;
+
     if (av_dynarray_add_nofree(&ts->services, &ts->nb_services, service) < 0)
         goto fail;
 
@@ -1111,6 +1116,8 @@
     // round up to a whole number of TS packets
     ts->pes_payload_size = (ts->pes_payload_size + 14 + 183) / 184 * 184 - 14;
 
+    ts->sdt.remaining    = SECTION_LENGTH - 3;
+
     if (!s->nb_programs) {
         /* allocate a single DVB service */
         if (!mpegts_add_service(s, ts->service_id, s->metadata, NULL))
diff -Nru ffmpeg-5.1.8/libavformat/mpjpegdec.c ffmpeg-5.1.9/libavformat/mpjpegdec.c
--- ffmpeg-5.1.8/libavformat/mpjpegdec.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/mpjpegdec.c	2026-05-05 15:50:52.000000000 +0000
@@ -393,5 +393,3 @@
     .priv_class        = &mpjpeg_demuxer_class,
     .flags             = AVFMT_NOTIMESTAMPS,
 };
-
-
diff -Nru ffmpeg-5.1.8/libavformat/os_support.h ffmpeg-5.1.9/libavformat/os_support.h
--- ffmpeg-5.1.8/libavformat/os_support.h	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/os_support.h	2026-05-05 14:21:58.000000000 +0000
@@ -42,6 +42,7 @@
 
 #ifdef _WIN32
 #  include <fcntl.h>
+#  include <stdint.h>
 #  ifdef lseek
 #   undef lseek
 #  endif
diff -Nru ffmpeg-5.1.8/libavformat/pcm.c ffmpeg-5.1.9/libavformat/pcm.c
--- ffmpeg-5.1.8/libavformat/pcm.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/pcm.c	2026-05-05 15:50:55.000000000 +0000
@@ -57,7 +57,8 @@
                      int stream_index, int64_t timestamp, int flags)
 {
     AVStream *st;
-    int block_align, byte_rate;
+    int block_align;
+    int64_t byte_rate;
     int64_t pos, ret;
 
     st = s->streams[0];
@@ -65,9 +66,9 @@
     block_align = st->codecpar->block_align ? st->codecpar->block_align :
         (av_get_bits_per_sample(st->codecpar->codec_id) * st->codecpar->ch_layout.nb_channels) >> 3;
     byte_rate = st->codecpar->bit_rate ? st->codecpar->bit_rate >> 3 :
-        block_align * st->codecpar->sample_rate;
+        block_align * (int64_t)st->codecpar->sample_rate;
 
-    if (block_align <= 0 || byte_rate <= 0)
+    if (block_align <= 0 || byte_rate <= 0 || FFMAX(timestamp, st->time_base.num) > INT64_MAX / byte_rate)
         return -1;
     if (timestamp < 0) timestamp = 0;
 
@@ -76,6 +77,9 @@
                          st->time_base.num,
                          st->time_base.den * (int64_t)block_align,
                          (flags & AVSEEK_FLAG_BACKWARD) ? AV_ROUND_DOWN : AV_ROUND_UP);
+
+    if (pos > (INT64_MAX - FFMAX(ffformatcontext(s)->data_offset, 0)) / block_align)
+        return -1;
     pos *= block_align;
 
     /* recompute exact position */
diff -Nru ffmpeg-5.1.8/libavformat/rdt.c ffmpeg-5.1.9/libavformat/rdt.c
--- ffmpeg-5.1.8/libavformat/rdt.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/rdt.c	2026-05-05 15:50:52.000000000 +0000
@@ -572,4 +572,3 @@
 RDT_HANDLER(live_audio, "x-pn-multirate-realaudio-live", AVMEDIA_TYPE_AUDIO);
 RDT_HANDLER(video,      "x-pn-realvideo",                AVMEDIA_TYPE_VIDEO);
 RDT_HANDLER(audio,      "x-pn-realaudio",                AVMEDIA_TYPE_AUDIO);
-
diff -Nru ffmpeg-5.1.8/libavformat/rsd.c ffmpeg-5.1.9/libavformat/rsd.c
--- ffmpeg-5.1.8/libavformat/rsd.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/rsd.c	2026-05-05 15:50:55.000000000 +0000
@@ -22,6 +22,7 @@
 #include "libavutil/intreadwrite.h"
 #include "avformat.h"
 #include "avio.h"
+#include "avio_internal.h"
 #include "demux.h"
 #include "internal.h"
 
@@ -131,9 +132,9 @@
             return ret;
 
         for (i = 0; i < par->ch_layout.nb_channels; i++) {
-            if (avio_feof(pb))
-                return AVERROR_EOF;
-            avio_read(s->pb, st->codecpar->extradata + 32 * i, 32);
+            ret = ffio_read_size(s->pb, st->codecpar->extradata + 32 * i, 32);
+            if (ret < 0)
+                return ret;
             avio_skip(s->pb, 8);
         }
         break;
diff -Nru ffmpeg-5.1.8/libavformat/rtmpproto.c ffmpeg-5.1.9/libavformat/rtmpproto.c
--- ffmpeg-5.1.8/libavformat/rtmpproto.c	2025-11-26 02:41:35.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/rtmpproto.c	2026-05-05 15:50:55.000000000 +0000
@@ -1164,6 +1164,10 @@
     if (!memcmp(in_data, "CWS", 3)) {
 #if CONFIG_ZLIB
         int64_t out_size;
+        if (in_size < 8) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
         /* Decompress the SWF player file using Zlib. */
         if (!(out_data = av_malloc(8))) {
             ret = AVERROR(ENOMEM);
@@ -2207,9 +2211,14 @@
 {
     int old_flv_size;
 
+    if (size < 0)
+        return AVERROR(EINVAL);
+
     // generate packet header and put data into buffer for FLV demuxer
     if (rt->flv_off < rt->flv_size) {
         // There is old unread data in the buffer, thus append at the end
+        if (rt->flv_size > INT_MAX - size)
+            return AVERROR(ERANGE);
         old_flv_size  = rt->flv_size;
         rt->flv_size += size;
     } else {
@@ -2236,7 +2245,11 @@
         rt->has_video = 1;
     }
 
+    if (size > INT_MAX - 15)
+        return AVERROR(ERANGE);
     old_flv_size = update_offset(rt, size + 15);
+    if (old_flv_size < 0)
+        return old_flv_size;
 
     if ((ret = av_reallocp(&rt->flv_data, rt->flv_size)) < 0) {
         rt->flv_size = rt->flv_off = 0;
@@ -2366,48 +2379,50 @@
 static int handle_metadata(RTMPContext *rt, RTMPPacket *pkt)
 {
     int ret, old_flv_size, type;
-    const uint8_t *next;
-    uint8_t *p;
+    PutByteContext pbc;
+    GetByteContext gbc;
     uint32_t size;
     uint32_t ts, cts, pts = 0;
 
     old_flv_size = update_offset(rt, pkt->size);
+    if (old_flv_size < 0)
+        return old_flv_size;
 
     if ((ret = av_reallocp(&rt->flv_data, rt->flv_size)) < 0) {
         rt->flv_size = rt->flv_off = 0;
         return ret;
     }
 
-    next = pkt->data;
-    p    = rt->flv_data + old_flv_size;
+    bytestream2_init(&gbc, pkt->data, pkt->size);
+    bytestream2_init_writer(&pbc, rt->flv_data, rt->flv_size);
+    bytestream2_skip_p(&pbc, old_flv_size);
 
     /* copy data while rewriting timestamps */
     ts = pkt->timestamp;
 
-    while (next - pkt->data < pkt->size - RTMP_HEADER) {
-        type = bytestream_get_byte(&next);
-        size = bytestream_get_be24(&next);
-        cts  = bytestream_get_be24(&next);
-        cts |= bytestream_get_byte(&next) << 24;
+    while (bytestream2_get_bytes_left(&gbc) > RTMP_HEADER) {
+        type = bytestream2_get_byte(&gbc);
+        size = bytestream2_get_be24(&gbc);
+        cts  = bytestream2_get_be24(&gbc);
+        cts |= bytestream2_get_byte(&gbc) << 24;
         if (!pts)
             pts = cts;
         ts += cts - pts;
         pts = cts;
-        if (size + 3 + 4 > pkt->data + pkt->size - next)
+        if (size + 3 + 4 > bytestream2_get_bytes_left(&gbc))
             break;
-        bytestream_put_byte(&p, type);
-        bytestream_put_be24(&p, size);
-        bytestream_put_be24(&p, ts);
-        bytestream_put_byte(&p, ts >> 24);
-        memcpy(p, next, size + 3 + 4);
-        p    += size + 3;
-        bytestream_put_be32(&p, size + RTMP_HEADER);
-        next += size + 3 + 4;
+        bytestream2_put_byte(&pbc, type);
+        bytestream2_put_be24(&pbc, size);
+        bytestream2_put_be24(&pbc, ts);
+        bytestream2_put_byte(&pbc, ts >> 24);
+        bytestream2_copy_buffer(&pbc, &gbc, size + 3);
+        bytestream2_skip(&gbc, 4);
+        bytestream2_put_be32(&pbc, size + RTMP_HEADER);
     }
-    if (p != rt->flv_data + rt->flv_size) {
+    if (bytestream2_tell_p(&pbc) != rt->flv_size) {
         av_log(rt, AV_LOG_WARNING, "Incomplete flv packets in "
                                      "RTMP_PT_METADATA packet\n");
-        rt->flv_size = p - rt->flv_data;
+        rt->flv_size = bytestream2_tell_p(&pbc);
     }
 
     return 0;
@@ -2674,7 +2689,8 @@
         if (rt->listen)
             ff_url_join(buf, sizeof(buf), "tcp", NULL, hostname, port,
                         "?listen&listen_timeout=%d&tcp_nodelay=%d",
-                        rt->listen_timeout * 1000, rt->tcp_nodelay);
+                        rt->listen_timeout < 0 ? -1 : rt->listen_timeout * 1000,
+                        rt->tcp_nodelay);
         else
             ff_url_join(buf, sizeof(buf), "tcp", NULL, hostname, port, "?tcp_nodelay=%d", rt->tcp_nodelay);
     }
diff -Nru ffmpeg-5.1.8/libavformat/rtpdec_jpeg.c ffmpeg-5.1.9/libavformat/rtpdec_jpeg.c
--- ffmpeg-5.1.8/libavformat/rtpdec_jpeg.c	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/rtpdec_jpeg.c	2026-05-05 15:50:55.000000000 +0000
@@ -274,6 +274,12 @@
                 av_log(ctx, AV_LOG_WARNING, "Only 8-bit precision is supported.\n");
 
             if (qtable_len > 0) {
+                if (qtable_len != 128) {
+                    av_log(ctx, AV_LOG_ERROR, "Invalid RTP/JPEG packet. Invalid qtable length %d.\n", qtable_len);
+                    if (qtable_len%64 || qtable_len > 4*64)
+                        return AVERROR_INVALIDDATA;
+                }
+
                 if (len < qtable_len) {
                     av_log(ctx, AV_LOG_ERROR, "Too short RTP/JPEG packet.\n");
                     return AVERROR_INVALIDDATA;
diff -Nru ffmpeg-5.1.8/libavformat/rtpdec_latm.c ffmpeg-5.1.9/libavformat/rtpdec_latm.c
--- ffmpeg-5.1.8/libavformat/rtpdec_latm.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/rtpdec_latm.c	2026-05-05 15:50:55.000000000 +0000
@@ -72,11 +72,15 @@
     cur_len = 0;
     while (data->pos < data->len) {
         uint8_t val = data->buf[data->pos++];
+        if (val > data->len - cur_len) {
+            av_log(ctx, AV_LOG_ERROR, "Malformed LATM packet\n");
+            return AVERROR_INVALIDDATA;
+        }
         cur_len += val;
         if (val != 0xff)
             break;
     }
-    if (data->pos + cur_len > data->len) {
+    if (cur_len > data->len - data->pos) {
         av_log(ctx, AV_LOG_ERROR, "Malformed LATM packet\n");
         return AVERROR(EIO);
     }
diff -Nru ffmpeg-5.1.8/libavformat/rtpdec_mpeg4.c ffmpeg-5.1.9/libavformat/rtpdec_mpeg4.c
--- ffmpeg-5.1.8/libavformat/rtpdec_mpeg4.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/rtpdec_mpeg4.c	2026-05-05 15:50:55.000000000 +0000
@@ -133,7 +133,7 @@
        length in bits */
     au_headers_length = AV_RB16(buf);
 
-    if (au_headers_length > RTP_MAX_PACKET_LENGTH)
+    if (au_headers_length == 0 || au_headers_length > RTP_MAX_PACKET_LENGTH)
       return -1;
 
     data->au_headers_length_bytes = (au_headers_length + 7) / 8;
diff -Nru ffmpeg-5.1.8/libavformat/rtpdec_qdm2.c ffmpeg-5.1.9/libavformat/rtpdec_qdm2.c
--- ffmpeg-5.1.8/libavformat/rtpdec_qdm2.c	2025-08-05 00:22:34.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/rtpdec_qdm2.c	2026-05-05 15:50:55.000000000 +0000
@@ -186,8 +186,9 @@
  */
 static int qdm2_restore_block(PayloadContext *qdm, AVStream *st, AVPacket *pkt)
 {
-    int to_copy, n, res, include_csum;
+    int to_copy, n, res;
     uint8_t *p, *csum_pos = NULL;
+    int include_csum = qdm->block_type == 2 || qdm->block_type == 4;
 
     /* create packet to hold subpkts into a superblock */
     av_assert0(qdm->cache > 0);
@@ -196,6 +197,11 @@
             break;
     av_assert0(n < 0x80);
 
+    int min_size = 2 + (qdm->len[n] > 0xff) + 2*include_csum;
+
+    if (qdm->block_size < min_size)
+        return AVERROR_INVALIDDATA;
+
     if ((res = av_new_packet(pkt, qdm->block_size)) < 0)
         return res;
     memset(pkt->data, 0, pkt->size);
@@ -211,7 +217,7 @@
         *p++ = qdm->block_type;
         *p++ = qdm->len[n];
     }
-    if ((include_csum = (qdm->block_type == 2 || qdm->block_type == 4))) {
+    if (include_csum) {
         csum_pos = p;
         p       += 2;
     }
diff -Nru ffmpeg-5.1.8/libavformat/rtsp.c ffmpeg-5.1.9/libavformat/rtsp.c
--- ffmpeg-5.1.8/libavformat/rtsp.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/rtsp.c	2026-05-05 15:50:55.000000000 +0000
@@ -582,9 +582,10 @@
                              NULL, NULL, 0, p);
                 if (proto[0] == '\0') {
                     /* relative control URL */
-                    if (rtsp_st->control_url[strlen(rtsp_st->control_url)-1]!='/')
-                    av_strlcat(rtsp_st->control_url, "/",
-                               sizeof(rtsp_st->control_url));
+                    size_t len = strlen(rtsp_st->control_url);
+                    if (len == 0 || rtsp_st->control_url[len - 1] != '/')
+                        av_strlcat(rtsp_st->control_url, "/",
+                                   sizeof(rtsp_st->control_url));
                     av_strlcat(rtsp_st->control_url, p,
                                sizeof(rtsp_st->control_url));
                 } else
@@ -1772,7 +1773,8 @@
     } else if (!strcmp(proto, "satip")) {
         av_strlcpy(proto, "rtsp", sizeof(proto));
         rt->server_type = RTSP_SERVER_SATIP;
-    }
+    } else if (strcmp(proto, "rtsp"))
+        return AVERROR_INVALIDDATA;
 
     if (*auth) {
         av_strlcpy(rt->auth, auth, sizeof(rt->auth));
@@ -1838,6 +1840,15 @@
                 err = AVERROR(ENOMEM);
                 goto fail;
             }
+        }
+
+        if (!rt->rtsp_hd->protocol_blacklist && s->protocol_blacklist) {
+            rt->rtsp_hd->protocol_blacklist = av_strdup(s->protocol_blacklist);
+            if (!rt->rtsp_hd->protocol_blacklist) {
+                av_dict_free(&options);
+                err = AVERROR(ENOMEM);
+                goto fail;
+            }
         }
 
         /* complete the connection */
diff -Nru ffmpeg-5.1.8/libavformat/rtspdec.c ffmpeg-5.1.9/libavformat/rtspdec.c
--- ffmpeg-5.1.8/libavformat/rtspdec.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/rtspdec.c	2026-05-05 15:50:55.000000000 +0000
@@ -188,7 +188,7 @@
         rtsp_send_reply(s, RTSP_STATUS_SERVICE, NULL, request.seq);
         return AVERROR_OPTION_NOT_FOUND;
     }
-    if (request.content_length) {
+    if (request.content_length > 0) {
         sdp = av_malloc(request.content_length + 1);
         if (!sdp)
             return AVERROR(ENOMEM);
@@ -212,10 +212,10 @@
         return 0;
     }
     av_log(s, AV_LOG_ERROR,
-           "Content-Length header value exceeds sdp allocated buffer (4KB)\n");
+           "Invalid ANNOUNCE Content-Length %d\n", request.content_length);
     rtsp_send_reply(s, RTSP_STATUS_INTERNAL,
-                    "Content-Length exceeds buffer size", request.seq);
-    return AVERROR(EIO);
+                    "Invalid Content-Length", request.seq);
+    return AVERROR_INVALIDDATA;
 }
 
 static int rtsp_read_options(AVFormatContext *s)
diff -Nru ffmpeg-5.1.8/libavformat/scd.c ffmpeg-5.1.9/libavformat/scd.c
--- ffmpeg-5.1.8/libavformat/scd.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/scd.c	2026-05-05 15:50:55.000000000 +0000
@@ -28,6 +28,7 @@
 #include "libavutil/avassert.h"
 #include "libavformat/internal.h"
 #include "avformat.h"
+#include "avio_internal.h"
 
 #define SCD_MAGIC              ((uint64_t)MKBETAG('S', 'E', 'D', 'B') << 32 | \
                                           MKBETAG('S', 'S', 'C', 'F'))
@@ -118,7 +119,7 @@
     SCDDemuxContext  *ctx = s->priv_data;
     uint8_t buf[SCD_OFFSET_HEADER_SIZE];
 
-    if ((ret = avio_read(s->pb, buf, SCD_OFFSET_HEADER_SIZE)) < 0)
+    if ((ret = ffio_read_size(s->pb, buf, SCD_OFFSET_HEADER_SIZE)) < 0)
         return ret;
 
     ctx->hdr.table0.count  = AV_RB16(buf +  0);
diff -Nru ffmpeg-5.1.8/libavformat/segafilm.c ffmpeg-5.1.9/libavformat/segafilm.c
--- ffmpeg-5.1.8/libavformat/segafilm.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/segafilm.c	2026-05-05 15:50:55.000000000 +0000
@@ -160,7 +160,7 @@
         st->codecpar->height = AV_RB32(&scratch[12]);
 
         if (film->video_type == AV_CODEC_ID_RAWVIDEO) {
-            if (scratch[20] == 24) {
+            if (film->version == 0 || scratch[20] == 24) {
                 st->codecpar->format = AV_PIX_FMT_RGB24;
             } else {
                 av_log(s, AV_LOG_ERROR, "raw video is using unhandled %dbpp\n", scratch[20]);
diff -Nru ffmpeg-5.1.8/libavformat/vividas.c ffmpeg-5.1.9/libavformat/vividas.c
--- ffmpeg-5.1.8/libavformat/vividas.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/vividas.c	2026-05-05 15:50:55.000000000 +0000
@@ -584,7 +584,9 @@
         block_type = avio_r8(pb);
 
         if (block_type == 22) {
-            avio_read(pb, keybuffer, 187);
+            ret = ffio_read_size(pb, keybuffer, 187);
+            if (ret < 0)
+                return ret;
             b22_key = decode_key(keybuffer);
             b22_size = avio_rl32(pb);
         }
@@ -718,8 +720,10 @@
         }
         last_start =
         viv->audio_subpackets[viv->n_audio_subpackets].start = (int)(off - avio_tell(pb));
-        if (last_start < last)
+        if (last_start < last) {
+            viv->n_audio_subpackets = 0;
             return AVERROR_INVALIDDATA;
+        }
         viv->current_audio_subpacket = 0;
 
     } else {
diff -Nru ffmpeg-5.1.8/libavformat/wavdec.c ffmpeg-5.1.9/libavformat/wavdec.c
--- ffmpeg-5.1.8/libavformat/wavdec.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/wavdec.c	2026-05-05 15:50:55.000000000 +0000
@@ -683,7 +683,8 @@
     int64_t size;
 
     while (!avio_feof(pb)) {
-        avio_read(pb, guid, 16);
+        if (avio_read(pb, guid, 16) != 16)
+            break;
         size = avio_rl64(pb);
         if (size <= 24 || size > INT64_MAX - 8)
             return AVERROR_INVALIDDATA;
diff -Nru ffmpeg-5.1.8/libavformat/wtvdec.c ffmpeg-5.1.9/libavformat/wtvdec.c
--- ffmpeg-5.1.8/libavformat/wtvdec.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/wtvdec.c	2026-05-05 15:50:55.000000000 +0000
@@ -885,7 +885,8 @@
                 AVStream *st = s->streams[stream_index];
                 uint8_t language[4];
                 avio_skip(pb, 12);
-                avio_read(pb, language, 3);
+                if (avio_read(pb, language, 3) != 3)
+                    return AVERROR_INVALIDDATA;
                 if (language[0]) {
                     language[3] = 0;
                     av_dict_set(&st->metadata, "language", language, 0);
diff -Nru ffmpeg-5.1.8/libavformat/xwma.c ffmpeg-5.1.9/libavformat/xwma.c
--- ffmpeg-5.1.8/libavformat/xwma.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/xwma.c	2026-05-05 15:50:55.000000000 +0000
@@ -267,7 +267,7 @@
              * an offset / timestamp pair.
              */
             av_add_index_entry(st,
-                               cur_pos + (i+1) * st->codecpar->block_align, /* pos */
+                               cur_pos + (i+1LL) * st->codecpar->block_align, /* pos */
                                dpds_table[i] / bytes_per_sample,            /* timestamp */
                                st->codecpar->block_align,                   /* size */
                                0,                                           /* duration */
diff -Nru ffmpeg-5.1.8/libavformat/yuv4mpegenc.c ffmpeg-5.1.9/libavformat/yuv4mpegenc.c
--- ffmpeg-5.1.8/libavformat/yuv4mpegenc.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavformat/yuv4mpegenc.c	2026-05-05 15:50:55.000000000 +0000
@@ -190,6 +190,9 @@
 
     width  = st->codecpar->width;
     height = st->codecpar->height;
+    if (frame->width != width || frame->height != height)
+        return AVERROR(EINVAL);
+
     desc   = av_pix_fmt_desc_get(st->codecpar->format);
 
     /* The following code presumes all planes to be non-interleaved. */
diff -Nru ffmpeg-5.1.8/libavutil/aarch64/float_dsp_neon.S ffmpeg-5.1.9/libavutil/aarch64/float_dsp_neon.S
--- ffmpeg-5.1.8/libavutil/aarch64/float_dsp_neon.S	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavutil/aarch64/float_dsp_neon.S	2026-03-16 18:10:00.000000000 +0000
@@ -25,16 +25,16 @@
 
 function ff_vector_fmul_neon, export=1
 1:      subs            w3,  w3,  #16
-        ld1             {v0.4S, v1.4S}, [x1], #32
-        ld1             {v2.4S, v3.4S}, [x1], #32
-        ld1             {v4.4S, v5.4S}, [x2], #32
-        ld1             {v6.4S, v7.4S}, [x2], #32
-        fmul            v16.4S, v0.4S,  v4.4S
-        fmul            v17.4S, v1.4S,  v5.4S
-        fmul            v18.4S, v2.4S,  v6.4S
-        fmul            v19.4S, v3.4S,  v7.4S
-        st1             {v16.4S, v17.4S}, [x0], #32
-        st1             {v18.4S, v19.4S}, [x0], #32
+        ld1             {v0.4s, v1.4s}, [x1], #32
+        ld1             {v2.4s, v3.4s}, [x1], #32
+        ld1             {v4.4s, v5.4s}, [x2], #32
+        ld1             {v6.4s, v7.4s}, [x2], #32
+        fmul            v16.4s, v0.4s,  v4.4s
+        fmul            v17.4s, v1.4s,  v5.4s
+        fmul            v18.4s, v2.4s,  v6.4s
+        fmul            v19.4s, v3.4s,  v7.4s
+        st1             {v16.4s, v17.4s}, [x0], #32
+        st1             {v18.4s, v19.4s}, [x0], #32
         b.ne            1b
         ret
 endfunc
@@ -42,16 +42,16 @@
 function ff_vector_fmac_scalar_neon, export=1
         mov             x3,  #-32
 1:      subs            w2,  w2,  #16
-        ld1             {v16.4S, v17.4S}, [x0], #32
-        ld1             {v18.4S, v19.4S}, [x0], x3
-        ld1             {v4.4S,  v5.4S},  [x1], #32
-        ld1             {v6.4S,  v7.4S},  [x1], #32
-        fmla            v16.4S, v4.4S,  v0.S[0]
-        fmla            v17.4S, v5.4S,  v0.S[0]
-        fmla            v18.4S, v6.4S,  v0.S[0]
-        fmla            v19.4S, v7.4S,  v0.S[0]
-        st1             {v16.4S, v17.4S}, [x0], #32
-        st1             {v18.4S, v19.4S}, [x0], #32
+        ld1             {v16.4s, v17.4s}, [x0], #32
+        ld1             {v18.4s, v19.4s}, [x0], x3
+        ld1             {v4.4s,  v5.4s},  [x1], #32
+        ld1             {v6.4s,  v7.4s},  [x1], #32
+        fmla            v16.4s, v4.4s,  v0.s[0]
+        fmla            v17.4s, v5.4s,  v0.s[0]
+        fmla            v18.4s, v6.4s,  v0.s[0]
+        fmla            v19.4s, v7.4s,  v0.s[0]
+        st1             {v16.4s, v17.4s}, [x0], #32
+        st1             {v18.4s, v19.4s}, [x0], #32
         b.ne            1b
         ret
 endfunc
@@ -59,43 +59,43 @@
 function ff_vector_fmul_scalar_neon, export=1
         mov             w4,  #15
         bics            w3,  w2,  w4
-        dup             v16.4S, v0.S[0]
+        dup             v16.4s, v0.s[0]
         b.eq            3f
-        ld1             {v0.4S, v1.4S}, [x1], #32
+        ld1             {v0.4s, v1.4s}, [x1], #32
 1:      subs            w3,  w3,  #16
-        fmul            v0.4S,  v0.4S,  v16.4S
-        ld1             {v2.4S, v3.4S}, [x1], #32
-        fmul            v1.4S,  v1.4S,  v16.4S
-        fmul            v2.4S,  v2.4S,  v16.4S
-        st1             {v0.4S, v1.4S}, [x0], #32
-        fmul            v3.4S,  v3.4S,  v16.4S
+        fmul            v0.4s,  v0.4s,  v16.4s
+        ld1             {v2.4s, v3.4s}, [x1], #32
+        fmul            v1.4s,  v1.4s,  v16.4s
+        fmul            v2.4s,  v2.4s,  v16.4s
+        st1             {v0.4s, v1.4s}, [x0], #32
+        fmul            v3.4s,  v3.4s,  v16.4s
         b.eq            2f
-        ld1             {v0.4S, v1.4S}, [x1], #32
-        st1             {v2.4S, v3.4S}, [x0], #32
+        ld1             {v0.4s, v1.4s}, [x1], #32
+        st1             {v2.4s, v3.4s}, [x0], #32
         b               1b
 2:      ands            w2,  w2,  #15
-        st1             {v2.4S, v3.4S}, [x0], #32
+        st1             {v2.4s, v3.4s}, [x0], #32
         b.eq            4f
-3:      ld1             {v0.4S}, [x1], #16
-        fmul            v0.4S,  v0.4S,  v16.4S
-        st1             {v0.4S}, [x0], #16
+3:      ld1             {v0.4s}, [x1], #16
+        fmul            v0.4s,  v0.4s,  v16.4s
+        st1             {v0.4s}, [x0], #16
         subs            w2,  w2,  #4
         b.gt            3b
 4:      ret
 endfunc
 
 function ff_vector_dmul_scalar_neon, export=1
-        dup             v16.2D, v0.D[0]
-        ld1             {v0.2D, v1.2D}, [x1], #32
+        dup             v16.2d, v0.d[0]
+        ld1             {v0.2d, v1.2d}, [x1], #32
 1:      subs            w2,  w2,  #8
-        fmul            v0.2D,  v0.2D,  v16.2D
-        ld1             {v2.2D, v3.2D}, [x1], #32
-        fmul            v1.2D,  v1.2D,  v16.2D
-        fmul            v2.2D,  v2.2D,  v16.2D
-        st1             {v0.2D, v1.2D}, [x0], #32
-        fmul            v3.2D,  v3.2D,  v16.2D
-        ld1             {v0.2D, v1.2D}, [x1], #32
-        st1             {v2.2D, v3.2D}, [x0], #32
+        fmul            v0.2d,  v0.2d,  v16.2d
+        ld1             {v2.2d, v3.2d}, [x1], #32
+        fmul            v1.2d,  v1.2d,  v16.2d
+        fmul            v2.2d,  v2.2d,  v16.2d
+        st1             {v0.2d, v1.2d}, [x0], #32
+        fmul            v3.2d,  v3.2d,  v16.2d
+        ld1             {v0.2d, v1.2d}, [x1], #32
+        st1             {v2.2d, v3.2d}, [x0], #32
         b.gt            1b
         ret
 endfunc
@@ -108,49 +108,49 @@
         add             x6,  x3,  x5, lsl #3    // win  + 8 * (len - 2)
         add             x5,  x0,  x5, lsl #3    // dst  + 8 * (len - 2)
         mov             x7,  #-16
-        ld1             {v0.4S},  [x1], #16     // s0
-        ld1             {v2.4S},  [x3], #16     // wi
-        ld1             {v1.4S},  [x2], x7      // s1
-1:      ld1             {v3.4S},  [x6], x7      // wj
+        ld1             {v0.4s},  [x1], #16     // s0
+        ld1             {v2.4s},  [x3], #16     // wi
+        ld1             {v1.4s},  [x2], x7      // s1
+1:      ld1             {v3.4s},  [x6], x7      // wj
         subs            x4,  x4,  #4
-        fmul            v17.4S, v0.4S,  v2.4S   // s0 * wi
-        rev64           v4.4S,  v1.4S
-        rev64           v5.4S,  v3.4S
-        rev64           v17.4S, v17.4S
-        ext             v4.16B,  v4.16B,  v4.16B,  #8 // s1_r
-        ext             v5.16B,  v5.16B,  v5.16B,  #8 // wj_r
-        ext             v17.16B, v17.16B, v17.16B, #8 // (s0 * wi)_rev
-        fmul            v16.4S, v0.4S,  v5.4S  // s0 * wj_r
-        fmla            v17.4S, v1.4S,  v3.4S  // (s0 * wi)_rev + s1 * wj
+        fmul            v17.4s, v0.4s,  v2.4s   // s0 * wi
+        rev64           v4.4s,  v1.4s
+        rev64           v5.4s,  v3.4s
+        rev64           v17.4s, v17.4s
+        ext             v4.16b,  v4.16b,  v4.16b,  #8 // s1_r
+        ext             v5.16b,  v5.16b,  v5.16b,  #8 // wj_r
+        ext             v17.16b, v17.16b, v17.16b, #8 // (s0 * wi)_rev
+        fmul            v16.4s, v0.4s,  v5.4s  // s0 * wj_r
+        fmla            v17.4s, v1.4s,  v3.4s  // (s0 * wi)_rev + s1 * wj
         b.eq            2f
-        ld1             {v0.4S},  [x1], #16
-        fmls            v16.4S, v4.4S,  v2.4S  // s0 * wj_r - s1_r * wi
-        st1             {v17.4S}, [x5], x7
-        ld1             {v2.4S},  [x3], #16
-        ld1             {v1.4S},  [x2], x7
-        st1             {v16.4S}, [x0], #16
+        ld1             {v0.4s},  [x1], #16
+        fmls            v16.4s, v4.4s,  v2.4s  // s0 * wj_r - s1_r * wi
+        st1             {v17.4s}, [x5], x7
+        ld1             {v2.4s},  [x3], #16
+        ld1             {v1.4s},  [x2], x7
+        st1             {v16.4s}, [x0], #16
         b               1b
 2:
-        fmls            v16.4S, v4.4S,  v2.4S  // s0 * wj_r - s1_r * wi
-        st1             {v17.4S}, [x5], x7
-        st1             {v16.4S}, [x0], #16
+        fmls            v16.4s, v4.4s,  v2.4s  // s0 * wj_r - s1_r * wi
+        st1             {v17.4s}, [x5], x7
+        st1             {v16.4s}, [x0], #16
         ret
 endfunc
 
 function ff_vector_fmul_add_neon, export=1
-        ld1             {v0.4S, v1.4S},  [x1], #32
-        ld1             {v2.4S, v3.4S},  [x2], #32
-        ld1             {v4.4S, v5.4S},  [x3], #32
+        ld1             {v0.4s, v1.4s},  [x1], #32
+        ld1             {v2.4s, v3.4s},  [x2], #32
+        ld1             {v4.4s, v5.4s},  [x3], #32
 1:      subs            w4,  w4,  #8
-        fmla            v4.4S,  v0.4S,  v2.4S
-        fmla            v5.4S,  v1.4S,  v3.4S
+        fmla            v4.4s,  v0.4s,  v2.4s
+        fmla            v5.4s,  v1.4s,  v3.4s
         b.eq            2f
-        ld1             {v0.4S, v1.4S},  [x1], #32
-        ld1             {v2.4S, v3.4S},  [x2], #32
-        st1             {v4.4S, v5.4S},  [x0], #32
-        ld1             {v4.4S, v5.4S},  [x3], #32
+        ld1             {v0.4s, v1.4s},  [x1], #32
+        ld1             {v2.4s, v3.4s},  [x2], #32
+        st1             {v4.4s, v5.4s},  [x0], #32
+        ld1             {v4.4s, v5.4s},  [x3], #32
         b               1b
-2:      st1             {v4.4S, v5.4S},  [x0], #32
+2:      st1             {v4.4s, v5.4s},  [x0], #32
         ret
 endfunc
 
@@ -159,44 +159,44 @@
         add             x2,  x2,  x3,  lsl #2
         sub             x2,  x2,  #32
         mov             x4, #-32
-        ld1             {v2.4S, v3.4S},  [x2], x4
-        ld1             {v0.4S, v1.4S},  [x1], #32
+        ld1             {v2.4s, v3.4s},  [x2], x4
+        ld1             {v0.4s, v1.4s},  [x1], #32
 1:      subs            x3,  x3,  #8
-        rev64           v3.4S,  v3.4S
-        rev64           v2.4S,  v2.4S
-        ext             v3.16B, v3.16B, v3.16B,  #8
-        ext             v2.16B, v2.16B, v2.16B,  #8
-        fmul            v16.4S, v0.4S,  v3.4S
-        fmul            v17.4S, v1.4S,  v2.4S
+        rev64           v3.4s,  v3.4s
+        rev64           v2.4s,  v2.4s
+        ext             v3.16b, v3.16b, v3.16b,  #8
+        ext             v2.16b, v2.16b, v2.16b,  #8
+        fmul            v16.4s, v0.4s,  v3.4s
+        fmul            v17.4s, v1.4s,  v2.4s
         b.eq            2f
-        ld1             {v2.4S, v3.4S},  [x2], x4
-        ld1             {v0.4S, v1.4S},  [x1], #32
-        st1             {v16.4S, v17.4S},  [x0], #32
+        ld1             {v2.4s, v3.4s},  [x2], x4
+        ld1             {v0.4s, v1.4s},  [x1], #32
+        st1             {v16.4s, v17.4s},  [x0], #32
         b               1b
-2:      st1             {v16.4S, v17.4S},  [x0], #32
+2:      st1             {v16.4s, v17.4s},  [x0], #32
         ret
 endfunc
 
 function ff_butterflies_float_neon, export=1
-1:      ld1             {v0.4S}, [x0]
-        ld1             {v1.4S}, [x1]
+1:      ld1             {v0.4s}, [x0]
+        ld1             {v1.4s}, [x1]
         subs            w2,  w2,  #4
-        fsub            v2.4S,   v0.4S,  v1.4S
-        fadd            v3.4S,   v0.4S,  v1.4S
-        st1             {v2.4S}, [x1],   #16
-        st1             {v3.4S}, [x0],   #16
+        fsub            v2.4s,   v0.4s,  v1.4s
+        fadd            v3.4s,   v0.4s,  v1.4s
+        st1             {v2.4s}, [x1],   #16
+        st1             {v3.4s}, [x0],   #16
         b.gt            1b
         ret
 endfunc
 
 function ff_scalarproduct_float_neon, export=1
-        movi            v2.4S,  #0
-1:      ld1             {v0.4S}, [x0],   #16
-        ld1             {v1.4S}, [x1],   #16
+        movi            v2.4s,  #0
+1:      ld1             {v0.4s}, [x0],   #16
+        ld1             {v1.4s}, [x1],   #16
         subs            w2,      w2,     #4
-        fmla            v2.4S,   v0.4S,  v1.4S
+        fmla            v2.4s,   v0.4s,  v1.4s
         b.gt            1b
-        faddp           v0.4S,   v2.4S,  v2.4S
-        faddp           s0,      v0.2S
+        faddp           v0.4s,   v2.4s,  v2.4s
+        faddp           s0,      v0.2s
         ret
 endfunc
diff -Nru ffmpeg-5.1.8/libavutil/aes.c ffmpeg-5.1.9/libavutil/aes.c
--- ffmpeg-5.1.8/libavutil/aes.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavutil/aes.c	2026-05-05 15:50:52.000000000 +0000
@@ -269,4 +269,3 @@
 
     return 0;
 }
-
diff -Nru ffmpeg-5.1.8/libavutil/bswap.h ffmpeg-5.1.9/libavutil/bswap.h
--- ffmpeg-5.1.8/libavutil/bswap.h	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavutil/bswap.h	2026-05-05 15:50:55.000000000 +0000
@@ -72,7 +72,7 @@
 #ifndef av_bswap64
 static inline uint64_t av_const av_bswap64(uint64_t x)
 {
-    return (uint64_t)av_bswap32(x) << 32 | av_bswap32(x >> 32);
+    return (uint64_t)av_bswap32((uint32_t)x) << 32 | av_bswap32((uint32_t)(x >> 32));
 }
 #endif
 
diff -Nru ffmpeg-5.1.8/libavutil/eval.c ffmpeg-5.1.9/libavutil/eval.c
--- ffmpeg-5.1.8/libavutil/eval.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libavutil/eval.c	2026-05-05 15:50:55.000000000 +0000
@@ -40,6 +40,8 @@
 #include "timer.h"
 #include "reverse.h"
 
+#define MAX_DEPTH 100
+
 typedef struct Parser {
     const AVClass *class;
     int stack_index;
@@ -174,6 +176,7 @@
     } a;
     struct AVExpr *param[3];
     double *var;
+    int depth;
 };
 
 static double etime(double v)
@@ -422,6 +425,14 @@
     }
     p->s++; // ")"
 
+    for (int i = 0; i<3; i++)
+        if (d->param[i])
+            d->depth = FFMAX(d->depth, d->param[i]->depth+1);
+    if (d->depth > MAX_DEPTH) {
+        av_expr_free(d);
+        return AVERROR(EINVAL);
+    }
+
     d->type = e_func0;
          if (strmatch(next, "sinh"  )) d->a.func0 = sinh;
     else if (strmatch(next, "cosh"  )) d->a.func0 = cosh;
@@ -505,6 +516,9 @@
 
 static AVExpr *make_eval_expr(int type, int value, AVExpr *p0, AVExpr *p1)
 {
+    int depth = FFMAX(p0->depth, p1->depth) + 1;
+    if (depth > MAX_DEPTH)
+        return NULL;
     AVExpr *e = av_mallocz(sizeof(AVExpr));
     if (!e)
         return NULL;
@@ -512,6 +526,7 @@
     e->value    =value  ;
     e->param[0] =p0     ;
     e->param[1] =p1     ;
+    e->depth    = depth;
     return e;
 }
 
diff -Nru ffmpeg-5.1.8/libavutil/hwcontext_cuda_internal.h ffmpeg-5.1.9/libavutil/hwcontext_cuda_internal.h
--- ffmpeg-5.1.8/libavutil/hwcontext_cuda_internal.h	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavutil/hwcontext_cuda_internal.h	2026-05-05 14:22:01.000000000 +0000
@@ -36,4 +36,3 @@
 };
 
 #endif /* AVUTIL_HWCONTEXT_CUDA_INTERNAL_H */
-
diff -Nru ffmpeg-5.1.8/libavutil/hwcontext_qsv.h ffmpeg-5.1.9/libavutil/hwcontext_qsv.h
--- ffmpeg-5.1.8/libavutil/hwcontext_qsv.h	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavutil/hwcontext_qsv.h	2026-05-05 15:50:52.000000000 +0000
@@ -50,4 +50,3 @@
 } AVQSVFramesContext;
 
 #endif /* AVUTIL_HWCONTEXT_QSV_H */
-
diff -Nru ffmpeg-5.1.8/libavutil/samplefmt.h ffmpeg-5.1.9/libavutil/samplefmt.h
--- ffmpeg-5.1.8/libavutil/samplefmt.h	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libavutil/samplefmt.h	2026-05-05 15:50:55.000000000 +0000
@@ -122,8 +122,7 @@
  * @param sample_fmt the number of the sample format to print the
  * corresponding info string, or a negative value to print the
  * corresponding header.
- * @return the pointer to the filled buffer or NULL if sample_fmt is
- * unknown or in case of other errors
+ * @return the pointer to the filled buffer or NULL in case of other errors
  */
 char *av_get_sample_fmt_string(char *buf, int buf_size, enum AVSampleFormat sample_fmt);
 
diff -Nru ffmpeg-5.1.8/libavutil/tests/blowfish.c ffmpeg-5.1.9/libavutil/tests/blowfish.c
--- ffmpeg-5.1.8/libavutil/tests/blowfish.c	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavutil/tests/blowfish.c	2026-05-05 14:22:01.000000000 +0000
@@ -191,4 +191,3 @@
 
     return 0;
 }
-
diff -Nru ffmpeg-5.1.8/libavutil/timecode.c ffmpeg-5.1.9/libavutil/timecode.c
--- ffmpeg-5.1.8/libavutil/timecode.c	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libavutil/timecode.c	2026-05-05 15:50:55.000000000 +0000
@@ -232,6 +232,7 @@
 int av_timecode_init_from_components(AVTimecode *tc, AVRational rate, int flags, int hh, int mm, int ss, int ff, void *log_ctx)
 {
     int ret;
+    int64_t s;
 
     memset(tc, 0, sizeof(*tc));
     tc->flags = flags;
@@ -242,7 +243,15 @@
     if (ret < 0)
         return ret;
 
-    tc->start = (hh*3600 + mm*60 + ss) * tc->fps + ff;
+    s = hh*3600LL + mm*60LL + ss;
+    if (s != (int32_t)s)
+        return AVERROR(EINVAL);
+
+    s = s * tc->fps + ff;
+    if (s != (int32_t)s)
+        return AVERROR(EINVAL);
+    tc->start = s;
+
     if (tc->flags & AV_TIMECODE_FLAG_DROPFRAME) { /* adjust frame number */
         int tmins = 60*hh + mm;
         tc->start -= (tc->fps / 30 * 2) * (tmins - tmins/10);
diff -Nru ffmpeg-5.1.8/libswresample/aarch64/resample.S ffmpeg-5.1.9/libswresample/aarch64/resample.S
--- ffmpeg-5.1.8/libswresample/aarch64/resample.S	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libswresample/aarch64/resample.S	2026-05-05 14:22:01.000000000 +0000
@@ -21,57 +21,57 @@
 #include "libavutil/aarch64/asm.S"
 
 function ff_resample_common_apply_filter_x4_float_neon, export=1
-    movi                v0.4S, #0                                      // accumulator
-1:  ld1                 {v1.4S}, [x1], #16                             // src[0..3]
-    ld1                 {v2.4S}, [x2], #16                             // filter[0..3]
-    fmla                v0.4S, v1.4S, v2.4S                            // accumulator += src[0..3] * filter[0..3]
-    subs                w3, w3, #4                                     // filter_length -= 4
-    b.gt                1b                                             // loop until filter_length
-    faddp               v0.4S, v0.4S, v0.4S                            // pair adding of the 4x32-bit accumulated values
-    faddp               v0.4S, v0.4S, v0.4S                            // pair adding of the 4x32-bit accumulated values
-    st1                 {v0.S}[0], [x0], #4                            // write accumulator
-    ret
+        movi            v0.4s, #0                                      // accumulator
+1:      ld1             {v1.4s}, [x1], #16                             // src[0..3]
+        ld1             {v2.4s}, [x2], #16                             // filter[0..3]
+        fmla            v0.4s, v1.4s, v2.4s                            // accumulator += src[0..3] * filter[0..3]
+        subs            w3, w3, #4                                     // filter_length -= 4
+        b.gt            1b                                             // loop until filter_length
+        faddp           v0.4s, v0.4s, v0.4s                            // pair adding of the 4x32-bit accumulated values
+        faddp           v0.4s, v0.4s, v0.4s                            // pair adding of the 4x32-bit accumulated values
+        st1             {v0.s}[0], [x0], #4                            // write accumulator
+        ret
 endfunc
 
 function ff_resample_common_apply_filter_x8_float_neon, export=1
-    movi                v0.4S, #0                                      // accumulator
-1:  ld1                 {v1.4S}, [x1], #16                             // src[0..3]
-    ld1                 {v2.4S}, [x2], #16                             // filter[0..3]
-    ld1                 {v3.4S}, [x1], #16                             // src[4..7]
-    ld1                 {v4.4S}, [x2], #16                             // filter[4..7]
-    fmla                v0.4S, v1.4S, v2.4S                            // accumulator += src[0..3] * filter[0..3]
-    fmla                v0.4S, v3.4S, v4.4S                            // accumulator += src[4..7] * filter[4..7]
-    subs                w3, w3, #8                                     // filter_length -= 8
-    b.gt                1b                                             // loop until filter_length
-    faddp               v0.4S, v0.4S, v0.4S                            // pair adding of the 4x32-bit accumulated values
-    faddp               v0.4S, v0.4S, v0.4S                            // pair adding of the 4x32-bit accumulated values
-    st1                 {v0.S}[0], [x0], #4                            // write accumulator
-    ret
+        movi            v0.4s, #0                                      // accumulator
+1:      ld1             {v1.4s}, [x1], #16                             // src[0..3]
+        ld1             {v2.4s}, [x2], #16                             // filter[0..3]
+        ld1             {v3.4s}, [x1], #16                             // src[4..7]
+        ld1             {v4.4s}, [x2], #16                             // filter[4..7]
+        fmla            v0.4s, v1.4s, v2.4s                            // accumulator += src[0..3] * filter[0..3]
+        fmla            v0.4s, v3.4s, v4.4s                            // accumulator += src[4..7] * filter[4..7]
+        subs            w3, w3, #8                                     // filter_length -= 8
+        b.gt            1b                                             // loop until filter_length
+        faddp           v0.4s, v0.4s, v0.4s                            // pair adding of the 4x32-bit accumulated values
+        faddp           v0.4s, v0.4s, v0.4s                            // pair adding of the 4x32-bit accumulated values
+        st1             {v0.s}[0], [x0], #4                            // write accumulator
+        ret
 endfunc
 
 function ff_resample_common_apply_filter_x4_s16_neon, export=1
-    movi                v0.4S, #0                                      // accumulator
-1:  ld1                 {v1.4H}, [x1], #8                              // src[0..3]
-    ld1                 {v2.4H}, [x2], #8                              // filter[0..3]
-    smlal               v0.4S, v1.4H, v2.4H                            // accumulator += src[0..3] * filter[0..3]
-    subs                w3, w3, #4                                     // filter_length -= 4
-    b.gt                1b                                             // loop until filter_length
-    addp                v0.4S, v0.4S, v0.4S                            // pair adding of the 4x32-bit accumulated values
-    addp                v0.4S, v0.4S, v0.4S                            // pair adding of the 4x32-bit accumulated values
-    st1                 {v0.S}[0], [x0], #4                            // write accumulator
-    ret
+        movi            v0.4s, #0                                      // accumulator
+1:      ld1             {v1.4h}, [x1], #8                              // src[0..3]
+        ld1             {v2.4h}, [x2], #8                              // filter[0..3]
+        smlal           v0.4s, v1.4h, v2.4h                            // accumulator += src[0..3] * filter[0..3]
+        subs            w3, w3, #4                                     // filter_length -= 4
+        b.gt            1b                                             // loop until filter_length
+        addp            v0.4s, v0.4s, v0.4s                            // pair adding of the 4x32-bit accumulated values
+        addp            v0.4s, v0.4s, v0.4s                            // pair adding of the 4x32-bit accumulated values
+        st1             {v0.s}[0], [x0], #4                            // write accumulator
+        ret
 endfunc
 
 function ff_resample_common_apply_filter_x8_s16_neon, export=1
-    movi                v0.4S, #0                                      // accumulator
-1:  ld1                 {v1.8H}, [x1], #16                             // src[0..7]
-    ld1                 {v2.8H}, [x2], #16                             // filter[0..7]
-    smlal               v0.4S, v1.4H, v2.4H                            // accumulator += src[0..3] * filter[0..3]
-    smlal2              v0.4S, v1.8H, v2.8H                            // accumulator += src[4..7] * filter[4..7]
-    subs                w3, w3, #8                                     // filter_length -= 8
-    b.gt                1b                                             // loop until filter_length
-    addp                v0.4S, v0.4S, v0.4S                            // pair adding of the 4x32-bit accumulated values
-    addp                v0.4S, v0.4S, v0.4S                            // pair adding of the 4x32-bit accumulated values
-    st1                 {v0.S}[0], [x0], #4                            // write accumulator
-    ret
+        movi            v0.4s, #0                                      // accumulator
+1:      ld1             {v1.8h}, [x1], #16                             // src[0..7]
+        ld1             {v2.8h}, [x2], #16                             // filter[0..7]
+        smlal           v0.4s, v1.4h, v2.4h                            // accumulator += src[0..3] * filter[0..3]
+        smlal2          v0.4s, v1.8h, v2.8h                            // accumulator += src[4..7] * filter[4..7]
+        subs            w3, w3, #8                                     // filter_length -= 8
+        b.gt            1b                                             // loop until filter_length
+        addp            v0.4s, v0.4s, v0.4s                            // pair adding of the 4x32-bit accumulated values
+        addp            v0.4s, v0.4s, v0.4s                            // pair adding of the 4x32-bit accumulated values
+        st1             {v0.s}[0], [x0], #4                            // write accumulator
+        ret
 endfunc
diff -Nru ffmpeg-5.1.8/libswresample/rematrix.c ffmpeg-5.1.9/libswresample/rematrix.c
--- ffmpeg-5.1.8/libswresample/rematrix.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libswresample/rematrix.c	2026-05-05 15:50:55.000000000 +0000
@@ -66,7 +66,10 @@
     int nb_in, nb_out, in, out;
     int user_in_chlayout_nb_channels, user_out_chlayout_nb_channels;
 
-    if (!s || s->in_convert) // s needs to be allocated but not initialized
+    if (!s || s->in_convert ||   // s needs to be allocated but not initialized
+        swri_check_chlayout(s, &s->user_in_chlayout , "input") ||
+        swri_check_chlayout(s, &s->user_out_chlayout, "output")
+    )
         return AVERROR(EINVAL);
     memset(s->matrix, 0, sizeof(s->matrix));
     memset(s->matrix_flt, 0, sizeof(s->matrix_flt));
diff -Nru ffmpeg-5.1.8/libswresample/resample_template.c ffmpeg-5.1.9/libswresample/resample_template.c
--- ffmpeg-5.1.8/libswresample/resample_template.c	2023-11-09 23:38:51.000000000 +0000
+++ ffmpeg-5.1.9/libswresample/resample_template.c	2026-05-05 15:50:55.000000000 +0000
@@ -25,6 +25,8 @@
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
+// FELEM2U, a variant of FELEM2 which does not produce undefined overflow
+
 #if defined(TEMPLATE_RESAMPLE_DBL)
 
 #    define RENAME(N) N ## _double
@@ -32,6 +34,7 @@
 #    define DELEM  double
 #    define FELEM  double
 #    define FELEM2 double
+#    define FELEM2U double
 #    define FOFFSET 0
 #    define OUT(d, v) d = v
 
@@ -42,6 +45,7 @@
 #    define DELEM  float
 #    define FELEM  float
 #    define FELEM2 float
+#    define FELEM2U float
 #    define FOFFSET 0
 #    define OUT(d, v) d = v
 
@@ -52,6 +56,7 @@
 #    define DELEM  int32_t
 #    define FELEM  int32_t
 #    define FELEM2 int64_t
+#    define FELEM2U uint64_t
 #    define FELEM_MAX INT32_MAX
 #    define FELEM_MIN INT32_MIN
 #    define FOFFSET (1<<(FILTER_SHIFT-1))
@@ -64,6 +69,7 @@
 #    define DELEM  int16_t
 #    define FELEM  int16_t
 #    define FELEM2 int32_t
+#    define FELEM2U uint32_t
 #    define FELEML int64_t
 #    define FELEM_MAX INT16_MAX
 #    define FELEM_MIN INT16_MIN
@@ -161,7 +167,7 @@
 
     for (dst_index = 0; dst_index < n; dst_index++) {
         FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
-        FELEM2 val = FOFFSET, v2 = FOFFSET;
+        FELEM2U val = FOFFSET, v2 = FOFFSET;
 
         int i;
         for (i = 0; i < c->filter_length; i++) {
@@ -169,15 +175,15 @@
             v2  += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc];
         }
 #ifdef FELEML
-        val += (v2 - val) * (FELEML) frac / c->src_incr;
+        val += (FELEM2)(v2 - val) * (FELEML) frac / c->src_incr;
 #else
 #    if FILTER_SHIFT == 0
-        val += (v2 - val) * inv_src_incr * frac;
+        val += (FELEM2)(v2 - val) * inv_src_incr * frac;
 #    else
-        val += (v2 - val) / c->src_incr * frac;
+        val += (FELEM2)(v2 - val) / c->src_incr * frac;
 #    endif
 #endif
-        OUT(dst[dst_index], val);
+        OUT(dst[dst_index], (FELEM2)val);
 
         frac += c->dst_incr_mod;
         index += c->dst_incr_div;
@@ -205,6 +211,7 @@
 #undef DELEM
 #undef FELEM
 #undef FELEM2
+#undef FELEM2U
 #undef FELEML
 #undef FELEM_MAX
 #undef FELEM_MIN
diff -Nru ffmpeg-5.1.8/libswresample/soxr_resample.c ffmpeg-5.1.9/libswresample/soxr_resample.c
--- ffmpeg-5.1.8/libswresample/soxr_resample.c	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/libswresample/soxr_resample.c	2026-05-05 14:22:01.000000000 +0000
@@ -127,4 +127,3 @@
     create, destroy, process, flush, NULL /* set_compensation */, get_delay,
     invert_initial_buffer, get_out_samples
 };
-
diff -Nru ffmpeg-5.1.8/libswresample/swresample.c ffmpeg-5.1.9/libswresample/swresample.c
--- ffmpeg-5.1.8/libswresample/swresample.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libswresample/swresample.c	2026-05-05 15:50:55.000000000 +0000
@@ -29,6 +29,20 @@
 
 #define ALIGN 32
 
+int swri_check_chlayout(struct SwrContext *s, const AVChannelLayout *chl, const char *name) {
+    char l1[1024];
+    int ret;
+
+    if (!(ret = av_channel_layout_check(chl)) || chl->nb_channels > SWR_CH_MAX) {
+        if (ret)
+            av_channel_layout_describe(chl, l1, sizeof(l1));
+        av_log(s, AV_LOG_WARNING, "%s channel layout \"%s\" is invalid or unsupported.\n", name, ret ? l1 : "");
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
 int swr_set_channel_mapping(struct SwrContext *s, const int *channel_map){
     if(!s || s->in_convert) // s needs to be allocated but not initialized
         return AVERROR(EINVAL);
@@ -99,6 +113,8 @@
 
     if ((ret = av_opt_set_chlayout(s, "ochl", out_ch_layout, 0)) < 0)
         goto fail;
+    if ((ret = swri_check_chlayout(s, out_ch_layout, "ochl")) < 0)
+        goto fail;
 
     if ((ret = av_opt_set_int(s, "osf", out_sample_fmt, 0)) < 0)
         goto fail;
@@ -108,6 +124,8 @@
 
     if ((ret = av_opt_set_chlayout(s, "ichl", in_ch_layout, 0)) < 0)
         goto fail;
+    if ((ret = swri_check_chlayout(s, in_ch_layout, "ichl")) < 0)
+        goto fail;
 
     if ((ret = av_opt_set_int(s, "isf", in_sample_fmt, 0)) < 0)
         goto fail;
@@ -265,19 +283,9 @@
     s->out.ch_count  = s-> user_out_chlayout.nb_channels;
     s-> in.ch_count  = s->  user_in_chlayout.nb_channels;
 
-    if (!(ret = av_channel_layout_check(&s->user_in_chlayout)) || s->user_in_chlayout.nb_channels > SWR_CH_MAX) {
-        if (ret)
-            av_channel_layout_describe(&s->user_in_chlayout, l1, sizeof(l1));
-        av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or unsupported.\n", ret ? l1 : "");
+    if (swri_check_chlayout(s, &s->user_in_chlayout , "input") ||
+        swri_check_chlayout(s, &s->user_out_chlayout, "output"))
         return AVERROR(EINVAL);
-    }
-
-    if (!(ret = av_channel_layout_check(&s->user_out_chlayout)) || s->user_out_chlayout.nb_channels > SWR_CH_MAX) {
-        if (ret)
-            av_channel_layout_describe(&s->user_out_chlayout, l2, sizeof(l2));
-        av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid or unsupported.\n", ret ? l2 : "");
-        return AVERROR(EINVAL);
-    }
 
     ret  = av_channel_layout_copy(&s->in_ch_layout, &s->user_in_chlayout);
     ret |= av_channel_layout_copy(&s->out_ch_layout, &s->user_out_chlayout);
diff -Nru ffmpeg-5.1.8/libswresample/swresample_frame.c ffmpeg-5.1.9/libswresample/swresample_frame.c
--- ffmpeg-5.1.8/libswresample/swresample_frame.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libswresample/swresample_frame.c	2026-05-05 14:22:01.000000000 +0000
@@ -217,4 +217,3 @@
 
     return convert_frame(s, out, in);
 }
-
diff -Nru ffmpeg-5.1.8/libswresample/swresample_internal.h ffmpeg-5.1.9/libswresample/swresample_internal.h
--- ffmpeg-5.1.8/libswresample/swresample_internal.h	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libswresample/swresample_internal.h	2026-05-05 15:50:55.000000000 +0000
@@ -193,6 +193,7 @@
 
 av_warn_unused_result
 int swri_realloc_audio(AudioData *a, int count);
+int swri_check_chlayout(struct SwrContext *s, const AVChannelLayout *chl, const char *name);
 
 void swri_noise_shaping_int16 (SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count);
 void swri_noise_shaping_int32 (SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count);
diff -Nru ffmpeg-5.1.8/libswresample/version.c ffmpeg-5.1.9/libswresample/version.c
--- ffmpeg-5.1.8/libswresample/version.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libswresample/version.c	2026-05-05 14:22:01.000000000 +0000
@@ -42,4 +42,3 @@
 #define LICENSE_PREFIX "libswresample license: "
     return &LICENSE_PREFIX FFMPEG_LICENSE[sizeof(LICENSE_PREFIX) - 1];
 }
-
diff -Nru ffmpeg-5.1.8/libswscale/aarch64/hscale.S ffmpeg-5.1.9/libswscale/aarch64/hscale.S
--- ffmpeg-5.1.8/libswscale/aarch64/hscale.S	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libswscale/aarch64/hscale.S	2026-05-05 15:50:52.000000000 +0000
@@ -41,53 +41,53 @@
 ;----------------------------------------------------------------------------- */
 
 function ff_hscale8to15_X8_neon, export=1
-        sbfiz               x7, x6, #1, #32             // filterSize*2 (*2 because int16)
-1:      ldr                 w8, [x5], #4                // filterPos[idx]
-        ldr                 w0, [x5], #4                // filterPos[idx + 1]
-        ldr                 w11, [x5], #4               // filterPos[idx + 2]
-        ldr                 w9, [x5], #4                // filterPos[idx + 3]
-        mov                 x16, x4                     // filter0 = filter
-        add                 x12, x16, x7                // filter1 = filter0 + filterSize*2
-        add                 x13, x12, x7                // filter2 = filter1 + filterSize*2
-        add                 x4, x13, x7                 // filter3 = filter2 + filterSize*2
-        movi                v0.2D, #0                   // val sum part 1 (for dst[0])
-        movi                v1.2D, #0                   // val sum part 2 (for dst[1])
-        movi                v2.2D, #0                   // val sum part 3 (for dst[2])
-        movi                v3.2D, #0                   // val sum part 4 (for dst[3])
-        add                 x17, x3, w8, UXTW           // srcp + filterPos[0]
-        add                 x8,  x3, w0, UXTW           // srcp + filterPos[1]
-        add                 x0, x3, w11, UXTW           // srcp + filterPos[2]
-        add                 x11, x3, w9, UXTW           // srcp + filterPos[3]
-        mov                 w15, w6                     // filterSize counter
-2:      ld1                 {v4.8B}, [x17], #8          // srcp[filterPos[0] + {0..7}]
-        ld1                 {v5.8H}, [x16], #16         // load 8x16-bit filter values, part 1
-        ld1                 {v6.8B}, [x8], #8           // srcp[filterPos[1] + {0..7}]
-        ld1                 {v7.8H}, [x12], #16         // load 8x16-bit at filter+filterSize
-        uxtl                v4.8H, v4.8B                // unpack part 1 to 16-bit
-        smlal               v0.4S, v4.4H, v5.4H         // v0 accumulates srcp[filterPos[0] + {0..3}] * filter[{0..3}]
-        smlal2              v0.4S, v4.8H, v5.8H         // v0 accumulates srcp[filterPos[0] + {4..7}] * filter[{4..7}]
-        ld1                 {v16.8B}, [x0], #8          // srcp[filterPos[2] + {0..7}]
-        ld1                 {v17.8H}, [x13], #16        // load 8x16-bit at filter+2*filterSize
-        uxtl                v6.8H, v6.8B                // unpack part 2 to 16-bit
-        smlal               v1.4S, v6.4H, v7.4H         // v1 accumulates srcp[filterPos[1] + {0..3}] * filter[{0..3}]
-        uxtl                v16.8H, v16.8B              // unpack part 3 to 16-bit
-        smlal               v2.4S, v16.4H, v17.4H       // v2 accumulates srcp[filterPos[2] + {0..3}] * filter[{0..3}]
-        smlal2              v2.4S, v16.8H, v17.8H       // v2 accumulates srcp[filterPos[2] + {4..7}] * filter[{4..7}]
-        ld1                 {v18.8B}, [x11], #8         // srcp[filterPos[3] + {0..7}]
-        smlal2              v1.4S, v6.8H, v7.8H         // v1 accumulates srcp[filterPos[1] + {4..7}] * filter[{4..7}]
-        ld1                 {v19.8H}, [x4], #16         // load 8x16-bit at filter+3*filterSize
-        subs                w15, w15, #8                // j -= 8: processed 8/filterSize
-        uxtl                v18.8H, v18.8B              // unpack part 4 to 16-bit
-        smlal               v3.4S, v18.4H, v19.4H       // v3 accumulates srcp[filterPos[3] + {0..3}] * filter[{0..3}]
-        smlal2              v3.4S, v18.8H, v19.8H       // v3 accumulates srcp[filterPos[3] + {4..7}] * filter[{4..7}]
-        b.gt                2b                          // inner loop if filterSize not consumed completely
-        addp                v0.4S, v0.4S, v1.4S         // part01 horizontal pair adding
-        addp                v2.4S, v2.4S, v3.4S         // part23 horizontal pair adding
-        addp                v0.4S, v0.4S, v2.4S         // part0123 horizontal pair adding
-        subs                w2, w2, #4                  // dstW -= 4
-        sqshrn              v0.4H, v0.4S, #7            // shift and clip the 2x16-bit final values
-        st1                 {v0.4H}, [x1], #8           // write to destination part0123
-        b.gt                1b                          // loop until end of line
+        sbfiz           x7, x6, #1, #32             // filterSize*2 (*2 because int16)
+1:      ldr             w8, [x5], #4                // filterPos[idx]
+        ldr             w0, [x5], #4                // filterPos[idx + 1]
+        ldr             w11, [x5], #4               // filterPos[idx + 2]
+        ldr             w9, [x5], #4                // filterPos[idx + 3]
+        mov             x16, x4                     // filter0 = filter
+        add             x12, x16, x7                // filter1 = filter0 + filterSize*2
+        add             x13, x12, x7                // filter2 = filter1 + filterSize*2
+        add             x4, x13, x7                 // filter3 = filter2 + filterSize*2
+        movi            v0.2d, #0                   // val sum part 1 (for dst[0])
+        movi            v1.2d, #0                   // val sum part 2 (for dst[1])
+        movi            v2.2d, #0                   // val sum part 3 (for dst[2])
+        movi            v3.2d, #0                   // val sum part 4 (for dst[3])
+        add             x17, x3, w8, uxtw           // srcp + filterPos[0]
+        add             x8,  x3, w0, uxtw           // srcp + filterPos[1]
+        add             x0, x3, w11, uxtw           // srcp + filterPos[2]
+        add             x11, x3, w9, uxtw           // srcp + filterPos[3]
+        mov             w15, w6                     // filterSize counter
+2:      ld1             {v4.8b}, [x17], #8          // srcp[filterPos[0] + {0..7}]
+        ld1             {v5.8h}, [x16], #16         // load 8x16-bit filter values, part 1
+        ld1             {v6.8b}, [x8], #8           // srcp[filterPos[1] + {0..7}]
+        ld1             {v7.8h}, [x12], #16         // load 8x16-bit at filter+filterSize
+        uxtl            v4.8h, v4.8b                // unpack part 1 to 16-bit
+        smlal           v0.4s, v4.4h, v5.4h         // v0 accumulates srcp[filterPos[0] + {0..3}] * filter[{0..3}]
+        smlal2          v0.4s, v4.8h, v5.8h         // v0 accumulates srcp[filterPos[0] + {4..7}] * filter[{4..7}]
+        ld1             {v16.8b}, [x0], #8          // srcp[filterPos[2] + {0..7}]
+        ld1             {v17.8h}, [x13], #16        // load 8x16-bit at filter+2*filterSize
+        uxtl            v6.8h, v6.8b                // unpack part 2 to 16-bit
+        smlal           v1.4s, v6.4h, v7.4h         // v1 accumulates srcp[filterPos[1] + {0..3}] * filter[{0..3}]
+        uxtl            v16.8h, v16.8b              // unpack part 3 to 16-bit
+        smlal           v2.4s, v16.4h, v17.4h       // v2 accumulates srcp[filterPos[2] + {0..3}] * filter[{0..3}]
+        smlal2          v2.4s, v16.8h, v17.8h       // v2 accumulates srcp[filterPos[2] + {4..7}] * filter[{4..7}]
+        ld1             {v18.8b}, [x11], #8         // srcp[filterPos[3] + {0..7}]
+        smlal2          v1.4s, v6.8h, v7.8h         // v1 accumulates srcp[filterPos[1] + {4..7}] * filter[{4..7}]
+        ld1             {v19.8h}, [x4], #16         // load 8x16-bit at filter+3*filterSize
+        subs            w15, w15, #8                // j -= 8: processed 8/filterSize
+        uxtl            v18.8h, v18.8b              // unpack part 4 to 16-bit
+        smlal           v3.4s, v18.4h, v19.4h       // v3 accumulates srcp[filterPos[3] + {0..3}] * filter[{0..3}]
+        smlal2          v3.4s, v18.8h, v19.8h       // v3 accumulates srcp[filterPos[3] + {4..7}] * filter[{4..7}]
+        b.gt            2b                          // inner loop if filterSize not consumed completely
+        addp            v0.4s, v0.4s, v1.4s         // part01 horizontal pair adding
+        addp            v2.4s, v2.4s, v3.4s         // part23 horizontal pair adding
+        addp            v0.4s, v0.4s, v2.4s         // part0123 horizontal pair adding
+        subs            w2, w2, #4                  // dstW -= 4
+        sqshrn          v0.4h, v0.4s, #7            // shift and clip the 2x16-bit final values
+        st1             {v0.4h}, [x1], #8           // write to destination part0123
+        b.gt            1b                          // loop until end of line
         ret
 endfunc
 
@@ -112,131 +112,131 @@
 //  3. Complete madd
 //  4. Complete remaining iterations when dstW % 8 != 0
 
-        sub                 sp, sp, #32                 // allocate 32 bytes on the stack
-        cmp                 w2, #16                     // if dstW <16, skip to the last block used for wrapping up
-        b.lt                2f
+        sub             sp, sp, #32                 // allocate 32 bytes on the stack
+        cmp             w2, #16                     // if dstW <16, skip to the last block used for wrapping up
+        b.lt            2f
 
         // load 8 values from filterPos to be used as offsets into src
-        ldp                 w8, w9,  [x5]               // filterPos[idx + 0], [idx + 1]
-        ldp                 w10, w11, [x5, #8]          // filterPos[idx + 2], [idx + 3]
-        ldp                 w12, w13, [x5, #16]         // filterPos[idx + 4], [idx + 5]
-        ldp                 w14, w15, [x5, #24]         // filterPos[idx + 6], [idx + 7]
-        add                 x5, x5, #32                 // advance filterPos
+        ldp             w8, w9,  [x5]               // filterPos[idx + 0], [idx + 1]
+        ldp             w10, w11, [x5, #8]          // filterPos[idx + 2], [idx + 3]
+        ldp             w12, w13, [x5, #16]         // filterPos[idx + 4], [idx + 5]
+        ldp             w14, w15, [x5, #24]         // filterPos[idx + 6], [idx + 7]
+        add             x5, x5, #32                 // advance filterPos
 
         // gather random access data from src into contiguous memory
-        ldr                 w8, [x3, w8, UXTW]          // src[filterPos[idx + 0]][0..3]
-        ldr                 w9, [x3, w9, UXTW]          // src[filterPos[idx + 1]][0..3]
-        ldr                 w10, [x3, w10, UXTW]        // src[filterPos[idx + 2]][0..3]
-        ldr                 w11, [x3, w11, UXTW]        // src[filterPos[idx + 3]][0..3]
-        ldr                 w12, [x3, w12, UXTW]        // src[filterPos[idx + 4]][0..3]
-        ldr                 w13, [x3, w13, UXTW]        // src[filterPos[idx + 5]][0..3]
-        ldr                 w14, [x3, w14, UXTW]        // src[filterPos[idx + 6]][0..3]
-        ldr                 w15, [x3, w15, UXTW]        // src[filterPos[idx + 7]][0..3]
-        stp                 w8, w9, [sp]                // *scratch_mem = { src[filterPos[idx + 0]][0..3], src[filterPos[idx + 1]][0..3] }
-        stp                 w10, w11, [sp, #8]          // *scratch_mem = { src[filterPos[idx + 2]][0..3], src[filterPos[idx + 3]][0..3] }
-        stp                 w12, w13, [sp, #16]         // *scratch_mem = { src[filterPos[idx + 4]][0..3], src[filterPos[idx + 5]][0..3] }
-        stp                 w14, w15, [sp, #24]         // *scratch_mem = { src[filterPos[idx + 6]][0..3], src[filterPos[idx + 7]][0..3] }
+        ldr             w8, [x3, w8, uxtw]          // src[filterPos[idx + 0]][0..3]
+        ldr             w9, [x3, w9, uxtw]          // src[filterPos[idx + 1]][0..3]
+        ldr             w10, [x3, w10, uxtw]        // src[filterPos[idx + 2]][0..3]
+        ldr             w11, [x3, w11, uxtw]        // src[filterPos[idx + 3]][0..3]
+        ldr             w12, [x3, w12, uxtw]        // src[filterPos[idx + 4]][0..3]
+        ldr             w13, [x3, w13, uxtw]        // src[filterPos[idx + 5]][0..3]
+        ldr             w14, [x3, w14, uxtw]        // src[filterPos[idx + 6]][0..3]
+        ldr             w15, [x3, w15, uxtw]        // src[filterPos[idx + 7]][0..3]
+        stp             w8, w9, [sp]                // *scratch_mem = { src[filterPos[idx + 0]][0..3], src[filterPos[idx + 1]][0..3] }
+        stp             w10, w11, [sp, #8]          // *scratch_mem = { src[filterPos[idx + 2]][0..3], src[filterPos[idx + 3]][0..3] }
+        stp             w12, w13, [sp, #16]         // *scratch_mem = { src[filterPos[idx + 4]][0..3], src[filterPos[idx + 5]][0..3] }
+        stp             w14, w15, [sp, #24]         // *scratch_mem = { src[filterPos[idx + 6]][0..3], src[filterPos[idx + 7]][0..3] }
 
 1:
-        ld4                 {v16.8B, v17.8B, v18.8B, v19.8B}, [sp] // transpose 8 bytes each from src into 4 registers
+        ld4             {v16.8b, v17.8b, v18.8b, v19.8b}, [sp] // transpose 8 bytes each from src into 4 registers
 
         // load 8 values from filterPos to be used as offsets into src
-        ldp                 w8, w9,  [x5]               // filterPos[idx + 0][0..3], [idx + 1][0..3], next iteration
-        ldp                 w10, w11, [x5, #8]          // filterPos[idx + 2][0..3], [idx + 3][0..3], next iteration
-        ldp                 w12, w13, [x5, #16]         // filterPos[idx + 4][0..3], [idx + 5][0..3], next iteration
-        ldp                 w14, w15, [x5, #24]         // filterPos[idx + 6][0..3], [idx + 7][0..3], next iteration
+        ldp             w8, w9,  [x5]               // filterPos[idx + 0][0..3], [idx + 1][0..3], next iteration
+        ldp             w10, w11, [x5, #8]          // filterPos[idx + 2][0..3], [idx + 3][0..3], next iteration
+        ldp             w12, w13, [x5, #16]         // filterPos[idx + 4][0..3], [idx + 5][0..3], next iteration
+        ldp             w14, w15, [x5, #24]         // filterPos[idx + 6][0..3], [idx + 7][0..3], next iteration
 
-        movi                v0.2D, #0                   // Clear madd accumulator for idx 0..3
-        movi                v5.2D, #0                   // Clear madd accumulator for idx 4..7
+        movi            v0.2d, #0                   // Clear madd accumulator for idx 0..3
+        movi            v5.2d, #0                   // Clear madd accumulator for idx 4..7
 
-        ld4                 {v1.8H, v2.8H, v3.8H, v4.8H}, [x4], #64 // load filter idx + 0..7
+        ld4             {v1.8h, v2.8h, v3.8h, v4.8h}, [x4], #64 // load filter idx + 0..7
 
-        add                 x5, x5, #32                 // advance filterPos
+        add             x5, x5, #32                 // advance filterPos
 
         // interleaved SIMD and prefetching intended to keep ld/st and vector pipelines busy
-        uxtl                v16.8H, v16.8B              // unsigned extend long, covert src data to 16-bit
-        uxtl                v17.8H, v17.8B              // unsigned extend long, covert src data to 16-bit
-        ldr                 w8, [x3, w8, UXTW]          // src[filterPos[idx + 0]], next iteration
-        ldr                 w9, [x3, w9, UXTW]          // src[filterPos[idx + 1]], next iteration
-        uxtl                v18.8H, v18.8B              // unsigned extend long, covert src data to 16-bit
-        uxtl                v19.8H, v19.8B              // unsigned extend long, covert src data to 16-bit
-        ldr                 w10, [x3, w10, UXTW]        // src[filterPos[idx + 2]], next iteration
-        ldr                 w11, [x3, w11, UXTW]        // src[filterPos[idx + 3]], next iteration
-
-        smlal               v0.4S, v1.4H, v16.4H        // multiply accumulate inner loop j = 0, idx = 0..3
-        smlal               v0.4S, v2.4H, v17.4H        // multiply accumulate inner loop j = 1, idx = 0..3
-        ldr                 w12, [x3, w12, UXTW]        // src[filterPos[idx + 4]], next iteration
-        ldr                 w13, [x3, w13, UXTW]        // src[filterPos[idx + 5]], next iteration
-        smlal               v0.4S, v3.4H, v18.4H        // multiply accumulate inner loop j = 2, idx = 0..3
-        smlal               v0.4S, v4.4H, v19.4H        // multiply accumulate inner loop j = 3, idx = 0..3
-        ldr                 w14, [x3, w14, UXTW]        // src[filterPos[idx + 6]], next iteration
-        ldr                 w15, [x3, w15, UXTW]        // src[filterPos[idx + 7]], next iteration
-
-        smlal2              v5.4S, v1.8H, v16.8H        // multiply accumulate inner loop j = 0, idx = 4..7
-        smlal2              v5.4S, v2.8H, v17.8H        // multiply accumulate inner loop j = 1, idx = 4..7
-        stp                 w8, w9, [sp]                // *scratch_mem = { src[filterPos[idx + 0]][0..3], src[filterPos[idx + 1]][0..3] }
-        stp                 w10, w11, [sp, #8]          // *scratch_mem = { src[filterPos[idx + 2]][0..3], src[filterPos[idx + 3]][0..3] }
-        smlal2              v5.4S, v3.8H, v18.8H        // multiply accumulate inner loop j = 2, idx = 4..7
-        smlal2              v5.4S, v4.8H, v19.8H        // multiply accumulate inner loop j = 3, idx = 4..7
-        stp                 w12, w13, [sp, #16]         // *scratch_mem = { src[filterPos[idx + 4]][0..3], src[filterPos[idx + 5]][0..3] }
-        stp                 w14, w15, [sp, #24]         // *scratch_mem = { src[filterPos[idx + 6]][0..3], src[filterPos[idx + 7]][0..3] }
-
-        sub                 w2, w2, #8                  // dstW -= 8
-        sqshrn              v0.4H, v0.4S, #7            // shift and clip the 2x16-bit final values
-        sqshrn              v1.4H, v5.4S, #7            // shift and clip the 2x16-bit final values
-        st1                 {v0.4H, v1.4H}, [x1], #16   // write to dst[idx + 0..7]
-        cmp                 w2, #16                     // continue on main loop if there are at least 16 iterations left
-        b.ge                1b
+        uxtl            v16.8h, v16.8b              // unsigned extend long, covert src data to 16-bit
+        uxtl            v17.8h, v17.8b              // unsigned extend long, covert src data to 16-bit
+        ldr             w8, [x3, w8, uxtw]          // src[filterPos[idx + 0]], next iteration
+        ldr             w9, [x3, w9, uxtw]          // src[filterPos[idx + 1]], next iteration
+        uxtl            v18.8h, v18.8b              // unsigned extend long, covert src data to 16-bit
+        uxtl            v19.8h, v19.8b              // unsigned extend long, covert src data to 16-bit
+        ldr             w10, [x3, w10, uxtw]        // src[filterPos[idx + 2]], next iteration
+        ldr             w11, [x3, w11, uxtw]        // src[filterPos[idx + 3]], next iteration
+
+        smlal           v0.4s, v1.4h, v16.4h        // multiply accumulate inner loop j = 0, idx = 0..3
+        smlal           v0.4s, v2.4h, v17.4h        // multiply accumulate inner loop j = 1, idx = 0..3
+        ldr             w12, [x3, w12, uxtw]        // src[filterPos[idx + 4]], next iteration
+        ldr             w13, [x3, w13, uxtw]        // src[filterPos[idx + 5]], next iteration
+        smlal           v0.4s, v3.4h, v18.4h        // multiply accumulate inner loop j = 2, idx = 0..3
+        smlal           v0.4s, v4.4h, v19.4h        // multiply accumulate inner loop j = 3, idx = 0..3
+        ldr             w14, [x3, w14, uxtw]        // src[filterPos[idx + 6]], next iteration
+        ldr             w15, [x3, w15, uxtw]        // src[filterPos[idx + 7]], next iteration
+
+        smlal2          v5.4s, v1.8h, v16.8h        // multiply accumulate inner loop j = 0, idx = 4..7
+        smlal2          v5.4s, v2.8h, v17.8h        // multiply accumulate inner loop j = 1, idx = 4..7
+        stp             w8, w9, [sp]                // *scratch_mem = { src[filterPos[idx + 0]][0..3], src[filterPos[idx + 1]][0..3] }
+        stp             w10, w11, [sp, #8]          // *scratch_mem = { src[filterPos[idx + 2]][0..3], src[filterPos[idx + 3]][0..3] }
+        smlal2          v5.4s, v3.8h, v18.8h        // multiply accumulate inner loop j = 2, idx = 4..7
+        smlal2          v5.4s, v4.8h, v19.8h        // multiply accumulate inner loop j = 3, idx = 4..7
+        stp             w12, w13, [sp, #16]         // *scratch_mem = { src[filterPos[idx + 4]][0..3], src[filterPos[idx + 5]][0..3] }
+        stp             w14, w15, [sp, #24]         // *scratch_mem = { src[filterPos[idx + 6]][0..3], src[filterPos[idx + 7]][0..3] }
+
+        sub             w2, w2, #8                  // dstW -= 8
+        sqshrn          v0.4h, v0.4s, #7            // shift and clip the 2x16-bit final values
+        sqshrn          v1.4h, v5.4s, #7            // shift and clip the 2x16-bit final values
+        st1             {v0.4h, v1.4h}, [x1], #16   // write to dst[idx + 0..7]
+        cmp             w2, #16                     // continue on main loop if there are at least 16 iterations left
+        b.ge            1b
 
         // last full iteration
-        ld4                 {v16.8B, v17.8B, v18.8B, v19.8B}, [sp]
-        ld4                 {v1.8H, v2.8H, v3.8H, v4.8H}, [x4], #64 // load filter idx + 0..7
+        ld4             {v16.8b, v17.8b, v18.8b, v19.8b}, [sp]
+        ld4             {v1.8h, v2.8h, v3.8h, v4.8h}, [x4], #64 // load filter idx + 0..7
 
-        movi                v0.2D, #0                   // Clear madd accumulator for idx 0..3
-        movi                v5.2D, #0                   // Clear madd accumulator for idx 4..7
+        movi            v0.2d, #0                   // Clear madd accumulator for idx 0..3
+        movi            v5.2d, #0                   // Clear madd accumulator for idx 4..7
 
-        uxtl                v16.8H, v16.8B              // unsigned extend long, covert src data to 16-bit
-        uxtl                v17.8H, v17.8B              // unsigned extend long, covert src data to 16-bit
-        uxtl                v18.8H, v18.8B              // unsigned extend long, covert src data to 16-bit
-        uxtl                v19.8H, v19.8B              // unsigned extend long, covert src data to 16-bit
-
-        smlal               v0.4S, v1.4H, v16.4H        // multiply accumulate inner loop j = 0, idx = 0..3
-        smlal               v0.4S, v2.4H, v17.4H        // multiply accumulate inner loop j = 1, idx = 0..3
-        smlal               v0.4S, v3.4H, v18.4H        // multiply accumulate inner loop j = 2, idx = 0..3
-        smlal               v0.4S, v4.4H, v19.4H        // multiply accumulate inner loop j = 3, idx = 0..3
-
-        smlal2              v5.4S, v1.8H, v16.8H        // multiply accumulate inner loop j = 0, idx = 4..7
-        smlal2              v5.4S, v2.8H, v17.8H        // multiply accumulate inner loop j = 1, idx = 4..7
-        smlal2              v5.4S, v3.8H, v18.8H        // multiply accumulate inner loop j = 2, idx = 4..7
-        smlal2              v5.4S, v4.8H, v19.8H        // multiply accumulate inner loop j = 3, idx = 4..7
-
-        subs                w2, w2, #8                  // dstW -= 8
-        sqshrn              v0.4H, v0.4S, #7            // shift and clip the 2x16-bit final values
-        sqshrn              v1.4H, v5.4S, #7            // shift and clip the 2x16-bit final values
-        st1                 {v0.4H, v1.4H}, [x1], #16   // write to dst[idx + 0..7]
+        uxtl            v16.8h, v16.8b              // unsigned extend long, covert src data to 16-bit
+        uxtl            v17.8h, v17.8b              // unsigned extend long, covert src data to 16-bit
+        uxtl            v18.8h, v18.8b              // unsigned extend long, covert src data to 16-bit
+        uxtl            v19.8h, v19.8b              // unsigned extend long, covert src data to 16-bit
+
+        smlal           v0.4s, v1.4h, v16.4h        // multiply accumulate inner loop j = 0, idx = 0..3
+        smlal           v0.4s, v2.4h, v17.4h        // multiply accumulate inner loop j = 1, idx = 0..3
+        smlal           v0.4s, v3.4h, v18.4h        // multiply accumulate inner loop j = 2, idx = 0..3
+        smlal           v0.4s, v4.4h, v19.4h        // multiply accumulate inner loop j = 3, idx = 0..3
+
+        smlal2          v5.4s, v1.8h, v16.8h        // multiply accumulate inner loop j = 0, idx = 4..7
+        smlal2          v5.4s, v2.8h, v17.8h        // multiply accumulate inner loop j = 1, idx = 4..7
+        smlal2          v5.4s, v3.8h, v18.8h        // multiply accumulate inner loop j = 2, idx = 4..7
+        smlal2          v5.4s, v4.8h, v19.8h        // multiply accumulate inner loop j = 3, idx = 4..7
+
+        subs            w2, w2, #8                  // dstW -= 8
+        sqshrn          v0.4h, v0.4s, #7            // shift and clip the 2x16-bit final values
+        sqshrn          v1.4h, v5.4s, #7            // shift and clip the 2x16-bit final values
+        st1             {v0.4h, v1.4h}, [x1], #16   // write to dst[idx + 0..7]
 
-        cbnz                w2, 2f                      // if >0 iterations remain, jump to the wrap up section
+        cbnz            w2, 2f                      // if >0 iterations remain, jump to the wrap up section
 
-        add                 sp, sp, #32                 // clean up stack
+        add             sp, sp, #32                 // clean up stack
         ret
 
         // finish up when dstW % 8 != 0 or dstW < 16
 2:
         // load src
-        ldr                 w8, [x5], #4                // filterPos[i]
-        add                 x9, x3, w8, UXTW            // calculate the address for src load
-        ld1                 {v5.S}[0], [x9]             // src[filterPos[i] + 0..3]
+        ldr             w8, [x5], #4                // filterPos[i]
+        add             x9, x3, w8, uxtw            // calculate the address for src load
+        ld1             {v5.s}[0], [x9]             // src[filterPos[i] + 0..3]
         // load filter
-        ld1                 {v6.4H}, [x4], #8           // filter[filterSize * i + 0..3]
+        ld1             {v6.4h}, [x4], #8           // filter[filterSize * i + 0..3]
 
-        uxtl                v5.8H, v5.8B                // unsigned exten long, convert src data to 16-bit
-        smull               v0.4S, v5.4H, v6.4H         // 4 iterations of src[...] * filter[...]
-        addv                s0, v0.4S                   // add up products of src and filter values
-        sqshrn              h0, s0, #7                  // shift and clip the 2x16-bit final value
-        st1                 {v0.H}[0], [x1], #2         // dst[i] = ...
-        sub                 w2, w2, #1                  // dstW--
-        cbnz                w2, 2b
+        uxtl            v5.8h, v5.8b                // unsigned exten long, convert src data to 16-bit
+        smull           v0.4s, v5.4h, v6.4h         // 4 iterations of src[...] * filter[...]
+        addv            s0, v0.4s                   // add up products of src and filter values
+        sqshrn          h0, s0, #7                  // shift and clip the 2x16-bit final value
+        st1             {v0.h}[0], [x1], #2         // dst[i] = ...
+        sub             w2, w2, #1                  // dstW--
+        cbnz            w2, 2b
 
-        add                 sp, sp, #32                 // clean up stack
+        add             sp, sp, #32                 // clean up stack
         ret
 endfunc
diff -Nru ffmpeg-5.1.8/libswscale/aarch64/output.S ffmpeg-5.1.9/libswscale/aarch64/output.S
--- ffmpeg-5.1.8/libswscale/aarch64/output.S	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libswscale/aarch64/output.S	2026-05-05 15:50:52.000000000 +0000
@@ -21,38 +21,38 @@
 #include "libavutil/aarch64/asm.S"
 
 function ff_yuv2planeX_8_neon, export=1
-        ld1                 {v0.8B}, [x5]                   // load 8x8-bit dither
-        cbz                 w6, 1f                          // check if offsetting present
-        ext                 v0.8B, v0.8B, v0.8B, #3         // honor offsetting which can be 0 or 3 only
-1:      uxtl                v0.8H, v0.8B                    // extend dither to 16-bit
-        ushll               v1.4S, v0.4H, #12               // extend dither to 32-bit with left shift by 12 (part 1)
-        ushll2              v2.4S, v0.8H, #12               // extend dither to 32-bit with left shift by 12 (part 2)
-        mov                 x7, #0                          // i = 0
-2:      mov                 v3.16B, v1.16B                  // initialize accumulator part 1 with dithering value
-        mov                 v4.16B, v2.16B                  // initialize accumulator part 2 with dithering value
-        mov                 w8, w1                          // tmpfilterSize = filterSize
-        mov                 x9, x2                          // srcp    = src
-        mov                 x10, x0                         // filterp = filter
-3:      ldp                 x11, x12, [x9], #16             // get 2 pointers: src[j] and src[j+1]
-        add                 x11, x11, x7, lsl #1            // &src[j  ][i]
-        add                 x12, x12, x7, lsl #1            // &src[j+1][i]
-        ld1                 {v5.8H}, [x11]                  // read 8x16-bit @ src[j  ][i + {0..7}]: A,B,C,D,E,F,G,H
-        ld1                 {v6.8H}, [x12]                  // read 8x16-bit @ src[j+1][i + {0..7}]: I,J,K,L,M,N,O,P
-        ld1r                {v7.8H}, [x10], #2              // read 1x16-bit coeff X at filter[j  ] and duplicate across lanes
-        ld1r                {v16.8H}, [x10], #2             // read 1x16-bit coeff Y at filter[j+1] and duplicate across lanes
-        smlal               v3.4S, v5.4H, v7.4H             // val0 += {A,B,C,D} * X
-        smlal2              v4.4S, v5.8H, v7.8H             // val1 += {E,F,G,H} * X
-        smlal               v3.4S, v6.4H, v16.4H            // val0 += {I,J,K,L} * Y
-        smlal2              v4.4S, v6.8H, v16.8H            // val1 += {M,N,O,P} * Y
-        subs                w8, w8, #2                      // tmpfilterSize -= 2
-        b.gt                3b                              // loop until filterSize consumed
+        ld1             {v0.8b}, [x5]                   // load 8x8-bit dither
+        cbz             w6, 1f                          // check if offsetting present
+        ext             v0.8b, v0.8b, v0.8b, #3         // honor offsetting which can be 0 or 3 only
+1:      uxtl            v0.8h, v0.8b                    // extend dither to 16-bit
+        ushll           v1.4s, v0.4h, #12               // extend dither to 32-bit with left shift by 12 (part 1)
+        ushll2          v2.4s, v0.8h, #12               // extend dither to 32-bit with left shift by 12 (part 2)
+        mov             x7, #0                          // i = 0
+2:      mov             v3.16b, v1.16b                  // initialize accumulator part 1 with dithering value
+        mov             v4.16b, v2.16b                  // initialize accumulator part 2 with dithering value
+        mov             w8, w1                          // tmpfilterSize = filterSize
+        mov             x9, x2                          // srcp    = src
+        mov             x10, x0                         // filterp = filter
+3:      ldp             x11, x12, [x9], #16             // get 2 pointers: src[j] and src[j+1]
+        add             x11, x11, x7, lsl #1            // &src[j  ][i]
+        add             x12, x12, x7, lsl #1            // &src[j+1][i]
+        ld1             {v5.8h}, [x11]                  // read 8x16-bit @ src[j  ][i + {0..7}]: A,B,C,D,E,F,G,H
+        ld1             {v6.8h}, [x12]                  // read 8x16-bit @ src[j+1][i + {0..7}]: I,J,K,L,M,N,O,P
+        ld1r            {v7.8h}, [x10], #2              // read 1x16-bit coeff X at filter[j  ] and duplicate across lanes
+        ld1r            {v16.8h}, [x10], #2             // read 1x16-bit coeff Y at filter[j+1] and duplicate across lanes
+        smlal           v3.4s, v5.4h, v7.4h             // val0 += {A,B,C,D} * X
+        smlal2          v4.4s, v5.8h, v7.8h             // val1 += {E,F,G,H} * X
+        smlal           v3.4s, v6.4h, v16.4h            // val0 += {I,J,K,L} * Y
+        smlal2          v4.4s, v6.8h, v16.8h            // val1 += {M,N,O,P} * Y
+        subs            w8, w8, #2                      // tmpfilterSize -= 2
+        b.gt            3b                              // loop until filterSize consumed
 
-        sqshrun             v3.4h, v3.4s, #16               // clip16(val0>>16)
-        sqshrun2            v3.8h, v4.4s, #16               // clip16(val1>>16)
-        uqshrn              v3.8b, v3.8h, #3                // clip8(val>>19)
-        st1                 {v3.8b}, [x3], #8               // write to destination
-        subs                w4, w4, #8                      // dstW -= 8
-        add                 x7, x7, #8                      // i += 8
-        b.gt                2b                              // loop until width consumed
+        sqshrun         v3.4h, v3.4s, #16               // clip16(val0>>16)
+        sqshrun2        v3.8h, v4.4s, #16               // clip16(val1>>16)
+        uqshrn          v3.8b, v3.8h, #3                // clip8(val>>19)
+        st1             {v3.8b}, [x3], #8               // write to destination
+        subs            w4, w4, #8                      // dstW -= 8
+        add             x7, x7, #8                      // i += 8
+        b.gt            2b                              // loop until width consumed
         ret
 endfunc
diff -Nru ffmpeg-5.1.8/libswscale/aarch64/yuv2rgb_neon.S ffmpeg-5.1.9/libswscale/aarch64/yuv2rgb_neon.S
--- ffmpeg-5.1.8/libswscale/aarch64/yuv2rgb_neon.S	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libswscale/aarch64/yuv2rgb_neon.S	2026-05-05 14:22:01.000000000 +0000
@@ -23,187 +23,187 @@
 
 .macro load_yoff_ycoeff yoff ycoeff
 #if defined(__APPLE__)
-    ldp                 w9, w10, [sp, #\yoff]
+        ldp             w9, w10, [sp, #\yoff]
 #else
-    ldr                 w9,  [sp, #\yoff]
-    ldr                 w10, [sp, #\ycoeff]
+        ldr             w9,  [sp, #\yoff]
+        ldr             w10, [sp, #\ycoeff]
 #endif
 .endm
 
 .macro load_args_nv12
-    ldr                 x8,  [sp]                                       // table
-    load_yoff_ycoeff    8, 16                                           // y_offset, y_coeff
-    ld1                 {v1.1D}, [x8]
-    dup                 v0.8H, w10
-    dup                 v3.8H, w9
-    sub                 w3, w3, w0, lsl #2                              // w3 = linesize  - width * 4 (padding)
-    sub                 w5, w5, w0                                      // w5 = linesizeY - width     (paddingY)
-    sub                 w7, w7, w0                                      // w7 = linesizeC - width     (paddingC)
-    neg                 w11, w0
+        ldr             x8,  [sp]                                       // table
+        load_yoff_ycoeff 8, 16                                           // y_offset, y_coeff
+        ld1             {v1.1d}, [x8]
+        dup             v0.8h, w10
+        dup             v3.8h, w9
+        sub             w3, w3, w0, lsl #2                              // w3 = linesize  - width * 4 (padding)
+        sub             w5, w5, w0                                      // w5 = linesizeY - width     (paddingY)
+        sub             w7, w7, w0                                      // w7 = linesizeC - width     (paddingC)
+        neg             w11, w0
 .endm
 
 .macro load_args_nv21
-    load_args_nv12
+        load_args_nv12
 .endm
 
 .macro load_args_yuv420p
-    ldr                 x13, [sp]                                       // srcV
-    ldr                 w14, [sp, #8]                                   // linesizeV
-    ldr                 x8,  [sp, #16]                                  // table
-    load_yoff_ycoeff    24, 32                                          // y_offset, y_coeff
-    ld1                 {v1.1D}, [x8]
-    dup                 v0.8H, w10
-    dup                 v3.8H, w9
-    sub                 w3, w3, w0, lsl #2                              // w3 = linesize  - width * 4 (padding)
-    sub                 w5, w5, w0                                      // w5 = linesizeY - width     (paddingY)
-    sub                 w7,  w7,  w0, lsr #1                            // w7  = linesizeU - width / 2 (paddingU)
-    sub                 w14, w14, w0, lsr #1                            // w14 = linesizeV - width / 2 (paddingV)
-    lsr                 w11, w0, #1
-    neg                 w11, w11
+        ldr             x13, [sp]                                       // srcV
+        ldr             w14, [sp, #8]                                   // linesizeV
+        ldr             x8,  [sp, #16]                                  // table
+        load_yoff_ycoeff 24, 32                                          // y_offset, y_coeff
+        ld1             {v1.1d}, [x8]
+        dup             v0.8h, w10
+        dup             v3.8h, w9
+        sub             w3, w3, w0, lsl #2                              // w3 = linesize  - width * 4 (padding)
+        sub             w5, w5, w0                                      // w5 = linesizeY - width     (paddingY)
+        sub             w7,  w7,  w0, lsr #1                            // w7  = linesizeU - width / 2 (paddingU)
+        sub             w14, w14, w0, lsr #1                            // w14 = linesizeV - width / 2 (paddingV)
+        lsr             w11, w0, #1
+        neg             w11, w11
 .endm
 
 .macro load_args_yuv422p
-    ldr                 x13, [sp]                                       // srcV
-    ldr                 w14, [sp, #8]                                   // linesizeV
-    ldr                 x8,  [sp, #16]                                  // table
-    load_yoff_ycoeff    24, 32                                          // y_offset, y_coeff
-    ld1                 {v1.1D}, [x8]
-    dup                 v0.8H, w10
-    dup                 v3.8H, w9
-    sub                 w3, w3, w0, lsl #2                              // w3 = linesize  - width * 4 (padding)
-    sub                 w5, w5, w0                                      // w5 = linesizeY - width     (paddingY)
-    sub                 w7,  w7,  w0, lsr #1                            // w7  = linesizeU - width / 2 (paddingU)
-    sub                 w14, w14, w0, lsr #1                            // w14 = linesizeV - width / 2 (paddingV)
+        ldr             x13, [sp]                                       // srcV
+        ldr             w14, [sp, #8]                                   // linesizeV
+        ldr             x8,  [sp, #16]                                  // table
+        load_yoff_ycoeff 24, 32                                          // y_offset, y_coeff
+        ld1             {v1.1d}, [x8]
+        dup             v0.8h, w10
+        dup             v3.8h, w9
+        sub             w3, w3, w0, lsl #2                              // w3 = linesize  - width * 4 (padding)
+        sub             w5, w5, w0                                      // w5 = linesizeY - width     (paddingY)
+        sub             w7,  w7,  w0, lsr #1                            // w7  = linesizeU - width / 2 (paddingU)
+        sub             w14, w14, w0, lsr #1                            // w14 = linesizeV - width / 2 (paddingV)
 .endm
 
 .macro load_chroma_nv12
-    ld2                 {v16.8B, v17.8B}, [x6], #16
-    ushll               v18.8H, v16.8B, #3
-    ushll               v19.8H, v17.8B, #3
+        ld2             {v16.8b, v17.8b}, [x6], #16
+        ushll           v18.8h, v16.8b, #3
+        ushll           v19.8h, v17.8b, #3
 .endm
 
 .macro load_chroma_nv21
-    ld2                 {v16.8B, v17.8B}, [x6], #16
-    ushll               v19.8H, v16.8B, #3
-    ushll               v18.8H, v17.8B, #3
+        ld2             {v16.8b, v17.8b}, [x6], #16
+        ushll           v19.8h, v16.8b, #3
+        ushll           v18.8h, v17.8b, #3
 .endm
 
 .macro load_chroma_yuv420p
-    ld1                 {v16.8B}, [ x6], #8
-    ld1                 {v17.8B}, [x13], #8
-    ushll               v18.8H, v16.8B, #3
-    ushll               v19.8H, v17.8B, #3
+        ld1             {v16.8b}, [ x6], #8
+        ld1             {v17.8b}, [x13], #8
+        ushll           v18.8h, v16.8b, #3
+        ushll           v19.8h, v17.8b, #3
 .endm
 
 .macro load_chroma_yuv422p
-    load_chroma_yuv420p
+        load_chroma_yuv420p
 .endm
 
 .macro increment_nv12
-    ands                w15, w1, #1
-    csel                w16, w7, w11, ne                                // incC = (h & 1) ? paddincC : -width
-    add                 x6,  x6, w16, SXTW                              // srcC += incC
+        ands            w15, w1, #1
+        csel            w16, w7, w11, ne                                // incC = (h & 1) ? paddincC : -width
+        add             x6,  x6, w16, sxtw                              // srcC += incC
 .endm
 
 .macro increment_nv21
-    increment_nv12
+        increment_nv12
 .endm
 
 .macro increment_yuv420p
-    ands                w15, w1, #1
-    csel                w16,  w7, w11, ne                               // incU = (h & 1) ? paddincU : -width/2
-    csel                w17, w14, w11, ne                               // incV = (h & 1) ? paddincV : -width/2
-    add                 x6,  x6,  w16, SXTW                             // srcU += incU
-    add                 x13, x13, w17, SXTW                             // srcV += incV
+        ands            w15, w1, #1
+        csel            w16,  w7, w11, ne                               // incU = (h & 1) ? paddincU : -width/2
+        csel            w17, w14, w11, ne                               // incV = (h & 1) ? paddincV : -width/2
+        add             x6,  x6,  w16, sxtw                             // srcU += incU
+        add             x13, x13, w17, sxtw                             // srcV += incV
 .endm
 
 .macro increment_yuv422p
-    add                 x6,  x6,  w7, SXTW                              // srcU += incU
-    add                 x13, x13, w14, SXTW                             // srcV += incV
+        add             x6,  x6,  w7, sxtw                              // srcU += incU
+        add             x13, x13, w14, sxtw                             // srcV += incV
 .endm
 
 .macro compute_rgba r1 g1 b1 a1 r2 g2 b2 a2
-    add                 v20.8H, v26.8H, v20.8H                          // Y1 + R1
-    add                 v21.8H, v27.8H, v21.8H                          // Y2 + R2
-    add                 v22.8H, v26.8H, v22.8H                          // Y1 + G1
-    add                 v23.8H, v27.8H, v23.8H                          // Y2 + G2
-    add                 v24.8H, v26.8H, v24.8H                          // Y1 + B1
-    add                 v25.8H, v27.8H, v25.8H                          // Y2 + B2
-    sqrshrun            \r1, v20.8H, #1                                 // clip_u8((Y1 + R1) >> 1)
-    sqrshrun            \r2, v21.8H, #1                                 // clip_u8((Y2 + R1) >> 1)
-    sqrshrun            \g1, v22.8H, #1                                 // clip_u8((Y1 + G1) >> 1)
-    sqrshrun            \g2, v23.8H, #1                                 // clip_u8((Y2 + G1) >> 1)
-    sqrshrun            \b1, v24.8H, #1                                 // clip_u8((Y1 + B1) >> 1)
-    sqrshrun            \b2, v25.8H, #1                                 // clip_u8((Y2 + B1) >> 1)
-    movi                \a1, #255
-    movi                \a2, #255
+        add             v20.8h, v26.8h, v20.8h                          // Y1 + R1
+        add             v21.8h, v27.8h, v21.8h                          // Y2 + R2
+        add             v22.8h, v26.8h, v22.8h                          // Y1 + G1
+        add             v23.8h, v27.8h, v23.8h                          // Y2 + G2
+        add             v24.8h, v26.8h, v24.8h                          // Y1 + B1
+        add             v25.8h, v27.8h, v25.8h                          // Y2 + B2
+        sqrshrun        \r1, v20.8h, #1                                 // clip_u8((Y1 + R1) >> 1)
+        sqrshrun        \r2, v21.8h, #1                                 // clip_u8((Y2 + R1) >> 1)
+        sqrshrun        \g1, v22.8h, #1                                 // clip_u8((Y1 + G1) >> 1)
+        sqrshrun        \g2, v23.8h, #1                                 // clip_u8((Y2 + G1) >> 1)
+        sqrshrun        \b1, v24.8h, #1                                 // clip_u8((Y1 + B1) >> 1)
+        sqrshrun        \b2, v25.8h, #1                                 // clip_u8((Y2 + B1) >> 1)
+        movi            \a1, #255
+        movi            \a2, #255
 .endm
 
 .macro declare_func ifmt ofmt
 function ff_\ifmt\()_to_\ofmt\()_neon, export=1
-    load_args_\ifmt
-    mov                 w9, w1
+        load_args_\ifmt
+        mov             w9, w1
 1:
-    mov                 w8, w0                                          // w8 = width
+        mov             w8, w0                                          // w8 = width
 2:
-    movi                v5.8H, #4, lsl #8                               // 128 * (1<<3)
-    load_chroma_\ifmt
-    sub                 v18.8H, v18.8H, v5.8H                           // U*(1<<3) - 128*(1<<3)
-    sub                 v19.8H, v19.8H, v5.8H                           // V*(1<<3) - 128*(1<<3)
-    sqdmulh             v20.8H, v19.8H, v1.H[0]                         // V * v2r            (R)
-    sqdmulh             v22.8H, v18.8H, v1.H[1]                         // U * u2g
-    sqdmulh             v19.8H, v19.8H, v1.H[2]                         //           V * v2g
-    add                 v22.8H, v22.8H, v19.8H                          // U * u2g + V * v2g  (G)
-    sqdmulh             v24.8H, v18.8H, v1.H[3]                         // U * u2b            (B)
-    zip2                v21.8H, v20.8H, v20.8H                          // R2
-    zip1                v20.8H, v20.8H, v20.8H                          // R1
-    zip2                v23.8H, v22.8H, v22.8H                          // G2
-    zip1                v22.8H, v22.8H, v22.8H                          // G1
-    zip2                v25.8H, v24.8H, v24.8H                          // B2
-    zip1                v24.8H, v24.8H, v24.8H                          // B1
-    ld1                 {v2.16B}, [x4], #16                             // load luma
-    ushll               v26.8H, v2.8B,  #3                              // Y1*(1<<3)
-    ushll2              v27.8H, v2.16B, #3                              // Y2*(1<<3)
-    sub                 v26.8H, v26.8H, v3.8H                           // Y1*(1<<3) - y_offset
-    sub                 v27.8H, v27.8H, v3.8H                           // Y2*(1<<3) - y_offset
-    sqdmulh             v26.8H, v26.8H, v0.8H                           // ((Y1*(1<<3) - y_offset) * y_coeff) >> 15
-    sqdmulh             v27.8H, v27.8H, v0.8H                           // ((Y2*(1<<3) - y_offset) * y_coeff) >> 15
+        movi            v5.8h, #4, lsl #8                               // 128 * (1<<3)
+        load_chroma_\ifmt
+        sub             v18.8h, v18.8h, v5.8h                           // U*(1<<3) - 128*(1<<3)
+        sub             v19.8h, v19.8h, v5.8h                           // V*(1<<3) - 128*(1<<3)
+        sqdmulh         v20.8h, v19.8h, v1.h[0]                         // V * v2r            (R)
+        sqdmulh         v22.8h, v18.8h, v1.h[1]                         // U * u2g
+        sqdmulh         v19.8h, v19.8h, v1.h[2]                         //           V * v2g
+        add             v22.8h, v22.8h, v19.8h                          // U * u2g + V * v2g  (G)
+        sqdmulh         v24.8h, v18.8h, v1.h[3]                         // U * u2b            (B)
+        zip2            v21.8h, v20.8h, v20.8h                          // R2
+        zip1            v20.8h, v20.8h, v20.8h                          // R1
+        zip2            v23.8h, v22.8h, v22.8h                          // G2
+        zip1            v22.8h, v22.8h, v22.8h                          // G1
+        zip2            v25.8h, v24.8h, v24.8h                          // B2
+        zip1            v24.8h, v24.8h, v24.8h                          // B1
+        ld1             {v2.16b}, [x4], #16                             // load luma
+        ushll           v26.8h, v2.8b,  #3                              // Y1*(1<<3)
+        ushll2          v27.8h, v2.16b, #3                              // Y2*(1<<3)
+        sub             v26.8h, v26.8h, v3.8h                           // Y1*(1<<3) - y_offset
+        sub             v27.8h, v27.8h, v3.8h                           // Y2*(1<<3) - y_offset
+        sqdmulh         v26.8h, v26.8h, v0.8h                           // ((Y1*(1<<3) - y_offset) * y_coeff) >> 15
+        sqdmulh         v27.8h, v27.8h, v0.8h                           // ((Y2*(1<<3) - y_offset) * y_coeff) >> 15
 
 .ifc \ofmt,argb // 1 2 3 0
-    compute_rgba        v5.8B,v6.8B,v7.8B,v4.8B, v17.8B,v18.8B,v19.8B,v16.8B
+        compute_rgba    v5.8b,v6.8b,v7.8b,v4.8b, v17.8b,v18.8b,v19.8b,v16.8b
 .endif
 
 .ifc \ofmt,rgba // 0 1 2 3
-    compute_rgba        v4.8B,v5.8B,v6.8B,v7.8B, v16.8B,v17.8B,v18.8B,v19.8B
+        compute_rgba    v4.8b,v5.8b,v6.8b,v7.8b, v16.8b,v17.8b,v18.8b,v19.8b
 .endif
 
 .ifc \ofmt,abgr // 3 2 1 0
-    compute_rgba        v7.8B,v6.8B,v5.8B,v4.8B, v19.8B,v18.8B,v17.8B,v16.8B
+        compute_rgba    v7.8b,v6.8b,v5.8b,v4.8b, v19.8b,v18.8b,v17.8b,v16.8b
 .endif
 
 .ifc \ofmt,bgra // 2 1 0 3
-    compute_rgba        v6.8B,v5.8B,v4.8B,v7.8B, v18.8B,v17.8B,v16.8B,v19.8B
+        compute_rgba    v6.8b,v5.8b,v4.8b,v7.8b, v18.8b,v17.8b,v16.8b,v19.8b
 .endif
 
-    st4                 { v4.8B, v5.8B, v6.8B, v7.8B}, [x2], #32
-    st4                 {v16.8B,v17.8B,v18.8B,v19.8B}, [x2], #32
-    subs                w8, w8, #16                                     // width -= 16
-    b.gt                2b
-    add                 x2, x2, w3, SXTW                                // dst  += padding
-    add                 x4, x4, w5, SXTW                                // srcY += paddingY
-    increment_\ifmt
-    subs                w1, w1, #1                                      // height -= 1
-    b.gt                1b
-    mov                 w0, w9
-    ret
+        st4             { v4.8b, v5.8b, v6.8b, v7.8b}, [x2], #32
+        st4             {v16.8b,v17.8b,v18.8b,v19.8b}, [x2], #32
+        subs            w8, w8, #16                                     // width -= 16
+        b.gt            2b
+        add             x2, x2, w3, sxtw                                // dst  += padding
+        add             x4, x4, w5, sxtw                                // srcY += paddingY
+        increment_\ifmt
+        subs            w1, w1, #1                                      // height -= 1
+        b.gt            1b
+        mov             w0, w9
+        ret
 endfunc
 .endm
 
 .macro declare_rgb_funcs ifmt
-    declare_func \ifmt, argb
-    declare_func \ifmt, rgba
-    declare_func \ifmt, abgr
-    declare_func \ifmt, bgra
+        declare_func    \ifmt, argb
+        declare_func    \ifmt, rgba
+        declare_func    \ifmt, abgr
+        declare_func    \ifmt, bgra
 .endm
 
 declare_rgb_funcs nv12
diff -Nru ffmpeg-5.1.8/libswscale/gamma.c ffmpeg-5.1.9/libswscale/gamma.c
--- ffmpeg-5.1.8/libswscale/gamma.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libswscale/gamma.c	2026-05-05 14:22:01.000000000 +0000
@@ -69,4 +69,3 @@
 
     return 0;
 }
-
diff -Nru ffmpeg-5.1.8/libswscale/output.c ffmpeg-5.1.9/libswscale/output.c
--- ffmpeg-5.1.8/libswscale/output.c	2025-11-26 02:41:35.000000000 +0000
+++ ffmpeg-5.1.9/libswscale/output.c	2026-05-05 15:50:55.000000000 +0000
@@ -405,8 +405,10 @@
     for (i=0; i<dstW; i++) {
         int val = dither[(i + offset) & 7] << 12;
         int j;
-        for (j=0; j<filterSize; j++)
-            val += src[j][i] * filter[j];
+        for (j=0; j<filterSize; j++) {
+            val += (unsigned)(src[j][i] * filter[j]);
+
+        }
 
         dest[i]= av_clip_uint8(val>>19);
     }
@@ -1037,8 +1039,8 @@
         int j;
         unsigned Y1 = -0x40000000;
         unsigned Y2 = -0x40000000;
-        int U  = -(128 << 23); // 19
-        int V  = -(128 << 23);
+        unsigned U  = -(128 << 23); // 19
+        unsigned V  = -(128 << 23);
         int R, G, B;
 
         for (j = 0; j < lumFilterSize; j++) {
@@ -1068,8 +1070,8 @@
         Y1 += 0x10000;
         Y2 = (int)Y2 >> 14;
         Y2 += 0x10000;
-        U  >>= 14;
-        V  >>= 14;
+        U  = (int)U >> 14;
+        V  = (int)V >> 14;
 
         // 8 bits: 27 -> 17 bits, 16 bits: 31 - 14 = 17 bits
         Y1 -= c->yuv2rgb_y_offset;
@@ -1177,7 +1179,7 @@
 {
     const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     int i;
-    int A1 = 0xffff<<14, A2= 0xffff<<14;
+    SUINT A1 = 0xffff<<14, A2= 0xffff<<14;
 
     if (uvalpha < 2048) {
         for (i = 0; i < ((dstW + 1) >> 1); i++) {
@@ -1195,8 +1197,8 @@
             Y2 += (1 << 13) - (1 << 29);
 
             if (hasAlpha) {
-                A1 = abuf0[i * 2    ] * (1 << 11);
-                A2 = abuf0[i * 2 + 1] * (1 << 11);
+                A1 = abuf0[i * 2    ] * (SUINT)(1 << 11);
+                A2 = abuf0[i * 2 + 1] * (SUINT)(1 << 11);
 
                 A1 += 1 << 13;
                 A2 += 1 << 13;
diff -Nru ffmpeg-5.1.8/libswscale/rgb2rgb_template.c ffmpeg-5.1.9/libswscale/rgb2rgb_template.c
--- ffmpeg-5.1.8/libswscale/rgb2rgb_template.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libswscale/rgb2rgb_template.c	2026-05-05 15:50:55.000000000 +0000
@@ -466,11 +466,11 @@
 
         for (i = 0; i < chromWidth; i++) {
 #if HAVE_BIGENDIAN
-            *idst++ = (uc[0] << 24) + (yc[0] << 16) +
+            *idst++ = ((unsigned)uc[0] << 24) + (yc[0] << 16) +
                       (vc[0] <<  8) + (yc[1] <<  0);
 #else
             *idst++ = uc[0] + (yc[0] << 8) +
-                      (vc[0] << 16) + (yc[1] << 24);
+                      (vc[0] << 16) + ((unsigned)yc[1] << 24);
 #endif
             yc += 2;
             uc++;
diff -Nru ffmpeg-5.1.8/libswscale/swscale_unscaled.c ffmpeg-5.1.9/libswscale/swscale_unscaled.c
--- ffmpeg-5.1.8/libswscale/swscale_unscaled.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libswscale/swscale_unscaled.c	2026-05-05 15:50:55.000000000 +0000
@@ -126,9 +126,13 @@
                       int srcSliceY, int srcSliceH, int width,
                       uint8_t *dst, int dstStride)
 {
+    if (!srcSliceH)
+        return;
+    av_assert0(srcSliceH > 0);
+
     dst += dstStride * srcSliceY;
     if (dstStride == srcStride && srcStride > 0) {
-        memcpy(dst, src, srcSliceH * dstStride);
+        memcpy(dst, src, (srcSliceH - 1) * dstStride + width);
     } else {
         int i;
         for (i = 0; i < srcSliceH; i++) {
diff -Nru ffmpeg-5.1.8/libswscale/utils.c ffmpeg-5.1.9/libswscale/utils.c
--- ffmpeg-5.1.8/libswscale/utils.c	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/libswscale/utils.c	2026-05-05 15:50:55.000000000 +0000
@@ -273,7 +273,8 @@
         if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
            int16_t *filterCopy = NULL;
            if (filterSize > 4) {
-               if (!FF_ALLOC_TYPED_ARRAY(filterCopy, dstW * filterSize))
+               filterCopy = av_malloc_array(dstW, filterSize * sizeof(*filterCopy));
+               if (!filterCopy)
                    return AVERROR(ENOMEM);
                memcpy(filterCopy, filter, dstW * filterSize * sizeof(int16_t));
            }
@@ -448,6 +449,11 @@
             sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6;
         av_assert0(sizeFactor > 0);
 
+        if (sizeFactor > 50) {
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+
         if (xInc <= 1 << 16)
             filterSize = 1 + sizeFactor;    // upscale
         else
@@ -456,7 +462,8 @@
         filterSize = FFMIN(filterSize, srcW - 2);
         filterSize = FFMAX(filterSize, 1);
 
-        if (!FF_ALLOC_TYPED_ARRAY(filter, dstW * filterSize))
+        filter = av_malloc_array(dstW, filterSize * sizeof(*filter));
+        if (!filter)
             goto nomem;
         xDstInSrc = ((dstPos*(int64_t)xInc)>>7) - ((srcPos*0x10000LL)>>7);
         for (i = 0; i < dstW; i++) {
@@ -555,7 +562,8 @@
     if (dstFilter)
         filter2Size += dstFilter->length - 1;
     av_assert0(filter2Size > 0);
-    if (!FF_ALLOCZ_TYPED_ARRAY(filter2, dstW * filter2Size))
+    filter2 = av_calloc(dstW, filter2Size * sizeof(*filter2));
+    if (!filter2)
         goto nomem;
     for (i = 0; i < dstW; i++) {
         int j, k;
@@ -714,7 +722,8 @@
 
     // Note the +1 is for the MMX scaler which reads over the end
     /* align at 16 for AltiVec (needed by hScale_altivec_real) */
-    if (!FF_ALLOCZ_TYPED_ARRAY(*outFilter, *outFilterSize * (dstW + 3)))
+    *outFilter = av_calloc(dstW + 3, *outFilterSize * sizeof(**outFilter));
+    if (!*outFilter)
         goto nomem;
 
     /* normalize & store in outFilter */
@@ -1408,8 +1417,8 @@
     if (!srcFilter)
         srcFilter = &dummyFilter;
 
-    c->lumXInc      = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
-    c->lumYInc      = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
+    int64_t lumXInc      = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
+    int64_t lumYInc      = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
     c->dstFormatBpp = av_get_bits_per_pixel(desc_dst);
     c->srcFormatBpp = av_get_bits_per_pixel(desc_src);
     c->vRounder     = 4 * 0x0001000100010001ULL;
@@ -1584,8 +1593,8 @@
     } else
         c->canMMXEXTBeUsed = 0;
 
-    c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW;
-    c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH;
+    int64_t chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW;
+    int64_t chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH;
 
     /* Match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src
      * to pixel n-2 of dst, but only for the FAST_BILINEAR mode otherwise do
@@ -1596,15 +1605,26 @@
      * some special code for the first and last pixel */
     if (flags & SWS_FAST_BILINEAR) {
         if (c->canMMXEXTBeUsed) {
-            c->lumXInc += 20;
-            c->chrXInc += 20;
+            lumXInc += 20;
+            chrXInc += 20;
         }
         // we don't use the x86 asm scaler if MMX is available
         else if (INLINE_MMX(cpu_flags) && c->dstBpc <= 14) {
-            c->lumXInc = ((int64_t)(srcW       - 2) << 16) / (dstW       - 2) - 20;
-            c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20;
+            lumXInc = ((int64_t)(srcW       - 2) << 16) / (dstW       - 2) - 20;
+            chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20;
         }
     }
+    if (chrXInc < 10 || chrXInc > INT_MAX ||
+        chrYInc < 10 || chrYInc > INT_MAX ||
+        lumXInc < 10 || lumXInc > INT_MAX ||
+        lumYInc < 10 || lumYInc > INT_MAX)
+        return AVERROR_PATCHWELCOME;
+
+    c->lumXInc = lumXInc;
+    c->lumYInc = lumYInc;
+    c->chrXInc = chrXInc;
+    c->chrYInc = chrYInc;
+
 
     // hardcoded for now
     c->gamma_value = 2.2;
@@ -1854,13 +1874,15 @@
                                 PPC_ALTIVEC(cpu_flags) ? 8 :
                                 have_neon(cpu_flags)   ? 2 : 1;
 
-        if ((ret = initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize,
+        ret = initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize,
                        c->lumYInc, srcH, dstH, filterAlign, (1 << 12),
                        (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
                        cpu_flags, srcFilter->lumV, dstFilter->lumV,
                        c->param,
                        get_local_pos(c, 0, 0, 1),
-                       get_local_pos(c, 0, 0, 1))) < 0)
+                       get_local_pos(c, 0, 0, 1));
+        int usecascade = (ret == RETCODE_USE_CASCADE);
+        if (ret < 0 && !usecascade)
             goto fail;
         if ((ret = initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize,
                        c->chrYInc, c->chrSrcH, c->chrDstH,
@@ -1872,10 +1894,15 @@
                        get_local_pos(c, c->chrDstVSubSample, c->dst_v_chr_pos, 1))) < 0)
 
             goto fail;
+        if (usecascade) {
+            ret = RETCODE_USE_CASCADE;
+            goto fail;
+        }
 
 #if HAVE_ALTIVEC
-        if (!FF_ALLOC_TYPED_ARRAY(c->vYCoeffsBank, c->vLumFilterSize * c->dstH) ||
-            !FF_ALLOC_TYPED_ARRAY(c->vCCoeffsBank, c->vChrFilterSize * c->chrDstH))
+        c->vYCoeffsBank = av_malloc_array(c->dstH, c->vLumFilterSize * sizeof(*c->vYCoeffsBank));
+        c->vCCoeffsBank = av_malloc_array(c->chrDstH, c->vChrFilterSize * sizeof(*c->vCCoeffsBank));
+        if (c->vYCoeffsBank == NULL || c->vCCoeffsBank == NULL)
             goto nomem;
 
         for (i = 0; i < c->vLumFilterSize * c->dstH; i++) {
diff -Nru ffmpeg-5.1.8/libswscale/vscale.c ffmpeg-5.1.9/libswscale/vscale.c
--- ffmpeg-5.1.8/libswscale/vscale.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libswscale/vscale.c	2026-05-05 14:22:01.000000000 +0000
@@ -318,5 +318,3 @@
             lumCtx->pfn.yuv2anyX = yuv2anyX;
     }
 }
-
-
diff -Nru ffmpeg-5.1.8/libswscale/x86/yuv2rgb_template.c ffmpeg-5.1.9/libswscale/x86/yuv2rgb_template.c
--- ffmpeg-5.1.8/libswscale/x86/yuv2rgb_template.c	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/libswscale/x86/yuv2rgb_template.c	2026-05-05 15:50:52.000000000 +0000
@@ -26,6 +26,23 @@
 #include "libavutil/x86/asm.h"
 #include "libswscale/swscale_internal.h"
 
+#undef EMMS_IF_MMX
+
+#if defined(COMPILE_TEMPLATE_MMX) || defined(COMPILE_TEMPLATE_MMXEXT)
+// Don't use emms_c() directly as it may entail an av_get_cpu_flags() call.
+#if HAVE_MMX_INLINE
+#   define EMMS_IF_MMX __asm__ volatile ("emms" ::: "memory");
+#elif HAVE_MM_EMPTY
+#   include <mmintrin.h>
+#   define EMMS_IF_MMX _mm_empty();
+#else
+#   include "libavutil/x86/emms.h"
+#   define EMMS_IF_MMX emms_c();
+#endif
+#else
+#define EMMS_IF_MMX
+#endif
+
 #define YUV2RGB_LOOP(depth)                                          \
     h_size = (c->dstW + 7) & ~7;                                     \
     if (h_size * depth > FFABS(dstStride[0]))                        \
@@ -84,6 +101,7 @@
 
         RENAME(ff_yuv_420_rgb15)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
     }
+    EMMS_IF_MMX
     return srcSliceH;
 }
 
@@ -104,6 +122,7 @@
 
         RENAME(ff_yuv_420_rgb16)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
     }
+    EMMS_IF_MMX
     return srcSliceH;
 }
 
@@ -118,6 +137,7 @@
 
         RENAME(ff_yuv_420_rgb32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
     }
+    EMMS_IF_MMX
     return srcSliceH;
 }
 
@@ -132,6 +152,7 @@
 
         RENAME(ff_yuv_420_bgr32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
     }
+    EMMS_IF_MMX
     return srcSliceH;
 }
 
@@ -146,6 +167,7 @@
         const uint8_t *pa = src[3] + y * srcStride[3];
         RENAME(ff_yuva_420_rgb32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
     }
+    EMMS_IF_MMX
     return srcSliceH;
 }
 
@@ -161,6 +183,7 @@
         const uint8_t *pa = src[3] + y * srcStride[3];
         RENAME(ff_yuva_420_bgr32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
     }
+    EMMS_IF_MMX
     return srcSliceH;
 }
 #endif
@@ -177,6 +200,7 @@
 
         RENAME(ff_yuv_420_rgb24)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
     }
+    EMMS_IF_MMX
     return srcSliceH;
 }
 
@@ -191,6 +215,7 @@
 
         RENAME(ff_yuv_420_bgr24)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
     }
+    EMMS_IF_MMX
     return srcSliceH;
 }
 #endif
diff -Nru ffmpeg-5.1.8/tests/extended.ffconcat ffmpeg-5.1.9/tests/extended.ffconcat
--- ffmpeg-5.1.8/tests/extended.ffconcat	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/tests/extended.ffconcat	2026-05-05 14:22:01.000000000 +0000
@@ -111,4 +111,3 @@
 
 file      %SRCFILE%
 inpoint   00:00.40
-
diff -Nru ffmpeg-5.1.8/tests/fate/ffprobe.mak ffmpeg-5.1.9/tests/fate/ffprobe.mak
--- ffmpeg-5.1.8/tests/fate/ffprobe.mak	2025-11-23 02:57:58.000000000 +0000
+++ ffmpeg-5.1.9/tests/fate/ffprobe.mak	2026-05-05 14:22:01.000000000 +0000
@@ -38,4 +38,3 @@
 FATE_FFPROBE += $(FATE_FFPROBE-yes)
 
 fate-ffprobe: $(FATE_FFPROBE)
-
diff -Nru ffmpeg-5.1.8/tests/fate/lossless-audio.mak ffmpeg-5.1.9/tests/fate/lossless-audio.mak
--- ffmpeg-5.1.8/tests/fate/lossless-audio.mak	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/tests/fate/lossless-audio.mak	2026-05-05 15:50:52.000000000 +0000
@@ -30,4 +30,3 @@
 
 FATE_SAMPLES_FFMPEG += $(FATE_SAMPLES_LOSSLESS_AUDIO)
 fate-lossless-audio: $(FATE_SAMPLES_LOSSLESS_AUDIO)
-
diff -Nru ffmpeg-5.1.8/tests/ref/fate/zmbv-8bit ffmpeg-5.1.9/tests/ref/fate/zmbv-8bit
--- ffmpeg-5.1.8/tests/ref/fate/zmbv-8bit	2025-11-26 02:41:32.000000000 +0000
+++ ffmpeg-5.1.9/tests/ref/fate/zmbv-8bit	2026-05-05 15:50:55.000000000 +0000
@@ -278,4 +278,3 @@
 0,        272,        272,        1,   192000, 0xd08e49d1
 0,        273,        273,        1,   192000, 0xd08e49d1
 0,        274,        274,        1,   192000, 0xd08e49d1
-0,        275,        275,        1,   192000, 0x1f34135f
diff -Nru ffmpeg-5.1.8/tests/simple1.ffconcat ffmpeg-5.1.9/tests/simple1.ffconcat
--- ffmpeg-5.1.8/tests/simple1.ffconcat	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/tests/simple1.ffconcat	2026-05-05 14:22:01.000000000 +0000
@@ -9,4 +9,3 @@
 inpoint   00:00.20
 outpoint  00:00.40
 file_packet_meta dummy 1
-
diff -Nru ffmpeg-5.1.8/tests/simple2.ffconcat ffmpeg-5.1.9/tests/simple2.ffconcat
--- ffmpeg-5.1.8/tests/simple2.ffconcat	2025-11-21 01:15:18.000000000 +0000
+++ ffmpeg-5.1.9/tests/simple2.ffconcat	2026-05-05 14:22:01.000000000 +0000
@@ -18,4 +18,3 @@
 file      %SRCFILE%
 inpoint   00:01.80
 outpoint  00:02.00
-
diff -Nru ffmpeg-5.1.8/tools/check_arm_indent.sh ffmpeg-5.1.9/tools/check_arm_indent.sh
--- ffmpeg-5.1.8/tools/check_arm_indent.sh	1970-01-01 00:00:00.000000000 +0000
+++ ffmpeg-5.1.9/tools/check_arm_indent.sh	2026-05-05 14:22:01.000000000 +0000
@@ -0,0 +1,58 @@
+#!/bin/sh
+#
+# Copyright (c) 2025 Martin Storsjo
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+cd $(dirname $0)/..
+
+if [ "$1" = "--apply" ]; then
+    apply=1
+fi
+
+ret=0
+
+for i in */aarch64/*.S */aarch64/*/*.S; do
+    if ! [ -f "$i" ]; then
+        continue
+    fi
+    case $i in
+        libavcodec/aarch64/h264idct_neon.S|libavcodec/aarch64/h26x/epel_neon.S|libavcodec/aarch64/h26x/qpel_neon.S|libavcodec/aarch64/vc1dsp_neon.S)
+        # Skip files with known (and tolerated) deviations from the tool.
+        continue
+    esac
+    ./tools/indent_arm_assembly.pl < "$i" > tmp.S || ret=$?
+    if ! git diff --quiet --no-index "$i" tmp.S; then
+        if [ -n "$apply" ]; then
+            mv tmp.S "$i"
+        else
+            git --no-pager diff --no-index "$i" tmp.S
+        fi
+        ret=1
+    fi
+done
+
+rm -f tmp.S
+
+exit $ret
diff -Nru ffmpeg-5.1.8/tools/indent_arm_assembly.pl ffmpeg-5.1.9/tools/indent_arm_assembly.pl
--- ffmpeg-5.1.8/tools/indent_arm_assembly.pl	1970-01-01 00:00:00.000000000 +0000
+++ ffmpeg-5.1.9/tools/indent_arm_assembly.pl	2026-05-05 14:22:01.000000000 +0000
@@ -0,0 +1,243 @@
+#!/usr/bin/env perl
+#
+# Copyright (c) 2025 Martin Storsjo
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+# A script for reformatting ARM/AArch64 assembly according to the following
+# style:
+# - Instructions start after 8 columns, operands start after 24 columns
+# - Vector register layouts and modifiers like "uxtw" are written in lowercase
+# - Optionally align operand columns vertically according to their
+#   maximum width (accommodating e.g. x0 vs x10, or v0.8b vs v16.16b).
+#
+# The script can be executed as "indent_arm_assembly.pl file [outfile]".
+# If no outfile is specified, the given file is overwritten in place.
+#
+# Alternatively, if no file parameters are given, the script reads input
+# code on stdin, and outputs the reformatted code on stdout.
+
+use strict;
+
+my $indent_operands = 0;
+my $instr_indent = 8;
+my $operand_indent = 24;
+my $match_indent = 0;
+my $file;
+my $outfile;
+
+while (@ARGV) {
+    my $opt = shift;
+
+    if ($opt eq "-operands") {
+        $indent_operands = 1;
+    } elsif ($opt eq "-indent") {
+        $instr_indent = shift;
+    } elsif ($opt eq "-operand-indent") {
+        $operand_indent = shift;
+    } elsif ($opt eq "-match-indent") {
+        $match_indent = 1;
+    } else {
+        if (!$file) {
+            $file = $opt;
+        } elsif (!$outfile) {
+            $outfile = $opt;
+        } else {
+            die "Unrecognized parameter $opt\n";
+        }
+    }
+}
+
+if ($operand_indent < $instr_indent) {
+    die "Can't indent operands to $operand_indent while indenting " .
+        "instructions to $instr_indent\n";
+}
+
+# Return a string consisting of n spaces
+sub spaces {
+    my $n = $_[0];
+    return " " x $n;
+}
+
+sub indentcolumns {
+    my $input = $_[0];
+    my $chars = $_[1];
+    my @operands = split(/,/, $input);
+    my $num = @operands;
+    my $ret = "";
+    for (my $i = 0; $i < $num; $i++) {
+        my $cur = $operands[$i];
+        # Trim out leading/trailing whitespace
+        $cur =~ s/^\s+|\s+$//g;
+        $ret .= $cur;
+        if ($i + 1 < $num) {
+            # If we have a following operand, add a comma and whitespace to
+            # align the next operand.
+            my $next = $operands[$i+1];
+            my $len = length($cur);
+            if ($len > $chars) {
+                # If this operand was too wide for the intended column width,
+                # don't try to realign the line at all, just return the input
+                # untouched.
+                return $input;
+            }
+            my $pad = $chars - $len;
+            if ($next =~ /[su]xt[bhw]|[la]s[lr]/) {
+                # If the next item isn't a regular operand, but a modifier,
+                # don't try to align that. E.g. "add x0,  x0,  w1, uxtw #1".
+                $pad = 0;
+            }
+            $ret .= "," . spaces(1 + $pad);
+        }
+    }
+    return $ret;
+}
+
+# Realign the operands part of an instruction line, making each operand
+# take up the maximum width for that kind of operand.
+sub columns {
+    my $rest = $_[0];
+    if ($rest !~ /,/) {
+        # No commas, no operands to split and align
+        return $rest;
+    }
+    if ($rest =~ /{|[^\w]\[/) {
+        # Check for instructions that use register ranges, like {v0.8b,v1.8b}
+        # or mem address operands, like "ldr x0, [sp]" - we skip trying to
+        # realign these.
+        return $rest;
+    }
+    if ($rest =~ /v[0-9]+\.[0-9]+[bhsd]/) {
+        # If we have references to aarch64 style vector registers, like
+        # v0.8b, then align all operands to the maximum width of such
+        # operands - v16.16b.
+        #
+        # TODO: Ideally, we'd handle mixed operand types individually.
+        return indentcolumns($rest, 7);
+    }
+    # Indent operands according to the maximum width of regular registers,
+    # like x10.
+    return indentcolumns($rest, 3);
+}
+
+my $in;
+my $out;
+my $tempfile;
+
+if ($file) {
+    open(INPUT, "$file") or die "Unable to open $file: $!";
+    $in = *INPUT;
+    if ($outfile) {
+        open(OUTPUT, ">$outfile") or die "Unable to open $outfile: $!";
+    } else {
+        $tempfile = "$file.tmp";
+        open(OUTPUT, ">$tempfile") or die "Unable to open $tempfile: $!";
+    }
+    $out = *OUTPUT;
+} else {
+    $in = *STDIN;
+    $out = *STDOUT;
+}
+
+while (<$in>) {
+    # Strip the trailing newline; it is re-added when the line is printed.
+    chomp;
+    if (/^([\.\w\d]+:)?(\s+)([\w\\][\w\\\.]*)(?:(\s+)(.*)|$)/) {
+        my $label = $1;
+        my $indent = $2;
+        my $instr = $3;
+        my $origspace = $4;
+        my $rest = $5;
+
+        my $orig_operand_indent = length($label) + length($indent) +
+                                  length($instr) + length($origspace);
+
+        if ($indent_operands) {
+            $rest = columns($rest);
+        }
+
+        my $size = $instr_indent;
+        if ($match_indent) {
+            # Try to check the current attempted indent size and normalize
+            # to it; match existing indent sizes of 4, 8, 10 and 12 columns.
+            my $cur_indent = length($label) + length($indent);
+            if ($cur_indent >= 3 && $cur_indent <= 5) {
+                $size = 4;
+            } elsif ($cur_indent >= 7 && $cur_indent <= 9) {
+                $size = 8;
+            } elsif ($cur_indent == 10 || $cur_indent == 12) {
+                $size = $cur_indent;
+            }
+        }
+        if (length($label) >= $size) {
+            # Not enough space for the label; just add a space between the label
+            # and the instruction.
+            $indent = " ";
+        } else {
+            $indent = spaces($size - length($label));
+        }
+
+        my $instr_end = length($label) + length($indent) + length($instr);
+        $size = $operand_indent - $instr_end;
+        if ($match_indent) {
+            # Check how the operands currently seem to be indented.
+            my $cur_indent = $orig_operand_indent;
+            if ($cur_indent >= 11 && $cur_indent <= 13) {
+                $size = 12;
+            } elsif ($cur_indent >= 14 && $cur_indent <= 17) {
+                $size = 16;
+            } elsif ($cur_indent >= 18 && $cur_indent <= 22) {
+                $size = 20;
+            } elsif ($cur_indent >= 23 && $cur_indent <= 27) {
+                $size = 24;
+            }
+            $size -= $instr_end;
+        }
+        my $operand_space = " ";
+        if ($size > 0) {
+            $operand_space = spaces($size);
+        }
+
+        # Lowercase the aarch64 vector layout description, .8B -> .8b
+        $rest =~ s/(\.[84216]*[BHSD])/lc($1)/ge;
+        # Lowercase modifiers like "uxtw" or "lsl"
+        $rest =~ s/([SU]XT[BWH]|[LA]S[LR])/lc($1)/ge;
+
+        # Reassemble the line
+        if ($rest eq "") {
+            $_ = $label . $indent . $instr;
+        } else {
+            $_ = $label . $indent . $instr . $operand_space . $rest;
+        }
+    }
+    print $out $_ . "\n";
+}
+
+if ($file) {
+    close(INPUT);
+    close(OUTPUT);
+}
+if ($tempfile) {
+    rename($tempfile, $file);
+}