Version in base suite: 5.1.8-0+deb12u1 Base version: ffmpeg_5.1.8-0+deb12u1 Target version: ffmpeg_5.1.9-0+deb12u1 Base file: /srv/ftp-master.debian.org/ftp/pool/main/f/ffmpeg/ffmpeg_5.1.8-0+deb12u1.dsc Target file: /srv/ftp-master.debian.org/policy/pool/main/f/ffmpeg/ffmpeg_5.1.9-0+deb12u1.dsc .forgejo/pre-commit/config.yaml | 23 + .forgejo/workflows/lint.yml | 29 + .forgejo/workflows/test.yml | 80 ++++ COPYING.LGPLv2.1 | 18 Changelog | 158 ++++++++ RELEASE | 2 VERSION | 2 configure | 2 debian/changelog | 6 doc/Doxyfile | 2 doc/build_system.txt | 1 doc/dev_community/resolution_process.md | 1 doc/fate_config.sh.template | 5 doc/nut.texi | 1 doc/undefined.txt | 1 ffbuild/libversion.sh | 2 fftools/ffmpeg.h | 3 fftools/ffmpeg_opt.c | 10 libavcodec/aarch64/aacpsdsp_neon.S | 218 +++++------ libavcodec/aarch64/fft_neon.S | 24 - libavcodec/aarch64/h264cmc_neon.S | 414 +++++++++++----------- libavcodec/aarch64/h264dsp_neon.S | 602 ++++++++++++++++---------------- libavcodec/aarch64/h264qpel_neon.S | 562 ++++++++++++++--------------- libavcodec/aarch64/hevcdsp_idct_neon.S | 294 +++++++-------- libavcodec/aarch64/hpeldsp_neon.S | 376 +++++++++---------- libavcodec/aarch64/me_cmp_neon.S | 2 libavcodec/aarch64/neon.S | 228 ++++++------ libavcodec/aarch64/opusdsp_neon.S | 114 +++--- libavcodec/aarch64/sbrdsp_neon.S | 294 +++++++-------- libavcodec/aarch64/simple_idct_neon.S | 398 ++++++++++----------- libavcodec/aarch64/vp8dsp_neon.S | 304 ++++++++-------- libavcodec/adpcm.c | 2 libavcodec/alsdec.c | 17 libavcodec/arm/int_neon.S | 1 libavcodec/av1dec.c | 16 libavcodec/bmp.c | 2 libavcodec/cfhd.c | 9 libavcodec/cfhd.h | 9 libavcodec/cfhdenc.c | 12 libavcodec/cljrdec.c | 1 libavcodec/dca_xll.c | 20 - libavcodec/dfpwmdec.c | 5 libavcodec/dv_profile.c | 1 libavcodec/dvdsub_parser.c | 2 libavcodec/escape130.c | 2 libavcodec/exr.c | 21 - libavcodec/ffv1_template.c | 1 libavcodec/ffv1enc_template.c | 1 libavcodec/flashsv.c | 3 libavcodec/golomb.h | 4 libavcodec/h264_direct.c | 34 + 
libavcodec/h264_mc_template.c | 1 libavcodec/h264_parser.c | 11 libavcodec/h264_refs.c | 6 libavcodec/h264_slice.c | 9 libavcodec/hevc_cabac.c | 1 libavcodec/imgconvert.c | 1 libavcodec/imm5.c | 2 libavcodec/interplayacm.c | 3 libavcodec/jpeg2000dec.c | 7 libavcodec/lcldec.c | 4 libavcodec/magicyuv.c | 3 libavcodec/mdec.c | 3 libavcodec/mjpegdec.c | 10 libavcodec/mpegaudiodsp_template.c | 1 libavcodec/mpegaudioenc_template.c | 1 libavcodec/mpegvideo_enc.c | 17 libavcodec/msmpeg4.c | 1 libavcodec/notchlc.c | 6 libavcodec/omx.c | 5 libavcodec/qdm2.c | 2 libavcodec/ralf.c | 1 libavcodec/rasc.c | 10 libavcodec/snow_dwt.c | 2 libavcodec/svq1dec.c | 5 libavcodec/tdsc.c | 10 libavcodec/vp3.c | 2 libavcodec/vp9.c | 18 libavcodec/wmaenc.c | 4 libavcodec/x86/fmtconvert.asm | 1 libavcodec/x86/mpegvideoencdsp.asm | 1 libavcodec/xxan.c | 2 libavcodec/zmbv.c | 14 libavfilter/aarch64/vf_nlmeans_neon.S | 78 ++-- libavfilter/af_amerge.c | 18 libavfilter/af_lv2.c | 6 libavfilter/af_pan.c | 4 libavfilter/afir_template.c | 2 libavfilter/convolution.h | 1 libavfilter/qp_table.c | 1 libavfilter/scale_eval.c | 13 libavfilter/scene_sad.c | 1 libavfilter/vf_codecview.c | 17 libavfilter/vf_convolution.c | 36 - libavfilter/vf_find_rect.c | 55 +- libavfilter/vf_kerndeint.c | 6 libavfilter/vf_libopencv.c | 2 libavfilter/vf_neighbor_opencl.c | 3 libavfilter/vf_overlay_cuda.cu | 1 libavfilter/vf_scale.c | 4 libavfilter/vf_stack.c | 38 +- libavfilter/vf_v360.c | 3 libavfilter/vf_zscale.c | 13 libavformat/avidec.c | 10 libavformat/cafdec.c | 6 libavformat/concat.c | 12 libavformat/dash.c | 2 libavformat/dashdec.c | 84 ++-- libavformat/demux.c | 11 libavformat/dhav.c | 4 libavformat/dss.c | 5 libavformat/dtshddec.c | 7 libavformat/fifo_test.c | 1 libavformat/flac_picture.c | 6 libavformat/g726.c | 1 libavformat/hls.c | 11 libavformat/hls_sample_encryption.c | 1 libavformat/hls_sample_encryption.h | 3 libavformat/hlsplaylist.c | 1 libavformat/http.c | 11 libavformat/icodec.c | 2 libavformat/iff.c | 
4 libavformat/img2dec.c | 15 libavformat/img2enc.c | 5 libavformat/lrcdec.c | 2 libavformat/matroskadec.c | 4 libavformat/mlvdec.c | 8 libavformat/mov.c | 27 + libavformat/mpegts.c | 9 libavformat/mpegtsenc.c | 7 libavformat/mpjpegdec.c | 2 libavformat/os_support.h | 1 libavformat/pcm.c | 10 libavformat/rdt.c | 1 libavformat/rsd.c | 7 libavformat/rtmpproto.c | 58 +-- libavformat/rtpdec_jpeg.c | 6 libavformat/rtpdec_latm.c | 6 libavformat/rtpdec_mpeg4.c | 2 libavformat/rtpdec_qdm2.c | 10 libavformat/rtsp.c | 19 - libavformat/rtspdec.c | 8 libavformat/scd.c | 3 libavformat/segafilm.c | 2 libavformat/vividas.c | 8 libavformat/wavdec.c | 3 libavformat/wtvdec.c | 3 libavformat/xwma.c | 2 libavformat/yuv4mpegenc.c | 3 libavutil/aarch64/float_dsp_neon.S | 200 +++++----- libavutil/aes.c | 1 libavutil/bswap.h | 2 libavutil/eval.c | 15 libavutil/hwcontext_cuda_internal.h | 1 libavutil/hwcontext_qsv.h | 1 libavutil/samplefmt.h | 3 libavutil/tests/blowfish.c | 1 libavutil/timecode.c | 11 libswresample/aarch64/resample.S | 88 ++-- libswresample/rematrix.c | 5 libswresample/resample_template.c | 17 libswresample/soxr_resample.c | 1 libswresample/swresample.c | 32 + libswresample/swresample_frame.c | 1 libswresample/swresample_internal.h | 1 libswresample/version.c | 1 libswscale/aarch64/hscale.S | 292 +++++++-------- libswscale/aarch64/output.S | 64 +-- libswscale/aarch64/yuv2rgb_neon.S | 234 ++++++------ libswscale/gamma.c | 1 libswscale/output.c | 20 - libswscale/rgb2rgb_template.c | 4 libswscale/swscale_unscaled.c | 6 libswscale/utils.c | 59 ++- libswscale/vscale.c | 2 libswscale/x86/yuv2rgb_template.c | 25 + tests/extended.ffconcat | 1 tests/fate/ffprobe.mak | 1 tests/fate/lossless-audio.mak | 1 tests/ref/fate/zmbv-8bit | 1 tests/simple1.ffconcat | 1 tests/simple2.ffconcat | 1 tools/check_arm_indent.sh | 58 +++ tools/indent_arm_assembly.pl | 243 ++++++++++++ 184 files changed, 3822 insertions(+), 2791 deletions(-) dpkg-source: warning: cannot verify inline signature for 
/srv/release.debian.org/tmp/tmp3pcrtk3s/ffmpeg_5.1.8-0+deb12u1.dsc: no acceptable signature found dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmp3pcrtk3s/ffmpeg_5.1.9-0+deb12u1.dsc: no acceptable signature found diff -Nru ffmpeg-5.1.8/.forgejo/pre-commit/config.yaml ffmpeg-5.1.9/.forgejo/pre-commit/config.yaml --- ffmpeg-5.1.8/.forgejo/pre-commit/config.yaml 1970-01-01 00:00:00.000000000 +0000 +++ ffmpeg-5.1.9/.forgejo/pre-commit/config.yaml 2026-05-05 14:22:01.000000000 +0000 @@ -0,0 +1,23 @@ +exclude: ^tests/ref/ + +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-case-conflict + - id: check-executables-have-shebangs + - id: check-illegal-windows-names + - id: check-shebang-scripts-are-executable + - id: check-yaml + - id: end-of-file-fixer + - id: fix-byte-order-marker + - id: mixed-line-ending + - id: trailing-whitespace +- repo: local + hooks: + - id: aarch64-asm-indent + name: fix aarch64 assembly indentation + files: ^.*/aarch64/.*\.S$ + language: script + entry: ./tools/check_arm_indent.sh --apply + pass_filenames: false diff -Nru ffmpeg-5.1.8/.forgejo/workflows/lint.yml ffmpeg-5.1.9/.forgejo/workflows/lint.yml --- ffmpeg-5.1.8/.forgejo/workflows/lint.yml 1970-01-01 00:00:00.000000000 +0000 +++ ffmpeg-5.1.9/.forgejo/workflows/lint.yml 2026-05-05 15:50:52.000000000 +0000 @@ -0,0 +1,29 @@ +name: Lint + +on: + push: + branches: + - release/5.1 + pull_request: + +jobs: + lint: + name: Pre-Commit + runs-on: utilities + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Install pre-commit CI + id: install + run: | + python3 -m venv ~/pre-commit + ~/pre-commit/bin/pip install --upgrade pip setuptools + ~/pre-commit/bin/pip install pre-commit + echo "envhash=$({ python3 --version && cat .forgejo/pre-commit/config.yaml; } | sha256sum | cut -d' ' -f1)" >> $FORGEJO_OUTPUT + - name: Cache + uses: actions/cache@v4 + with: + path: ~/.cache/pre-commit + key: 
pre-commit-${{ steps.install.outputs.envhash }} + - name: Run pre-commit CI + run: ~/pre-commit/bin/pre-commit run -c .forgejo/pre-commit/config.yaml --show-diff-on-failure --color=always --all-files diff -Nru ffmpeg-5.1.8/.forgejo/workflows/test.yml ffmpeg-5.1.9/.forgejo/workflows/test.yml --- ffmpeg-5.1.8/.forgejo/workflows/test.yml 1970-01-01 00:00:00.000000000 +0000 +++ ffmpeg-5.1.9/.forgejo/workflows/test.yml 2026-05-05 15:50:52.000000000 +0000 @@ -0,0 +1,80 @@ +name: Test + +on: + push: + branches: + - release/5.1 + pull_request: + +jobs: + run_fate: + name: Fate (${{ matrix.runner }}, ${{ matrix.shared }}, ${{ matrix.bits }} bit) + strategy: + fail-fast: false + matrix: + runner: [linux-aarch64] + shared: ['static'] + bits: ['64'] + include: + - runner: linux-amd64 + shared: 'static' + bits: '32' + - runner: linux-amd64 + shared: 'shared' + bits: '64' + runs-on: ${{ matrix.runner }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Configure + run: | + ./configure --enable-gpl --enable-nonfree --enable-memory-poisoning --assert-level=2 \ + $([ "${{ matrix.bits }}" != "32" ] || echo --arch=x86_32 --extra-cflags=-m32 --extra-cxxflags=-m32 --extra-ldflags=-m32) \ + $([ "${{ matrix.shared }}" != "shared" ] || echo --enable-shared --disable-static) \ + || CFGRES=$? && CFGRES=$? 
+ cat ffbuild/config.log + exit $CFGRES + - name: Build + run: make -j$(nproc) + - name: Restore Cached Fate-Suite + id: cache + uses: actions/cache/restore@v4 + with: + path: fate-suite + key: fate-suite + restore-keys: | + fate-suite- + - name: Sync Fate-Suite + id: fate + run: | + make fate-rsync SAMPLES=$PWD/fate-suite + echo "hash=$(find fate-suite -type f -printf "%P %s %T@\n" | sort | sha256sum | cut -d' ' -f1)" >> $FORGEJO_OUTPUT + - name: Cache Fate-Suite + uses: actions/cache/save@v4 + if: ${{ format('fate-suite-{0}', steps.fate.outputs.hash) != steps.cache.outputs.cache-matched-key }} + with: + path: fate-suite + key: fate-suite-${{ steps.fate.outputs.hash }} + - name: Run Fate + run: LD_LIBRARY_PATH="$(printf "%s:" "$PWD"/lib*)$PWD" make fate fate-build SAMPLES=$PWD/fate-suite -j$(nproc) + compile_only: + name: Fate (Win64, Build-Only) + strategy: + fail-fast: false + matrix: + image: ["ghcr.io/btbn/ffmpeg-builds/win64-gpl-5.1:latest"] + runs-on: linux-amd64 + container: ${{ matrix.image }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Configure + run: | + ./configure --pkg-config-flags="--static" $FFBUILD_TARGET_FLAGS $FF_CONFIGURE \ + --cc="$CC" --cxx="$CXX" --ar="$AR" --ranlib="$RANLIB" --nm="$NM" \ + --extra-cflags="$FF_CFLAGS" --extra-cxxflags="$FF_CXXFLAGS" \ + --extra-libs="$FF_LIBS" --extra-ldflags="$FF_LDFLAGS" --extra-ldexeflags="$FF_LDEXEFLAGS" + - name: Build + run: make -j$(nproc) + - name: Run Fate + run: make -j$(nproc) fate-build diff -Nru ffmpeg-5.1.8/COPYING.LGPLv2.1 ffmpeg-5.1.9/COPYING.LGPLv2.1 --- ffmpeg-5.1.8/COPYING.LGPLv2.1 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/COPYING.LGPLv2.1 2026-05-05 14:22:01.000000000 +0000 @@ -55,7 +55,7 @@ that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. - + Finally, software patents pose a constant threat to the existence of any free program. 
We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a @@ -111,7 +111,7 @@ "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. - + GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION @@ -158,7 +158,7 @@ You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. - + 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 @@ -216,7 +216,7 @@ ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. - + Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. @@ -267,7 +267,7 @@ distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. - + 6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work @@ -329,7 +329,7 @@ accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. - + 7. 
You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined @@ -370,7 +370,7 @@ restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. - + 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or @@ -422,7 +422,7 @@ the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. - + 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is @@ -456,7 +456,7 @@ DAMAGES. END OF TERMS AND CONDITIONS - + How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest diff -Nru ffmpeg-5.1.8/Changelog ffmpeg-5.1.9/Changelog --- ffmpeg-5.1.8/Changelog 2025-11-26 02:41:35.000000000 +0000 +++ ffmpeg-5.1.9/Changelog 2026-05-05 15:50:55.000000000 +0000 @@ -1,6 +1,164 @@ Entries are sorted chronologically from oldest to youngest within each release, releases are sorted from youngest to oldest. 
+version 5.1.9: + avcodec/av1dec: check that primary_ref_frame is within range + configure: bump CONFIG_THIS_YEAR to 2026 + avcodec/dfpwmdec: Check nb_samples + avcodec/alsdec: do not set nbits invalidly + swscale/swscale_unscaled: adjust last line copy + avformat/avidec: check LIST size in avi_load_index() + avformat/avidec: validate INFO list size before parsing + libavformat/xwma: fix overflow in seek position + avformat/pcm: Use 64bit for byte_rate + avfilter/vf_kerndeint: Check for minimum height + avcodec/ralf: Add the missing return statement after the error log + avfilter/vf_codecview: Clamp block to the visible frame region + avcodec/zmbv: reject XOR data that overruns the decompression buffer + avcodec/rasc: fix heap use-after-free in decode_move() + avformat/rtpdec_mpeg4: reject zero-length AU header sections + fftools/ffmpeg_opt: validate stream index in negative map handling + avformat/rtmpproto: prevent integer overflow accumulating FLV buffer size + avformat/rtmpproto: validate compressed SWF header length + avformat/rtsp: Fix out-of-bounds read in SDP parser when control_url is empty + avformat/rtpdec_latm: avoid integer overflow in LATM length parsing + avcodec/h264: recompute per-slice direct mode state for every slice + avcodec/h264_refs: Clear stale pointers from ref_list + avformat/concat: guard total_size overflow + avcodec/wmaenc: Fix missing padding in extradata + avcodec/tdsc: remove double stride adjustment + avformat/cafdec: fix negative index use in read_seek + avcodec/notchlc: Check 255 loops + avformat/rtpdec_jpeg: check qtable_len + avformat/vividas: use-of-uninitialized-value in keybuffer + avcodec/tdsc: Check jpeg size + avcodec/tdsc: Better input size check + avcodec/tdsc: Check tile_size + avformat/mov: check extradata in mov_read_dops() + avformat/mov: Check read size for opus extradata + avformat/rtspdec: reject non-positive ANNOUNCE Content-Length + avformat/wavdec: Fix use-of-uninitialized-value in find_guid() + 
avformat/hls_sample_encryption: add missing padding for audio setup buffer + avcodec/svq1dec: Check input space for minimum + avcodec/vp9: Rollback dimensions when format is rejected + avformat/rtpdec_qdm2: Check block_size + avcodec/escape130: Initialize old_y_avg + avutil/samplefmt: Dont claim that av_get_sample_fmt_string checks sample_fmt + avformat: check avio_read() return values in dss/dtshd/mlv + avcodec/alsdec: preserve full float value in zero-truncated samples + avcodec/alsdec: propagate read_diff_float_data() errors in read_frame_data() + avcodec/alsdec: fix mantissa unpacking in compressed Part A path + libavfilter/vf_v360: fix operator precedence in stereo loop condition + avcodec/alsdec: fix abs(INT_MIN) UB in read_diff_float_data() + avformat/rsd: reject short ADPCM_THP extradata reads + avformat/mov: Handle integer overflow in MOV parser + avcodec/dvdsub_parser: Fix buf_size check + avfilter/af_pan: fix sscanf() return value checks in parse_channel_name + avutil/bswap: fix implicit conversion warning in av_bswap64 + avformat/mpegts: fix descriptor accounting across multiple IOD descriptors + avcodec/xxan: zero-initialize y_buffer + avcodec/exr: Check input space before reverse_lut() + avcodec/h264_slice: reject slice_num >= 0xFFFF + avutil/timecode: Check for integer overflow in av_timecode_init_from_components() + avformat/mov: do not allocate out-of-range buffers + avfilter/af_lv2: call lilv_instance_activate before lilv_instance_run + avformat/rtmpproto: fix listen_timeout conversion for special negative values + swscale/output: fix integer overflows in chroma in yuv2rgba64_X_c_template() + avcodec/lcldec: Fixes uqvq overflow + avcodec/av1dec: sync frame header and tile group behavior with CBS + avformat/mlvdec: avoid uninitialized read in read_string() + avcodec/magicyuv: fix small median images + swscale/output: Fix integer overflow in alpha in yuv2rgba64_1_c_template() + swscale/utils: Check *Inc + avfilter/vf_scale: Fix integer overflow in 
config_props() + swscale/output: Fixes integer overflow in yuv2planeX_8_c + swscale/utils: initialize chroma when luma switched to cascade + avformat/rtsp: Pass blacklist + avformat/rtsp: Explicitly check protocol + avfilter/vf_convolution: Use avpriv_mirror + avfilter/vf_convolution: Handle corner cases with small frames + avformat/mov: use 64bit in CENC subsample bounds checks + avutil/eval: Check depth of AVExpr + avformat/vividas: Reset n_audio_subpackets on error + avformat/matroskadec: Check that end_time_ns >= start_time_ns + avcodec/vp3: Sanity check cropping + avformat/dhav: Check avio_seek() return + avformat/segafilm: dont read uninitialized value + avcodec/mpegvideo_enc: Restructure ff_h263_encode_gob_header() relation to update_mb_info() + avcodec/exr: check tile_attr.x/ySize + avformat/demux: Fix integer overflows in select_from_pts_buffer() + avcodec/golomb: Fix get_ur_golomb_jpegls() with esclen = 0 + swresample/resample_template: add casts to avoid undefined overflows + avcodec/h264_parser: Check pts for overflow + avformat/wtvdec: Check that language is fully read + avcodec/imm5: Dont pass EAGAIN on as is + avcodec/interplayacm: Check input for fill_block() + avcodec/flashsv: Check for input space before (re)allocating frame + avcodec/mdec: Check input space vs minimal block size + avcodec/h264_parser: Check remaining input length in loop in scan_mmco_reset() + avcodec/exr: fix AVERROR typo + avcodec/cfhd: Check transform type before continuing + avcodec/cfhd: Add CFHDSegment enum and named identifiers + avformat/icodec: Check size + avformat/lrcdec: Check ss for finiteness + avformat/http: Also count redirects from the cache + avformat/http: allow adjusting the redirect limit + fftools/ffmpeg_opt: limit recursion of presets + swscale/rgb2rgb_template: fix signed shift into sign bit + swresample: Check ch layouts in swr_alloc_set_opts2() + swresample: Check user chlayout in swr_set_matrix() + avcodec/bmp: fix indention + avcodec/exr: Handle axmax 
like bxmin in 04d7a6d3db56ea1a93908ff2d3d312e3fc40a58c + avformat/cafdec: Check nb_entries in read_info_chunk() + avcodec/vp9: Reallocate on resolution change which does not change tile_cols + avformat/img2dec: Check avio_size() for failure + avformat/mpegtsenc: Check remaining space in SDT + avformat/img2enc: Check split planes packet size + avformat/yuv4mpegen: Sanity check input packet frame dimensions + avformat/iff: Error out with 0 channel loudspeaker configuration + Fix overflow in STSD parser + avcodec/adpcm: Check input buffer size + avformat/scd: Use ffio_read_size() + avformat/hls: Check for integer overflow with #EXTINF: + avcodec/dca_xll: Clear padding in ff_dca_xll_parse() + vfilter/vf_find_rect: Clamp x/y min/max to valid values + avcodec/dca_xll: Check get_rice_array() + avformat/mpegts: Check program_info_length + avformat/mpegts: Check IOD_DESCRIPTOR len + avcodec/qdm2: fix heap-use-after-free in qdm2_decode_frame + avcodec/jpeg2000dec: Print bpno level when erroring out + avformat/dashdec: check value valid after read value from mpd xml + swscale/utils: zero init filter memory as before + lavc/j2kdec: Do not ignore colour association for packed formats + swscale/utils: Sanity check sizeFactor + swscale/utils: Avoid FF_ALLOC_TYPED_ARRAY() and use av_malloc_array() directly + avcodec/mjpegdec: fix segfault on extern_huff and no extradata + avcodec/exr: use av_realloc_array() + avcodec/omx: Check extradata size and nFilledLen + avfilter/scale_eval: Use 64bit for factor_w/h + avfilter/scale_eval: Avoid undefined behavior with double to int cast + avformat/http: Check that the protocol of redirects is http or https + avfilter/vf_find_rect: Fix handling odd sized images + avcodec/notchlc: zero-initialize history buffer + avfilter/vf_stack: add checks for the final canvas dimensions + avcodec/mjpegdec: only test the size bound in sequential mjpeg + avformat/hls: fix double space + avformat/hls: Check seg size and offset for overflow + 
avformat/flac_picture: Correct check + avfilter/vf_neighbor_opencl: add error condition when filter name doesn't match + avfilter/vf_libopencv: make sure there is space for null-terminator in shape_str + fate: add missing options in config template + (fforge/pr/22398) swscale/x86/yuv2rgb_template: Add emms to MMX(EXT) functions + forgejo: backport CI job names + (fforge/pr/21341) avformat/img2dec: reject input images too big to fit into a single packet + avfilter/af_amerge: fix possible crash with custom layouts + (fforge/pr/21063) avformat/os_support: Include stdint.h for int64_t + all: apply linter fixes + tools/check_arm_indent: skip empty glob + forgejo: apply needed CI changes for 5.1 + forgejo: backport CI to release/5.1 + + version 5.1.8: avutil/common: cast GET_BYTE/GET_16BIT returned value avfilter/vf_drawtext: Account for bbox text seperator diff -Nru ffmpeg-5.1.8/RELEASE ffmpeg-5.1.9/RELEASE --- ffmpeg-5.1.8/RELEASE 2025-11-26 02:41:35.000000000 +0000 +++ ffmpeg-5.1.9/RELEASE 2026-05-05 15:50:55.000000000 +0000 @@ -1 +1 @@ -5.1.8 +5.1.9 diff -Nru ffmpeg-5.1.8/VERSION ffmpeg-5.1.9/VERSION --- ffmpeg-5.1.8/VERSION 2025-11-26 02:41:35.000000000 +0000 +++ ffmpeg-5.1.9/VERSION 2026-05-05 15:50:55.000000000 +0000 @@ -1 +1 @@ -5.1.8 +5.1.9 diff -Nru ffmpeg-5.1.8/configure ffmpeg-5.1.9/configure --- ffmpeg-5.1.8/configure 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/configure 2026-05-05 15:50:55.000000000 +0000 @@ -7797,7 +7797,7 @@ #define FFMPEG_CONFIG_H #define FFMPEG_CONFIGURATION "$(c_escape $FFMPEG_CONFIGURATION)" #define FFMPEG_LICENSE "$(c_escape $license)" -#define CONFIG_THIS_YEAR 2025 +#define CONFIG_THIS_YEAR 2026 #define FFMPEG_DATADIR "$(eval c_escape $datadir)" #define AVCONV_DATADIR "$(eval c_escape $datadir)" #define CC_IDENT "$(c_escape ${cc_ident:-Unknown compiler})" diff -Nru ffmpeg-5.1.8/debian/changelog ffmpeg-5.1.9/debian/changelog --- ffmpeg-5.1.8/debian/changelog 2025-12-05 21:14:02.000000000 +0000 +++ 
ffmpeg-5.1.9/debian/changelog 2026-05-13 13:23:40.000000000 +0000 @@ -1,3 +1,9 @@ +ffmpeg (7:5.1.9-0+deb12u1) bookworm-security; urgency=medium + + * New upstream version 5.1.9 + + -- Sebastian Ramacher Wed, 13 May 2026 15:23:40 +0200 + ffmpeg (7:5.1.8-0+deb12u1) bookworm-security; urgency=medium * New upstream version 5.1.8 diff -Nru ffmpeg-5.1.8/doc/Doxyfile ffmpeg-5.1.9/doc/Doxyfile --- ffmpeg-5.1.8/doc/Doxyfile 2025-11-26 02:41:35.000000000 +0000 +++ ffmpeg-5.1.9/doc/Doxyfile 2026-05-05 15:50:55.000000000 +0000 @@ -38,7 +38,7 @@ # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 5.1.8 +PROJECT_NUMBER = 5.1.9 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff -Nru ffmpeg-5.1.8/doc/build_system.txt ffmpeg-5.1.9/doc/build_system.txt --- ffmpeg-5.1.8/doc/build_system.txt 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/doc/build_system.txt 2026-05-05 14:22:01.000000000 +0000 @@ -63,4 +63,3 @@ make -k Continue build in case of errors, this is useful for the regression tests sometimes but note that it will still not run all reg tests. - diff -Nru ffmpeg-5.1.8/doc/dev_community/resolution_process.md ffmpeg-5.1.9/doc/dev_community/resolution_process.md --- ffmpeg-5.1.8/doc/dev_community/resolution_process.md 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/doc/dev_community/resolution_process.md 2026-05-05 15:50:52.000000000 +0000 @@ -88,4 +88,3 @@ The decisions from the TC are final, until the matters are reopened after no less than one year. 
- diff -Nru ffmpeg-5.1.8/doc/fate_config.sh.template ffmpeg-5.1.9/doc/fate_config.sh.template --- ffmpeg-5.1.8/doc/fate_config.sh.template 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/doc/fate_config.sh.template 2026-05-05 15:50:55.000000000 +0000 @@ -11,16 +11,21 @@ # the following are optional and map to configure options arch= cpu= +toolchain= cross_prefix= as= cc= +cxx= ld= +nm= target_os= sysroot= target_exec= target_path= target_samples= extra_cflags= +extra_cxxflags= +extra_objcflags= extra_ldflags= extra_libs= extra_conf= # extra configure options not covered above diff -Nru ffmpeg-5.1.8/doc/nut.texi ffmpeg-5.1.9/doc/nut.texi --- ffmpeg-5.1.8/doc/nut.texi 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/doc/nut.texi 2026-05-05 14:22:01.000000000 +0000 @@ -157,4 +157,3 @@ @item XVID @tab non-compliant MPEG-4 generated by old Xvid @item XVIX @tab non-compliant MPEG-4 generated by old Xvid with interlacing bug @end multitable - diff -Nru ffmpeg-5.1.8/doc/undefined.txt ffmpeg-5.1.9/doc/undefined.txt --- ffmpeg-5.1.8/doc/undefined.txt 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/doc/undefined.txt 2026-05-05 14:22:01.000000000 +0000 @@ -44,4 +44,3 @@ here the reader knows that a,b,c are meant to be signed integers but for C standard compliance / to avoid undefined behavior they are stored in unsigned ints. 
- diff -Nru ffmpeg-5.1.8/ffbuild/libversion.sh ffmpeg-5.1.9/ffbuild/libversion.sh --- ffmpeg-5.1.8/ffbuild/libversion.sh 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/ffbuild/libversion.sh 2026-05-05 14:22:01.000000000 +0000 @@ -1,3 +1,5 @@ +#!/bin/sh + toupper(){ echo "$@" | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ } diff -Nru ffmpeg-5.1.8/fftools/ffmpeg.h ffmpeg-5.1.9/fftools/ffmpeg.h --- ffmpeg-5.1.8/fftools/ffmpeg.h 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/fftools/ffmpeg.h 2026-05-05 15:50:55.000000000 +0000 @@ -237,6 +237,9 @@ int nb_autoscale; SpecifierOpt *bits_per_raw_sample; int nb_bits_per_raw_sample; + + + int depth; } OptionsContext; typedef struct InputFilter { diff -Nru ffmpeg-5.1.8/fftools/ffmpeg_opt.c ffmpeg-5.1.9/fftools/ffmpeg_opt.c --- ffmpeg-5.1.8/fftools/ffmpeg_opt.c 2025-11-26 02:41:35.000000000 +0000 +++ ffmpeg-5.1.9/fftools/ffmpeg_opt.c 2026-05-05 15:50:55.000000000 +0000 @@ -475,6 +475,8 @@ for (i = 0; i < o->nb_stream_maps; i++) { m = &o->stream_maps[i]; if (file_idx == m->file_index && + m->stream_index >= 0 && + m->stream_index < input_files[m->file_index]->nb_streams && check_stream_specifier(input_files[m->file_index]->ctx, input_files[m->file_index]->ctx->streams[m->stream_index], *p == ':' ? 
p + 1 : p) > 0) @@ -3267,6 +3269,12 @@ FILE *f=NULL; char filename[1000], line[1000], tmp_line[1000]; const char *codec_name = NULL; + int depth = o->depth; + + if (depth > 2) { + av_log(NULL, AV_LOG_ERROR, "too deep recursion\n"); + return AVERROR(EINVAL); + } tmp_line[0] = *opt; tmp_line[1] = 0; @@ -3280,6 +3288,7 @@ exit_program(1); } + o->depth ++; while (fgets(line, sizeof(line), f)) { char *key = tmp_line, *value, *endptr; @@ -3304,6 +3313,7 @@ } } + o->depth = depth; fclose(f); return 0; diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/aacpsdsp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/aacpsdsp_neon.S --- ffmpeg-5.1.8/libavcodec/aarch64/aacpsdsp_neon.S 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/aarch64/aacpsdsp_neon.S 2026-03-16 18:10:00.000000000 +0000 @@ -19,130 +19,130 @@ #include "libavutil/aarch64/asm.S" function ff_ps_add_squares_neon, export=1 -1: ld1 {v0.4S,v1.4S}, [x1], #32 - fmul v0.4S, v0.4S, v0.4S - fmul v1.4S, v1.4S, v1.4S - faddp v2.4S, v0.4S, v1.4S - ld1 {v3.4S}, [x0] - fadd v3.4S, v3.4S, v2.4S - st1 {v3.4S}, [x0], #16 - subs w2, w2, #4 - b.gt 1b +1: ld1 {v0.4s,v1.4s}, [x1], #32 + fmul v0.4s, v0.4s, v0.4s + fmul v1.4s, v1.4s, v1.4s + faddp v2.4s, v0.4s, v1.4s + ld1 {v3.4s}, [x0] + fadd v3.4s, v3.4s, v2.4s + st1 {v3.4s}, [x0], #16 + subs w2, w2, #4 + b.gt 1b ret endfunc function ff_ps_mul_pair_single_neon, export=1 -1: ld1 {v0.4S,v1.4S}, [x1], #32 - ld1 {v2.4S}, [x2], #16 - zip1 v3.4S, v2.4S, v2.4S - zip2 v4.4S, v2.4S, v2.4S - fmul v0.4S, v0.4S, v3.4S - fmul v1.4S, v1.4S, v4.4S - st1 {v0.4S,v1.4S}, [x0], #32 - subs w3, w3, #4 - b.gt 1b +1: ld1 {v0.4s,v1.4s}, [x1], #32 + ld1 {v2.4s}, [x2], #16 + zip1 v3.4s, v2.4s, v2.4s + zip2 v4.4s, v2.4s, v2.4s + fmul v0.4s, v0.4s, v3.4s + fmul v1.4s, v1.4s, v4.4s + st1 {v0.4s,v1.4s}, [x0], #32 + subs w3, w3, #4 + b.gt 1b ret endfunc function ff_ps_stereo_interpolate_neon, export=1 - ld1 {v0.4S}, [x2] - ld1 {v1.4S}, [x3] - zip1 v4.4S, v0.4S, v0.4S - zip2 v5.4S, v0.4S, v0.4S - zip1 v6.4S, v1.4S, 
v1.4S - zip2 v7.4S, v1.4S, v1.4S -1: ld1 {v2.2S}, [x0] - ld1 {v3.2S}, [x1] - fadd v4.4S, v4.4S, v6.4S - fadd v5.4S, v5.4S, v7.4S - mov v2.D[1], v2.D[0] - mov v3.D[1], v3.D[0] - fmul v2.4S, v2.4S, v4.4S - fmla v2.4S, v3.4S, v5.4S - st1 {v2.D}[0], [x0], #8 - st1 {v2.D}[1], [x1], #8 - subs w4, w4, #1 - b.gt 1b + ld1 {v0.4s}, [x2] + ld1 {v1.4s}, [x3] + zip1 v4.4s, v0.4s, v0.4s + zip2 v5.4s, v0.4s, v0.4s + zip1 v6.4s, v1.4s, v1.4s + zip2 v7.4s, v1.4s, v1.4s +1: ld1 {v2.2s}, [x0] + ld1 {v3.2s}, [x1] + fadd v4.4s, v4.4s, v6.4s + fadd v5.4s, v5.4s, v7.4s + mov v2.d[1], v2.d[0] + mov v3.d[1], v3.d[0] + fmul v2.4s, v2.4s, v4.4s + fmla v2.4s, v3.4s, v5.4s + st1 {v2.d}[0], [x0], #8 + st1 {v2.d}[1], [x1], #8 + subs w4, w4, #1 + b.gt 1b ret endfunc function ff_ps_stereo_interpolate_ipdopd_neon, export=1 - ld1 {v0.4S,v1.4S}, [x2] - ld1 {v6.4S,v7.4S}, [x3] - fneg v2.4S, v1.4S - fneg v3.4S, v7.4S - zip1 v16.4S, v0.4S, v0.4S - zip2 v17.4S, v0.4S, v0.4S - zip1 v18.4S, v2.4S, v1.4S - zip2 v19.4S, v2.4S, v1.4S - zip1 v20.4S, v6.4S, v6.4S - zip2 v21.4S, v6.4S, v6.4S - zip1 v22.4S, v3.4S, v7.4S - zip2 v23.4S, v3.4S, v7.4S -1: ld1 {v2.2S}, [x0] - ld1 {v3.2S}, [x1] - fadd v16.4S, v16.4S, v20.4S - fadd v17.4S, v17.4S, v21.4S - mov v2.D[1], v2.D[0] - mov v3.D[1], v3.D[0] - fmul v4.4S, v2.4S, v16.4S - fmla v4.4S, v3.4S, v17.4S - fadd v18.4S, v18.4S, v22.4S - fadd v19.4S, v19.4S, v23.4S - ext v2.16B, v2.16B, v2.16B, #4 - ext v3.16B, v3.16B, v3.16B, #4 - fmla v4.4S, v2.4S, v18.4S - fmla v4.4S, v3.4S, v19.4S - st1 {v4.D}[0], [x0], #8 - st1 {v4.D}[1], [x1], #8 - subs w4, w4, #1 - b.gt 1b + ld1 {v0.4s,v1.4s}, [x2] + ld1 {v6.4s,v7.4s}, [x3] + fneg v2.4s, v1.4s + fneg v3.4s, v7.4s + zip1 v16.4s, v0.4s, v0.4s + zip2 v17.4s, v0.4s, v0.4s + zip1 v18.4s, v2.4s, v1.4s + zip2 v19.4s, v2.4s, v1.4s + zip1 v20.4s, v6.4s, v6.4s + zip2 v21.4s, v6.4s, v6.4s + zip1 v22.4s, v3.4s, v7.4s + zip2 v23.4s, v3.4s, v7.4s +1: ld1 {v2.2s}, [x0] + ld1 {v3.2s}, [x1] + fadd v16.4s, v16.4s, v20.4s + fadd v17.4s, v17.4s, 
v21.4s + mov v2.d[1], v2.d[0] + mov v3.d[1], v3.d[0] + fmul v4.4s, v2.4s, v16.4s + fmla v4.4s, v3.4s, v17.4s + fadd v18.4s, v18.4s, v22.4s + fadd v19.4s, v19.4s, v23.4s + ext v2.16b, v2.16b, v2.16b, #4 + ext v3.16b, v3.16b, v3.16b, #4 + fmla v4.4s, v2.4s, v18.4s + fmla v4.4s, v3.4s, v19.4s + st1 {v4.d}[0], [x0], #8 + st1 {v4.d}[1], [x1], #8 + subs w4, w4, #1 + b.gt 1b ret endfunc function ff_ps_hybrid_analysis_neon, export=1 - lsl x3, x3, #3 - ld2 {v0.4S,v1.4S}, [x1], #32 - ld2 {v2.2S,v3.2S}, [x1], #16 - ld1 {v24.2S}, [x1], #8 - ld2 {v4.2S,v5.2S}, [x1], #16 - ld2 {v6.4S,v7.4S}, [x1] - rev64 v6.4S, v6.4S - rev64 v7.4S, v7.4S - ext v6.16B, v6.16B, v6.16B, #8 - ext v7.16B, v7.16B, v7.16B, #8 - rev64 v4.2S, v4.2S - rev64 v5.2S, v5.2S - mov v2.D[1], v3.D[0] - mov v4.D[1], v5.D[0] - mov v5.D[1], v2.D[0] - mov v3.D[1], v4.D[0] - fadd v16.4S, v0.4S, v6.4S - fadd v17.4S, v1.4S, v7.4S - fsub v18.4S, v1.4S, v7.4S - fsub v19.4S, v0.4S, v6.4S - fadd v22.4S, v2.4S, v4.4S - fsub v23.4S, v5.4S, v3.4S - trn1 v20.2D, v22.2D, v23.2D // {re4+re8, re5+re7, im8-im4, im7-im5} - trn2 v21.2D, v22.2D, v23.2D // {im4+im8, im5+im7, re4-re8, re5-re7} -1: ld2 {v2.4S,v3.4S}, [x2], #32 - ld2 {v4.2S,v5.2S}, [x2], #16 - ld1 {v6.2S}, [x2], #8 - add x2, x2, #8 - mov v4.D[1], v5.D[0] - mov v6.S[1], v6.S[0] - fmul v6.2S, v6.2S, v24.2S - fmul v0.4S, v2.4S, v16.4S - fmul v1.4S, v2.4S, v17.4S - fmls v0.4S, v3.4S, v18.4S - fmla v1.4S, v3.4S, v19.4S - fmla v0.4S, v4.4S, v20.4S - fmla v1.4S, v4.4S, v21.4S - faddp v0.4S, v0.4S, v1.4S - faddp v0.4S, v0.4S, v0.4S - fadd v0.2S, v0.2S, v6.2S - st1 {v0.2S}, [x0], x3 - subs w4, w4, #1 - b.gt 1b + lsl x3, x3, #3 + ld2 {v0.4s,v1.4s}, [x1], #32 + ld2 {v2.2s,v3.2s}, [x1], #16 + ld1 {v24.2s}, [x1], #8 + ld2 {v4.2s,v5.2s}, [x1], #16 + ld2 {v6.4s,v7.4s}, [x1] + rev64 v6.4s, v6.4s + rev64 v7.4s, v7.4s + ext v6.16b, v6.16b, v6.16b, #8 + ext v7.16b, v7.16b, v7.16b, #8 + rev64 v4.2s, v4.2s + rev64 v5.2s, v5.2s + mov v2.d[1], v3.d[0] + mov v4.d[1], v5.d[0] + mov v5.d[1], 
v2.d[0] + mov v3.d[1], v4.d[0] + fadd v16.4s, v0.4s, v6.4s + fadd v17.4s, v1.4s, v7.4s + fsub v18.4s, v1.4s, v7.4s + fsub v19.4s, v0.4s, v6.4s + fadd v22.4s, v2.4s, v4.4s + fsub v23.4s, v5.4s, v3.4s + trn1 v20.2d, v22.2d, v23.2d // {re4+re8, re5+re7, im8-im4, im7-im5} + trn2 v21.2d, v22.2d, v23.2d // {im4+im8, im5+im7, re4-re8, re5-re7} +1: ld2 {v2.4s,v3.4s}, [x2], #32 + ld2 {v4.2s,v5.2s}, [x2], #16 + ld1 {v6.2s}, [x2], #8 + add x2, x2, #8 + mov v4.d[1], v5.d[0] + mov v6.s[1], v6.s[0] + fmul v6.2s, v6.2s, v24.2s + fmul v0.4s, v2.4s, v16.4s + fmul v1.4s, v2.4s, v17.4s + fmls v0.4s, v3.4s, v18.4s + fmla v1.4s, v3.4s, v19.4s + fmla v0.4s, v4.4s, v20.4s + fmla v1.4s, v4.4s, v21.4s + faddp v0.4s, v0.4s, v1.4s + faddp v0.4s, v0.4s, v0.4s + fadd v0.2s, v0.2s, v6.2s + st1 {v0.2s}, [x0], x3 + subs w4, w4, #1 + b.gt 1b ret endfunc diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/fft_neon.S ffmpeg-5.1.9/libavcodec/aarch64/fft_neon.S --- ffmpeg-5.1.8/libavcodec/aarch64/fft_neon.S 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/aarch64/fft_neon.S 2026-05-05 15:50:52.000000000 +0000 @@ -359,18 +359,18 @@ endfunc .endm - def_fft 32, 16, 8 - def_fft 64, 32, 16 - def_fft 128, 64, 32 - def_fft 256, 128, 64 - def_fft 512, 256, 128 - def_fft 1024, 512, 256 - def_fft 2048, 1024, 512 - def_fft 4096, 2048, 1024 - def_fft 8192, 4096, 2048 - def_fft 16384, 8192, 4096 - def_fft 32768, 16384, 8192 - def_fft 65536, 32768, 16384 + def_fft 32, 16, 8 + def_fft 64, 32, 16 + def_fft 128, 64, 32 + def_fft 256, 128, 64 + def_fft 512, 256, 128 + def_fft 1024, 512, 256 + def_fft 2048, 1024, 512 + def_fft 4096, 2048, 1024 + def_fft 8192, 4096, 2048 + def_fft 16384, 8192, 4096 + def_fft 32768, 16384, 8192 + def_fft 65536, 32768, 16384 function ff_fft_calc_neon, export=1 prfm pldl1keep, [x1] diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/h264cmc_neon.S ffmpeg-5.1.9/libavcodec/aarch64/h264cmc_neon.S --- ffmpeg-5.1.8/libavcodec/aarch64/h264cmc_neon.S 2025-11-26 02:41:31.000000000 +0000 +++ 
ffmpeg-5.1.9/libavcodec/aarch64/h264cmc_neon.S 2026-05-05 15:50:52.000000000 +0000 @@ -38,11 +38,11 @@ lsl w9, w9, #3 lsl w10, w10, #1 add w9, w9, w10 - add x6, x6, w9, UXTW - ld1r {v22.8H}, [x6] + add x6, x6, w9, uxtw + ld1r {v22.8h}, [x6] .endif .ifc \codec,vc1 - movi v22.8H, #28 + movi v22.8h, #28 .endif mul w7, w4, w5 lsl w14, w5, #3 @@ -55,139 +55,139 @@ add w4, w4, #64 b.eq 2f - dup v0.8B, w4 - dup v1.8B, w12 - ld1 {v4.8B, v5.8B}, [x1], x2 - dup v2.8B, w6 - dup v3.8B, w7 - ext v5.8B, v4.8B, v5.8B, #1 -1: ld1 {v6.8B, v7.8B}, [x1], x2 - umull v16.8H, v4.8B, v0.8B - umlal v16.8H, v5.8B, v1.8B - ext v7.8B, v6.8B, v7.8B, #1 - ld1 {v4.8B, v5.8B}, [x1], x2 - umlal v16.8H, v6.8B, v2.8B - prfm pldl1strm, [x1] - ext v5.8B, v4.8B, v5.8B, #1 - umlal v16.8H, v7.8B, v3.8B - umull v17.8H, v6.8B, v0.8B + dup v0.8b, w4 + dup v1.8b, w12 + ld1 {v4.8b, v5.8b}, [x1], x2 + dup v2.8b, w6 + dup v3.8b, w7 + ext v5.8b, v4.8b, v5.8b, #1 +1: ld1 {v6.8b, v7.8b}, [x1], x2 + umull v16.8h, v4.8b, v0.8b + umlal v16.8h, v5.8b, v1.8b + ext v7.8b, v6.8b, v7.8b, #1 + ld1 {v4.8b, v5.8b}, [x1], x2 + umlal v16.8h, v6.8b, v2.8b + prfm pldl1strm, [x1] + ext v5.8b, v4.8b, v5.8b, #1 + umlal v16.8h, v7.8b, v3.8b + umull v17.8h, v6.8b, v0.8b subs w3, w3, #2 - umlal v17.8H, v7.8B, v1.8B - umlal v17.8H, v4.8B, v2.8B - umlal v17.8H, v5.8B, v3.8B + umlal v17.8h, v7.8b, v1.8b + umlal v17.8h, v4.8b, v2.8b + umlal v17.8h, v5.8b, v3.8b prfm pldl1strm, [x1, x2] .ifc \codec,h264 - rshrn v16.8B, v16.8H, #6 - rshrn v17.8B, v17.8H, #6 + rshrn v16.8b, v16.8h, #6 + rshrn v17.8b, v17.8h, #6 .else - add v16.8H, v16.8H, v22.8H - add v17.8H, v17.8H, v22.8H - shrn v16.8B, v16.8H, #6 - shrn v17.8B, v17.8H, #6 + add v16.8h, v16.8h, v22.8h + add v17.8h, v17.8h, v22.8h + shrn v16.8b, v16.8h, #6 + shrn v17.8b, v17.8h, #6 .endif .ifc \type,avg - ld1 {v20.8B}, [x8], x2 - ld1 {v21.8B}, [x8], x2 - urhadd v16.8B, v16.8B, v20.8B - urhadd v17.8B, v17.8B, v21.8B + ld1 {v20.8b}, [x8], x2 + ld1 {v21.8b}, [x8], x2 + urhadd v16.8b, v16.8b, 
v20.8b + urhadd v17.8b, v17.8b, v21.8b .endif - st1 {v16.8B}, [x0], x2 - st1 {v17.8B}, [x0], x2 + st1 {v16.8b}, [x0], x2 + st1 {v17.8b}, [x0], x2 b.gt 1b ret 2: adds w12, w12, w6 - dup v0.8B, w4 + dup v0.8b, w4 b.eq 5f tst w6, w6 - dup v1.8B, w12 + dup v1.8b, w12 b.eq 4f - ld1 {v4.8B}, [x1], x2 -3: ld1 {v6.8B}, [x1], x2 - umull v16.8H, v4.8B, v0.8B - umlal v16.8H, v6.8B, v1.8B - ld1 {v4.8B}, [x1], x2 - umull v17.8H, v6.8B, v0.8B - umlal v17.8H, v4.8B, v1.8B + ld1 {v4.8b}, [x1], x2 +3: ld1 {v6.8b}, [x1], x2 + umull v16.8h, v4.8b, v0.8b + umlal v16.8h, v6.8b, v1.8b + ld1 {v4.8b}, [x1], x2 + umull v17.8h, v6.8b, v0.8b + umlal v17.8h, v4.8b, v1.8b prfm pldl1strm, [x1] .ifc \codec,h264 - rshrn v16.8B, v16.8H, #6 - rshrn v17.8B, v17.8H, #6 + rshrn v16.8b, v16.8h, #6 + rshrn v17.8b, v17.8h, #6 .else - add v16.8H, v16.8H, v22.8H - add v17.8H, v17.8H, v22.8H - shrn v16.8B, v16.8H, #6 - shrn v17.8B, v17.8H, #6 + add v16.8h, v16.8h, v22.8h + add v17.8h, v17.8h, v22.8h + shrn v16.8b, v16.8h, #6 + shrn v17.8b, v17.8h, #6 .endif prfm pldl1strm, [x1, x2] .ifc \type,avg - ld1 {v20.8B}, [x8], x2 - ld1 {v21.8B}, [x8], x2 - urhadd v16.8B, v16.8B, v20.8B - urhadd v17.8B, v17.8B, v21.8B + ld1 {v20.8b}, [x8], x2 + ld1 {v21.8b}, [x8], x2 + urhadd v16.8b, v16.8b, v20.8b + urhadd v17.8b, v17.8b, v21.8b .endif subs w3, w3, #2 - st1 {v16.8B}, [x0], x2 - st1 {v17.8B}, [x0], x2 + st1 {v16.8b}, [x0], x2 + st1 {v17.8b}, [x0], x2 b.gt 3b ret -4: ld1 {v4.8B, v5.8B}, [x1], x2 - ld1 {v6.8B, v7.8B}, [x1], x2 - ext v5.8B, v4.8B, v5.8B, #1 - ext v7.8B, v6.8B, v7.8B, #1 +4: ld1 {v4.8b, v5.8b}, [x1], x2 + ld1 {v6.8b, v7.8b}, [x1], x2 + ext v5.8b, v4.8b, v5.8b, #1 + ext v7.8b, v6.8b, v7.8b, #1 prfm pldl1strm, [x1] subs w3, w3, #2 - umull v16.8H, v4.8B, v0.8B - umlal v16.8H, v5.8B, v1.8B - umull v17.8H, v6.8B, v0.8B - umlal v17.8H, v7.8B, v1.8B + umull v16.8h, v4.8b, v0.8b + umlal v16.8h, v5.8b, v1.8b + umull v17.8h, v6.8b, v0.8b + umlal v17.8h, v7.8b, v1.8b prfm pldl1strm, [x1, x2] .ifc \codec,h264 - 
rshrn v16.8B, v16.8H, #6 - rshrn v17.8B, v17.8H, #6 + rshrn v16.8b, v16.8h, #6 + rshrn v17.8b, v17.8h, #6 .else - add v16.8H, v16.8H, v22.8H - add v17.8H, v17.8H, v22.8H - shrn v16.8B, v16.8H, #6 - shrn v17.8B, v17.8H, #6 + add v16.8h, v16.8h, v22.8h + add v17.8h, v17.8h, v22.8h + shrn v16.8b, v16.8h, #6 + shrn v17.8b, v17.8h, #6 .endif .ifc \type,avg - ld1 {v20.8B}, [x8], x2 - ld1 {v21.8B}, [x8], x2 - urhadd v16.8B, v16.8B, v20.8B - urhadd v17.8B, v17.8B, v21.8B + ld1 {v20.8b}, [x8], x2 + ld1 {v21.8b}, [x8], x2 + urhadd v16.8b, v16.8b, v20.8b + urhadd v17.8b, v17.8b, v21.8b .endif - st1 {v16.8B}, [x0], x2 - st1 {v17.8B}, [x0], x2 + st1 {v16.8b}, [x0], x2 + st1 {v17.8b}, [x0], x2 b.gt 4b ret -5: ld1 {v4.8B}, [x1], x2 - ld1 {v5.8B}, [x1], x2 +5: ld1 {v4.8b}, [x1], x2 + ld1 {v5.8b}, [x1], x2 prfm pldl1strm, [x1] subs w3, w3, #2 - umull v16.8H, v4.8B, v0.8B - umull v17.8H, v5.8B, v0.8B + umull v16.8h, v4.8b, v0.8b + umull v17.8h, v5.8b, v0.8b prfm pldl1strm, [x1, x2] .ifc \codec,h264 - rshrn v16.8B, v16.8H, #6 - rshrn v17.8B, v17.8H, #6 + rshrn v16.8b, v16.8h, #6 + rshrn v17.8b, v17.8h, #6 .else - add v16.8H, v16.8H, v22.8H - add v17.8H, v17.8H, v22.8H - shrn v16.8B, v16.8H, #6 - shrn v17.8B, v17.8H, #6 + add v16.8h, v16.8h, v22.8h + add v17.8h, v17.8h, v22.8h + shrn v16.8b, v16.8h, #6 + shrn v17.8b, v17.8h, #6 .endif .ifc \type,avg - ld1 {v20.8B}, [x8], x2 - ld1 {v21.8B}, [x8], x2 - urhadd v16.8B, v16.8B, v20.8B - urhadd v17.8B, v17.8B, v21.8B + ld1 {v20.8b}, [x8], x2 + ld1 {v21.8b}, [x8], x2 + urhadd v16.8b, v16.8b, v20.8b + urhadd v17.8b, v17.8b, v21.8b .endif - st1 {v16.8B}, [x0], x2 - st1 {v17.8B}, [x0], x2 + st1 {v16.8b}, [x0], x2 + st1 {v17.8b}, [x0], x2 b.gt 5b ret endfunc @@ -208,11 +208,11 @@ lsl w9, w9, #3 lsl w10, w10, #1 add w9, w9, w10 - add x6, x6, w9, UXTW - ld1r {v22.8H}, [x6] + add x6, x6, w9, uxtw + ld1r {v22.8h}, [x6] .endif .ifc \codec,vc1 - movi v22.8H, #28 + movi v22.8h, #28 .endif mul w7, w4, w5 lsl w14, w5, #3 @@ -225,133 +225,133 @@ add w4, 
w4, #64 b.eq 2f - dup v24.8B, w4 - dup v25.8B, w12 - ld1 {v4.8B}, [x1], x2 - dup v26.8B, w6 - dup v27.8B, w7 - ext v5.8B, v4.8B, v5.8B, #1 - trn1 v0.2S, v24.2S, v25.2S - trn1 v2.2S, v26.2S, v27.2S - trn1 v4.2S, v4.2S, v5.2S -1: ld1 {v6.8B}, [x1], x2 - ext v7.8B, v6.8B, v7.8B, #1 - trn1 v6.2S, v6.2S, v7.2S - umull v18.8H, v4.8B, v0.8B - umlal v18.8H, v6.8B, v2.8B - ld1 {v4.8B}, [x1], x2 - ext v5.8B, v4.8B, v5.8B, #1 - trn1 v4.2S, v4.2S, v5.2S - prfm pldl1strm, [x1] - umull v19.8H, v6.8B, v0.8B - umlal v19.8H, v4.8B, v2.8B - trn1 v30.2D, v18.2D, v19.2D - trn2 v31.2D, v18.2D, v19.2D - add v18.8H, v30.8H, v31.8H + dup v24.8b, w4 + dup v25.8b, w12 + ld1 {v4.8b}, [x1], x2 + dup v26.8b, w6 + dup v27.8b, w7 + ext v5.8b, v4.8b, v5.8b, #1 + trn1 v0.2s, v24.2s, v25.2s + trn1 v2.2s, v26.2s, v27.2s + trn1 v4.2s, v4.2s, v5.2s +1: ld1 {v6.8b}, [x1], x2 + ext v7.8b, v6.8b, v7.8b, #1 + trn1 v6.2s, v6.2s, v7.2s + umull v18.8h, v4.8b, v0.8b + umlal v18.8h, v6.8b, v2.8b + ld1 {v4.8b}, [x1], x2 + ext v5.8b, v4.8b, v5.8b, #1 + trn1 v4.2s, v4.2s, v5.2s + prfm pldl1strm, [x1] + umull v19.8h, v6.8b, v0.8b + umlal v19.8h, v4.8b, v2.8b + trn1 v30.2d, v18.2d, v19.2d + trn2 v31.2d, v18.2d, v19.2d + add v18.8h, v30.8h, v31.8h .ifc \codec,h264 - rshrn v16.8B, v18.8H, #6 + rshrn v16.8b, v18.8h, #6 .else - add v18.8H, v18.8H, v22.8H - shrn v16.8B, v18.8H, #6 + add v18.8h, v18.8h, v22.8h + shrn v16.8b, v18.8h, #6 .endif subs w3, w3, #2 prfm pldl1strm, [x1, x2] .ifc \type,avg - ld1 {v20.S}[0], [x8], x2 - ld1 {v20.S}[1], [x8], x2 - urhadd v16.8B, v16.8B, v20.8B + ld1 {v20.s}[0], [x8], x2 + ld1 {v20.s}[1], [x8], x2 + urhadd v16.8b, v16.8b, v20.8b .endif - st1 {v16.S}[0], [x0], x2 - st1 {v16.S}[1], [x0], x2 + st1 {v16.s}[0], [x0], x2 + st1 {v16.s}[1], [x0], x2 b.gt 1b ret 2: adds w12, w12, w6 - dup v30.8B, w4 + dup v30.8b, w4 b.eq 5f tst w6, w6 - dup v31.8B, w12 - trn1 v0.2S, v30.2S, v31.2S - trn2 v1.2S, v30.2S, v31.2S + dup v31.8b, w12 + trn1 v0.2s, v30.2s, v31.2s + trn2 v1.2s, v30.2s, v31.2s b.eq 4f 
- ext v1.8B, v0.8B, v1.8B, #4 - ld1 {v4.S}[0], [x1], x2 -3: ld1 {v4.S}[1], [x1], x2 - umull v18.8H, v4.8B, v0.8B - ld1 {v4.S}[0], [x1], x2 - umull v19.8H, v4.8B, v1.8B - trn1 v30.2D, v18.2D, v19.2D - trn2 v31.2D, v18.2D, v19.2D - add v18.8H, v30.8H, v31.8H + ext v1.8b, v0.8b, v1.8b, #4 + ld1 {v4.s}[0], [x1], x2 +3: ld1 {v4.s}[1], [x1], x2 + umull v18.8h, v4.8b, v0.8b + ld1 {v4.s}[0], [x1], x2 + umull v19.8h, v4.8b, v1.8b + trn1 v30.2d, v18.2d, v19.2d + trn2 v31.2d, v18.2d, v19.2d + add v18.8h, v30.8h, v31.8h prfm pldl1strm, [x1] .ifc \codec,h264 - rshrn v16.8B, v18.8H, #6 + rshrn v16.8b, v18.8h, #6 .else - add v18.8H, v18.8H, v22.8H - shrn v16.8B, v18.8H, #6 + add v18.8h, v18.8h, v22.8h + shrn v16.8b, v18.8h, #6 .endif .ifc \type,avg - ld1 {v20.S}[0], [x8], x2 - ld1 {v20.S}[1], [x8], x2 - urhadd v16.8B, v16.8B, v20.8B + ld1 {v20.s}[0], [x8], x2 + ld1 {v20.s}[1], [x8], x2 + urhadd v16.8b, v16.8b, v20.8b .endif subs w3, w3, #2 prfm pldl1strm, [x1, x2] - st1 {v16.S}[0], [x0], x2 - st1 {v16.S}[1], [x0], x2 + st1 {v16.s}[0], [x0], x2 + st1 {v16.s}[1], [x0], x2 b.gt 3b ret -4: ld1 {v4.8B}, [x1], x2 - ld1 {v6.8B}, [x1], x2 - ext v5.8B, v4.8B, v5.8B, #1 - ext v7.8B, v6.8B, v7.8B, #1 - trn1 v4.2S, v4.2S, v5.2S - trn1 v6.2S, v6.2S, v7.2S - umull v18.8H, v4.8B, v0.8B - umull v19.8H, v6.8B, v0.8B +4: ld1 {v4.8b}, [x1], x2 + ld1 {v6.8b}, [x1], x2 + ext v5.8b, v4.8b, v5.8b, #1 + ext v7.8b, v6.8b, v7.8b, #1 + trn1 v4.2s, v4.2s, v5.2s + trn1 v6.2s, v6.2s, v7.2s + umull v18.8h, v4.8b, v0.8b + umull v19.8h, v6.8b, v0.8b subs w3, w3, #2 - trn1 v30.2D, v18.2D, v19.2D - trn2 v31.2D, v18.2D, v19.2D - add v18.8H, v30.8H, v31.8H + trn1 v30.2d, v18.2d, v19.2d + trn2 v31.2d, v18.2d, v19.2d + add v18.8h, v30.8h, v31.8h prfm pldl1strm, [x1] .ifc \codec,h264 - rshrn v16.8B, v18.8H, #6 + rshrn v16.8b, v18.8h, #6 .else - add v18.8H, v18.8H, v22.8H - shrn v16.8B, v18.8H, #6 + add v18.8h, v18.8h, v22.8h + shrn v16.8b, v18.8h, #6 .endif .ifc \type,avg - ld1 {v20.S}[0], [x8], x2 - ld1 {v20.S}[1], 
[x8], x2 - urhadd v16.8B, v16.8B, v20.8B + ld1 {v20.s}[0], [x8], x2 + ld1 {v20.s}[1], [x8], x2 + urhadd v16.8b, v16.8b, v20.8b .endif prfm pldl1strm, [x1] - st1 {v16.S}[0], [x0], x2 - st1 {v16.S}[1], [x0], x2 + st1 {v16.s}[0], [x0], x2 + st1 {v16.s}[1], [x0], x2 b.gt 4b ret -5: ld1 {v4.S}[0], [x1], x2 - ld1 {v4.S}[1], [x1], x2 - umull v18.8H, v4.8B, v30.8B +5: ld1 {v4.s}[0], [x1], x2 + ld1 {v4.s}[1], [x1], x2 + umull v18.8h, v4.8b, v30.8b subs w3, w3, #2 prfm pldl1strm, [x1] .ifc \codec,h264 - rshrn v16.8B, v18.8H, #6 + rshrn v16.8b, v18.8h, #6 .else - add v18.8H, v18.8H, v22.8H - shrn v16.8B, v18.8H, #6 + add v18.8h, v18.8h, v22.8h + shrn v16.8b, v18.8h, #6 .endif .ifc \type,avg - ld1 {v20.S}[0], [x8], x2 - ld1 {v20.S}[1], [x8], x2 - urhadd v16.8B, v16.8B, v20.8B + ld1 {v20.s}[0], [x8], x2 + ld1 {v20.s}[1], [x8], x2 + urhadd v16.8b, v16.8b, v20.8b .endif prfm pldl1strm, [x1] - st1 {v16.S}[0], [x0], x2 - st1 {v16.S}[1], [x0], x2 + st1 {v16.s}[0], [x0], x2 + st1 {v16.s}[1], [x0], x2 b.gt 5b ret endfunc @@ -372,51 +372,51 @@ sub w4, w7, w13 sub w4, w4, w14 add w4, w4, #64 - dup v0.8B, w4 - dup v2.8B, w12 - dup v1.8B, w6 - dup v3.8B, w7 - trn1 v0.4H, v0.4H, v2.4H - trn1 v1.4H, v1.4H, v3.4H + dup v0.8b, w4 + dup v2.8b, w12 + dup v1.8b, w6 + dup v3.8b, w7 + trn1 v0.4h, v0.4h, v2.4h + trn1 v1.4h, v1.4h, v3.4h 1: - ld1 {v4.S}[0], [x1], x2 - ld1 {v4.S}[1], [x1], x2 - rev64 v5.2S, v4.2S - ld1 {v5.S}[1], [x1] - ext v6.8B, v4.8B, v5.8B, #1 - ext v7.8B, v5.8B, v4.8B, #1 - trn1 v4.4H, v4.4H, v6.4H - trn1 v5.4H, v5.4H, v7.4H - umull v16.8H, v4.8B, v0.8B - umlal v16.8H, v5.8B, v1.8B + ld1 {v4.s}[0], [x1], x2 + ld1 {v4.s}[1], [x1], x2 + rev64 v5.2s, v4.2s + ld1 {v5.s}[1], [x1] + ext v6.8b, v4.8b, v5.8b, #1 + ext v7.8b, v5.8b, v4.8b, #1 + trn1 v4.4h, v4.4h, v6.4h + trn1 v5.4h, v5.4h, v7.4h + umull v16.8h, v4.8b, v0.8b + umlal v16.8h, v5.8b, v1.8b .ifc \type,avg - ld1 {v18.H}[0], [x0], x2 - ld1 {v18.H}[2], [x0] + ld1 {v18.h}[0], [x0], x2 + ld1 {v18.h}[2], [x0] sub x0, x0, x2 .endif 
- rev64 v17.4S, v16.4S - add v16.8H, v16.8H, v17.8H - rshrn v16.8B, v16.8H, #6 + rev64 v17.4s, v16.4s + add v16.8h, v16.8h, v17.8h + rshrn v16.8b, v16.8h, #6 .ifc \type,avg - urhadd v16.8B, v16.8B, v18.8B + urhadd v16.8b, v16.8b, v18.8b .endif - st1 {v16.H}[0], [x0], x2 - st1 {v16.H}[2], [x0], x2 + st1 {v16.h}[0], [x0], x2 + st1 {v16.h}[2], [x0], x2 subs w3, w3, #2 b.gt 1b ret 2: - ld1 {v16.H}[0], [x1], x2 - ld1 {v16.H}[1], [x1], x2 + ld1 {v16.h}[0], [x1], x2 + ld1 {v16.h}[1], [x1], x2 .ifc \type,avg - ld1 {v18.H}[0], [x0], x2 - ld1 {v18.H}[1], [x0] + ld1 {v18.h}[0], [x0], x2 + ld1 {v18.h}[1], [x0] sub x0, x0, x2 - urhadd v16.8B, v16.8B, v18.8B + urhadd v16.8b, v16.8b, v18.8b .endif - st1 {v16.H}[0], [x0], x2 - st1 {v16.H}[1], [x0], x2 + st1 {v16.h}[0], [x0], x2 + st1 {v16.h}[1], [x0], x2 subs w3, w3, #2 b.gt 2b ret diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/h264dsp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/h264dsp_neon.S --- ffmpeg-5.1.8/libavcodec/aarch64/h264dsp_neon.S 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/aarch64/h264dsp_neon.S 2026-03-16 18:10:00.000000000 +0000 @@ -27,7 +27,7 @@ cmp w2, #0 ldr w6, [x4] ccmp w3, #0, #0, ne - mov v24.S[0], w6 + mov v24.s[0], w6 and w8, w6, w6, lsl #16 b.eq 1f ands w8, w8, w8, lsl #8 @@ -38,95 +38,95 @@ .endm .macro h264_loop_filter_luma - dup v22.16B, w2 // alpha - uxtl v24.8H, v24.8B - uabd v21.16B, v16.16B, v0.16B // abs(p0 - q0) - uxtl v24.4S, v24.4H - uabd v28.16B, v18.16B, v16.16B // abs(p1 - p0) - sli v24.8H, v24.8H, #8 - uabd v30.16B, v2.16B, v0.16B // abs(q1 - q0) - sli v24.4S, v24.4S, #16 - cmhi v21.16B, v22.16B, v21.16B // < alpha - dup v22.16B, w3 // beta - cmlt v23.16B, v24.16B, #0 - cmhi v28.16B, v22.16B, v28.16B // < beta - cmhi v30.16B, v22.16B, v30.16B // < beta - bic v21.16B, v21.16B, v23.16B - uabd v17.16B, v20.16B, v16.16B // abs(p2 - p0) - and v21.16B, v21.16B, v28.16B - uabd v19.16B, v4.16B, v0.16B // abs(q2 - q0) - and v21.16B, v21.16B, v30.16B // < beta + dup v22.16b, w2 // alpha + 
uxtl v24.8h, v24.8b + uabd v21.16b, v16.16b, v0.16b // abs(p0 - q0) + uxtl v24.4s, v24.4h + uabd v28.16b, v18.16b, v16.16b // abs(p1 - p0) + sli v24.8h, v24.8h, #8 + uabd v30.16b, v2.16b, v0.16b // abs(q1 - q0) + sli v24.4s, v24.4s, #16 + cmhi v21.16b, v22.16b, v21.16b // < alpha + dup v22.16b, w3 // beta + cmlt v23.16b, v24.16b, #0 + cmhi v28.16b, v22.16b, v28.16b // < beta + cmhi v30.16b, v22.16b, v30.16b // < beta + bic v21.16b, v21.16b, v23.16b + uabd v17.16b, v20.16b, v16.16b // abs(p2 - p0) + and v21.16b, v21.16b, v28.16b + uabd v19.16b, v4.16b, v0.16b // abs(q2 - q0) + and v21.16b, v21.16b, v30.16b // < beta shrn v30.8b, v21.8h, #4 mov x7, v30.d[0] - cmhi v17.16B, v22.16B, v17.16B // < beta - cmhi v19.16B, v22.16B, v19.16B // < beta + cmhi v17.16b, v22.16b, v17.16b // < beta + cmhi v19.16b, v22.16b, v19.16b // < beta cbz x7, 9f - and v17.16B, v17.16B, v21.16B - and v19.16B, v19.16B, v21.16B - and v24.16B, v24.16B, v21.16B - urhadd v28.16B, v16.16B, v0.16B - sub v21.16B, v24.16B, v17.16B - uqadd v23.16B, v18.16B, v24.16B - uhadd v20.16B, v20.16B, v28.16B - sub v21.16B, v21.16B, v19.16B - uhadd v28.16B, v4.16B, v28.16B - umin v23.16B, v23.16B, v20.16B - uqsub v22.16B, v18.16B, v24.16B - uqadd v4.16B, v2.16B, v24.16B - umax v23.16B, v23.16B, v22.16B - uqsub v22.16B, v2.16B, v24.16B - umin v28.16B, v4.16B, v28.16B - uxtl v4.8H, v0.8B - umax v28.16B, v28.16B, v22.16B - uxtl2 v20.8H, v0.16B - usubw v4.8H, v4.8H, v16.8B - usubw2 v20.8H, v20.8H, v16.16B - shl v4.8H, v4.8H, #2 - shl v20.8H, v20.8H, #2 - uaddw v4.8H, v4.8H, v18.8B - uaddw2 v20.8H, v20.8H, v18.16B - usubw v4.8H, v4.8H, v2.8B - usubw2 v20.8H, v20.8H, v2.16B - rshrn v4.8B, v4.8H, #3 - rshrn2 v4.16B, v20.8H, #3 - bsl v17.16B, v23.16B, v18.16B - bsl v19.16B, v28.16B, v2.16B - neg v23.16B, v21.16B - uxtl v28.8H, v16.8B - smin v4.16B, v4.16B, v21.16B - uxtl2 v21.8H, v16.16B - smax v4.16B, v4.16B, v23.16B - uxtl v22.8H, v0.8B - uxtl2 v24.8H, v0.16B - saddw v28.8H, v28.8H, v4.8B - saddw2 v21.8H, v21.8H, v4.16B 
- ssubw v22.8H, v22.8H, v4.8B - ssubw2 v24.8H, v24.8H, v4.16B - sqxtun v16.8B, v28.8H - sqxtun2 v16.16B, v21.8H - sqxtun v0.8B, v22.8H - sqxtun2 v0.16B, v24.8H + and v17.16b, v17.16b, v21.16b + and v19.16b, v19.16b, v21.16b + and v24.16b, v24.16b, v21.16b + urhadd v28.16b, v16.16b, v0.16b + sub v21.16b, v24.16b, v17.16b + uqadd v23.16b, v18.16b, v24.16b + uhadd v20.16b, v20.16b, v28.16b + sub v21.16b, v21.16b, v19.16b + uhadd v28.16b, v4.16b, v28.16b + umin v23.16b, v23.16b, v20.16b + uqsub v22.16b, v18.16b, v24.16b + uqadd v4.16b, v2.16b, v24.16b + umax v23.16b, v23.16b, v22.16b + uqsub v22.16b, v2.16b, v24.16b + umin v28.16b, v4.16b, v28.16b + uxtl v4.8h, v0.8b + umax v28.16b, v28.16b, v22.16b + uxtl2 v20.8h, v0.16b + usubw v4.8h, v4.8h, v16.8b + usubw2 v20.8h, v20.8h, v16.16b + shl v4.8h, v4.8h, #2 + shl v20.8h, v20.8h, #2 + uaddw v4.8h, v4.8h, v18.8b + uaddw2 v20.8h, v20.8h, v18.16b + usubw v4.8h, v4.8h, v2.8b + usubw2 v20.8h, v20.8h, v2.16b + rshrn v4.8b, v4.8h, #3 + rshrn2 v4.16b, v20.8h, #3 + bsl v17.16b, v23.16b, v18.16b + bsl v19.16b, v28.16b, v2.16b + neg v23.16b, v21.16b + uxtl v28.8h, v16.8b + smin v4.16b, v4.16b, v21.16b + uxtl2 v21.8h, v16.16b + smax v4.16b, v4.16b, v23.16b + uxtl v22.8h, v0.8b + uxtl2 v24.8h, v0.16b + saddw v28.8h, v28.8h, v4.8b + saddw2 v21.8h, v21.8h, v4.16b + ssubw v22.8h, v22.8h, v4.8b + ssubw2 v24.8h, v24.8h, v4.16b + sqxtun v16.8b, v28.8h + sqxtun2 v16.16b, v21.8h + sqxtun v0.8b, v22.8h + sqxtun2 v0.16b, v24.8h .endm function ff_h264_v_loop_filter_luma_neon, export=1 h264_loop_filter_start - ld1 {v0.16B}, [x0], x1 - ld1 {v2.16B}, [x0], x1 - ld1 {v4.16B}, [x0], x1 + ld1 {v0.16b}, [x0], x1 + ld1 {v2.16b}, [x0], x1 + ld1 {v4.16b}, [x0], x1 sub x0, x0, x1, lsl #2 sub x0, x0, x1, lsl #1 - ld1 {v20.16B}, [x0], x1 - ld1 {v18.16B}, [x0], x1 - ld1 {v16.16B}, [x0], x1 + ld1 {v20.16b}, [x0], x1 + ld1 {v18.16b}, [x0], x1 + ld1 {v16.16b}, [x0], x1 h264_loop_filter_luma sub x0, x0, x1, lsl #1 - st1 {v17.16B}, [x0], x1 - st1 {v16.16B}, [x0], 
x1 - st1 {v0.16B}, [x0], x1 - st1 {v19.16B}, [x0] + st1 {v17.16b}, [x0], x1 + st1 {v16.16b}, [x0], x1 + st1 {v0.16b}, [x0], x1 + st1 {v19.16b}, [x0] 9: ret endfunc @@ -135,22 +135,22 @@ h264_loop_filter_start sub x0, x0, #4 - ld1 {v6.8B}, [x0], x1 - ld1 {v20.8B}, [x0], x1 - ld1 {v18.8B}, [x0], x1 - ld1 {v16.8B}, [x0], x1 - ld1 {v0.8B}, [x0], x1 - ld1 {v2.8B}, [x0], x1 - ld1 {v4.8B}, [x0], x1 - ld1 {v26.8B}, [x0], x1 - ld1 {v6.D}[1], [x0], x1 - ld1 {v20.D}[1], [x0], x1 - ld1 {v18.D}[1], [x0], x1 - ld1 {v16.D}[1], [x0], x1 - ld1 {v0.D}[1], [x0], x1 - ld1 {v2.D}[1], [x0], x1 - ld1 {v4.D}[1], [x0], x1 - ld1 {v26.D}[1], [x0], x1 + ld1 {v6.8b}, [x0], x1 + ld1 {v20.8b}, [x0], x1 + ld1 {v18.8b}, [x0], x1 + ld1 {v16.8b}, [x0], x1 + ld1 {v0.8b}, [x0], x1 + ld1 {v2.8b}, [x0], x1 + ld1 {v4.8b}, [x0], x1 + ld1 {v26.8b}, [x0], x1 + ld1 {v6.d}[1], [x0], x1 + ld1 {v20.d}[1], [x0], x1 + ld1 {v18.d}[1], [x0], x1 + ld1 {v16.d}[1], [x0], x1 + ld1 {v0.d}[1], [x0], x1 + ld1 {v2.d}[1], [x0], x1 + ld1 {v4.d}[1], [x0], x1 + ld1 {v26.d}[1], [x0], x1 transpose_8x16B v6, v20, v18, v16, v0, v2, v4, v26, v21, v23 @@ -160,22 +160,22 @@ sub x0, x0, x1, lsl #4 add x0, x0, #2 - st1 {v17.S}[0], [x0], x1 - st1 {v16.S}[0], [x0], x1 - st1 {v0.S}[0], [x0], x1 - st1 {v19.S}[0], [x0], x1 - st1 {v17.S}[1], [x0], x1 - st1 {v16.S}[1], [x0], x1 - st1 {v0.S}[1], [x0], x1 - st1 {v19.S}[1], [x0], x1 - st1 {v17.S}[2], [x0], x1 - st1 {v16.S}[2], [x0], x1 - st1 {v0.S}[2], [x0], x1 - st1 {v19.S}[2], [x0], x1 - st1 {v17.S}[3], [x0], x1 - st1 {v16.S}[3], [x0], x1 - st1 {v0.S}[3], [x0], x1 - st1 {v19.S}[3], [x0], x1 + st1 {v17.s}[0], [x0], x1 + st1 {v16.s}[0], [x0], x1 + st1 {v0.s}[0], [x0], x1 + st1 {v19.s}[0], [x0], x1 + st1 {v17.s}[1], [x0], x1 + st1 {v16.s}[1], [x0], x1 + st1 {v0.s}[1], [x0], x1 + st1 {v19.s}[1], [x0], x1 + st1 {v17.s}[2], [x0], x1 + st1 {v16.s}[2], [x0], x1 + st1 {v0.s}[2], [x0], x1 + st1 {v19.s}[2], [x0], x1 + st1 {v17.s}[3], [x0], x1 + st1 {v16.s}[3], [x0], x1 + st1 {v0.s}[3], [x0], x1 + st1 
{v19.s}[3], [x0], x1 9: ret endfunc @@ -377,52 +377,52 @@ endfunc .macro h264_loop_filter_chroma - dup v22.8B, w2 // alpha - dup v23.8B, w3 // beta - uxtl v24.8H, v24.8B - uabd v26.8B, v16.8B, v0.8B // abs(p0 - q0) - uabd v28.8B, v18.8B, v16.8B // abs(p1 - p0) - uabd v30.8B, v2.8B, v0.8B // abs(q1 - q0) - cmhi v26.8B, v22.8B, v26.8B // < alpha - cmhi v28.8B, v23.8B, v28.8B // < beta - cmhi v30.8B, v23.8B, v30.8B // < beta - uxtl v4.8H, v0.8B - and v26.8B, v26.8B, v28.8B - usubw v4.8H, v4.8H, v16.8B - and v26.8B, v26.8B, v30.8B - shl v4.8H, v4.8H, #2 + dup v22.8b, w2 // alpha + dup v23.8b, w3 // beta + uxtl v24.8h, v24.8b + uabd v26.8b, v16.8b, v0.8b // abs(p0 - q0) + uabd v28.8b, v18.8b, v16.8b // abs(p1 - p0) + uabd v30.8b, v2.8b, v0.8b // abs(q1 - q0) + cmhi v26.8b, v22.8b, v26.8b // < alpha + cmhi v28.8b, v23.8b, v28.8b // < beta + cmhi v30.8b, v23.8b, v30.8b // < beta + uxtl v4.8h, v0.8b + and v26.8b, v26.8b, v28.8b + usubw v4.8h, v4.8h, v16.8b + and v26.8b, v26.8b, v30.8b + shl v4.8h, v4.8h, #2 mov x8, v26.d[0] - sli v24.8H, v24.8H, #8 - uaddw v4.8H, v4.8H, v18.8B + sli v24.8h, v24.8h, #8 + uaddw v4.8h, v4.8h, v18.8b cbz x8, 9f - usubw v4.8H, v4.8H, v2.8B - rshrn v4.8B, v4.8H, #3 - smin v4.8B, v4.8B, v24.8B - neg v25.8B, v24.8B - smax v4.8B, v4.8B, v25.8B - uxtl v22.8H, v0.8B - and v4.8B, v4.8B, v26.8B - uxtl v28.8H, v16.8B - saddw v28.8H, v28.8H, v4.8B - ssubw v22.8H, v22.8H, v4.8B - sqxtun v16.8B, v28.8H - sqxtun v0.8B, v22.8H + usubw v4.8h, v4.8h, v2.8b + rshrn v4.8b, v4.8h, #3 + smin v4.8b, v4.8b, v24.8b + neg v25.8b, v24.8b + smax v4.8b, v4.8b, v25.8b + uxtl v22.8h, v0.8b + and v4.8b, v4.8b, v26.8b + uxtl v28.8h, v16.8b + saddw v28.8h, v28.8h, v4.8b + ssubw v22.8h, v22.8h, v4.8b + sqxtun v16.8b, v28.8h + sqxtun v0.8b, v22.8h .endm function ff_h264_v_loop_filter_chroma_neon, export=1 h264_loop_filter_start sub x0, x0, x1, lsl #1 - ld1 {v18.8B}, [x0], x1 - ld1 {v16.8B}, [x0], x1 - ld1 {v0.8B}, [x0], x1 - ld1 {v2.8B}, [x0] + ld1 {v18.8b}, [x0], x1 + ld1 
{v16.8b}, [x0], x1 + ld1 {v0.8b}, [x0], x1 + ld1 {v2.8b}, [x0] h264_loop_filter_chroma sub x0, x0, x1, lsl #1 - st1 {v16.8B}, [x0], x1 - st1 {v0.8B}, [x0], x1 + st1 {v16.8b}, [x0], x1 + st1 {v0.8b}, [x0], x1 9: ret endfunc @@ -432,14 +432,14 @@ sub x0, x0, #2 h_loop_filter_chroma420: - ld1 {v18.S}[0], [x0], x1 - ld1 {v16.S}[0], [x0], x1 - ld1 {v0.S}[0], [x0], x1 - ld1 {v2.S}[0], [x0], x1 - ld1 {v18.S}[1], [x0], x1 - ld1 {v16.S}[1], [x0], x1 - ld1 {v0.S}[1], [x0], x1 - ld1 {v2.S}[1], [x0], x1 + ld1 {v18.s}[0], [x0], x1 + ld1 {v16.s}[0], [x0], x1 + ld1 {v0.s}[0], [x0], x1 + ld1 {v2.s}[0], [x0], x1 + ld1 {v18.s}[1], [x0], x1 + ld1 {v16.s}[1], [x0], x1 + ld1 {v0.s}[1], [x0], x1 + ld1 {v2.s}[1], [x0], x1 transpose_4x8B v18, v16, v0, v2, v28, v29, v30, v31 @@ -448,14 +448,14 @@ transpose_4x8B v18, v16, v0, v2, v28, v29, v30, v31 sub x0, x0, x1, lsl #3 - st1 {v18.S}[0], [x0], x1 - st1 {v16.S}[0], [x0], x1 - st1 {v0.S}[0], [x0], x1 - st1 {v2.S}[0], [x0], x1 - st1 {v18.S}[1], [x0], x1 - st1 {v16.S}[1], [x0], x1 - st1 {v0.S}[1], [x0], x1 - st1 {v2.S}[1], [x0], x1 + st1 {v18.s}[0], [x0], x1 + st1 {v16.s}[0], [x0], x1 + st1 {v0.s}[0], [x0], x1 + st1 {v2.s}[0], [x0], x1 + st1 {v18.s}[1], [x0], x1 + st1 {v16.s}[1], [x0], x1 + st1 {v0.s}[1], [x0], x1 + st1 {v2.s}[1], [x0], x1 9: ret endfunc @@ -526,7 +526,7 @@ ld1 {v17.8b}, [x4], x1 ld1 {v19.8b}, [x4], x1 - transpose_4x8B v18, v16, v17, v19, v26, v27, v28, v29 + transpose_4x8B v18, v16, v17, v19, v26, v27, v28, v29 h264_loop_filter_chroma_intra @@ -554,7 +554,7 @@ ld1 {v17.s}[1], [x4], x1 ld1 {v19.s}[1], [x4], x1 - transpose_4x8B v18, v16, v17, v19, v26, v27, v28, v29 + transpose_4x8B v18, v16, v17, v19, v26, v27, v28, v29 h264_loop_filter_chroma_intra @@ -584,102 +584,102 @@ endfunc .macro biweight_16 macs, macd - dup v0.16B, w5 - dup v1.16B, w6 - mov v4.16B, v16.16B - mov v6.16B, v16.16B + dup v0.16b, w5 + dup v1.16b, w6 + mov v4.16b, v16.16b + mov v6.16b, v16.16b 1: subs w3, w3, #2 - ld1 {v20.16B}, [x0], x2 - \macd v4.8H, 
v0.8B, v20.8B + ld1 {v20.16b}, [x0], x2 + \macd v4.8h, v0.8b, v20.8b \macd\()2 v6.8H, v0.16B, v20.16B - ld1 {v22.16B}, [x1], x2 - \macs v4.8H, v1.8B, v22.8B + ld1 {v22.16b}, [x1], x2 + \macs v4.8h, v1.8b, v22.8b \macs\()2 v6.8H, v1.16B, v22.16B - mov v24.16B, v16.16B - ld1 {v28.16B}, [x0], x2 - mov v26.16B, v16.16B - \macd v24.8H, v0.8B, v28.8B + mov v24.16b, v16.16b + ld1 {v28.16b}, [x0], x2 + mov v26.16b, v16.16b + \macd v24.8h, v0.8b, v28.8b \macd\()2 v26.8H, v0.16B, v28.16B - ld1 {v30.16B}, [x1], x2 - \macs v24.8H, v1.8B, v30.8B + ld1 {v30.16b}, [x1], x2 + \macs v24.8h, v1.8b, v30.8b \macs\()2 v26.8H, v1.16B, v30.16B - sshl v4.8H, v4.8H, v18.8H - sshl v6.8H, v6.8H, v18.8H - sqxtun v4.8B, v4.8H - sqxtun2 v4.16B, v6.8H - sshl v24.8H, v24.8H, v18.8H - sshl v26.8H, v26.8H, v18.8H - sqxtun v24.8B, v24.8H - sqxtun2 v24.16B, v26.8H - mov v6.16B, v16.16B - st1 {v4.16B}, [x7], x2 - mov v4.16B, v16.16B - st1 {v24.16B}, [x7], x2 + sshl v4.8h, v4.8h, v18.8h + sshl v6.8h, v6.8h, v18.8h + sqxtun v4.8b, v4.8h + sqxtun2 v4.16b, v6.8h + sshl v24.8h, v24.8h, v18.8h + sshl v26.8h, v26.8h, v18.8h + sqxtun v24.8b, v24.8h + sqxtun2 v24.16b, v26.8h + mov v6.16b, v16.16b + st1 {v4.16b}, [x7], x2 + mov v4.16b, v16.16b + st1 {v24.16b}, [x7], x2 b.ne 1b ret .endm .macro biweight_8 macs, macd - dup v0.8B, w5 - dup v1.8B, w6 - mov v2.16B, v16.16B - mov v20.16B, v16.16B + dup v0.8b, w5 + dup v1.8b, w6 + mov v2.16b, v16.16b + mov v20.16b, v16.16b 1: subs w3, w3, #2 - ld1 {v4.8B}, [x0], x2 - \macd v2.8H, v0.8B, v4.8B - ld1 {v5.8B}, [x1], x2 - \macs v2.8H, v1.8B, v5.8B - ld1 {v6.8B}, [x0], x2 - \macd v20.8H, v0.8B, v6.8B - ld1 {v7.8B}, [x1], x2 - \macs v20.8H, v1.8B, v7.8B - sshl v2.8H, v2.8H, v18.8H - sqxtun v2.8B, v2.8H - sshl v20.8H, v20.8H, v18.8H - sqxtun v4.8B, v20.8H - mov v20.16B, v16.16B - st1 {v2.8B}, [x7], x2 - mov v2.16B, v16.16B - st1 {v4.8B}, [x7], x2 + ld1 {v4.8b}, [x0], x2 + \macd v2.8h, v0.8b, v4.8b + ld1 {v5.8b}, [x1], x2 + \macs v2.8h, v1.8b, v5.8b + ld1 {v6.8b}, [x0], x2 + 
\macd v20.8h, v0.8b, v6.8b + ld1 {v7.8b}, [x1], x2 + \macs v20.8h, v1.8b, v7.8b + sshl v2.8h, v2.8h, v18.8h + sqxtun v2.8b, v2.8h + sshl v20.8h, v20.8h, v18.8h + sqxtun v4.8b, v20.8h + mov v20.16b, v16.16b + st1 {v2.8b}, [x7], x2 + mov v2.16b, v16.16b + st1 {v4.8b}, [x7], x2 b.ne 1b ret .endm .macro biweight_4 macs, macd - dup v0.8B, w5 - dup v1.8B, w6 - mov v2.16B, v16.16B - mov v20.16B,v16.16B + dup v0.8b, w5 + dup v1.8b, w6 + mov v2.16b, v16.16b + mov v20.16b,v16.16b 1: subs w3, w3, #4 - ld1 {v4.S}[0], [x0], x2 - ld1 {v4.S}[1], [x0], x2 - \macd v2.8H, v0.8B, v4.8B - ld1 {v5.S}[0], [x1], x2 - ld1 {v5.S}[1], [x1], x2 - \macs v2.8H, v1.8B, v5.8B + ld1 {v4.s}[0], [x0], x2 + ld1 {v4.s}[1], [x0], x2 + \macd v2.8h, v0.8b, v4.8b + ld1 {v5.s}[0], [x1], x2 + ld1 {v5.s}[1], [x1], x2 + \macs v2.8h, v1.8b, v5.8b b.lt 2f - ld1 {v6.S}[0], [x0], x2 - ld1 {v6.S}[1], [x0], x2 - \macd v20.8H, v0.8B, v6.8B - ld1 {v7.S}[0], [x1], x2 - ld1 {v7.S}[1], [x1], x2 - \macs v20.8H, v1.8B, v7.8B - sshl v2.8H, v2.8H, v18.8H - sqxtun v2.8B, v2.8H - sshl v20.8H, v20.8H, v18.8H - sqxtun v4.8B, v20.8H - mov v20.16B, v16.16B - st1 {v2.S}[0], [x7], x2 - st1 {v2.S}[1], [x7], x2 - mov v2.16B, v16.16B - st1 {v4.S}[0], [x7], x2 - st1 {v4.S}[1], [x7], x2 + ld1 {v6.s}[0], [x0], x2 + ld1 {v6.s}[1], [x0], x2 + \macd v20.8h, v0.8b, v6.8b + ld1 {v7.s}[0], [x1], x2 + ld1 {v7.s}[1], [x1], x2 + \macs v20.8h, v1.8b, v7.8b + sshl v2.8h, v2.8h, v18.8h + sqxtun v2.8b, v2.8h + sshl v20.8h, v20.8h, v18.8h + sqxtun v4.8b, v20.8h + mov v20.16b, v16.16b + st1 {v2.s}[0], [x7], x2 + st1 {v2.s}[1], [x7], x2 + mov v2.16b, v16.16b + st1 {v4.s}[0], [x7], x2 + st1 {v4.s}[1], [x7], x2 b.ne 1b ret -2: sshl v2.8H, v2.8H, v18.8H - sqxtun v2.8B, v2.8H - st1 {v2.S}[0], [x7], x2 - st1 {v2.S}[1], [x7], x2 +2: sshl v2.8h, v2.8h, v18.8h + sqxtun v2.8b, v2.8h + st1 {v2.s}[0], [x7], x2 + st1 {v2.s}[1], [x7], x2 ret .endm @@ -689,10 +689,10 @@ add w7, w7, #1 eor w8, w8, w6, lsr #30 orr w7, w7, #1 - dup v18.8H, w4 + dup v18.8h, w4 lsl w7, 
w7, w4 - not v18.16B, v18.16B - dup v16.8H, w7 + not v18.16b, v18.16b + dup v16.8h, w7 mov x7, x0 cbz w8, 10f subs w8, w8, #1 @@ -716,78 +716,78 @@ biweight_func 4 .macro weight_16 add - dup v0.16B, w4 + dup v0.16b, w4 1: subs w2, w2, #2 - ld1 {v20.16B}, [x0], x1 - umull v4.8H, v0.8B, v20.8B - umull2 v6.8H, v0.16B, v20.16B - ld1 {v28.16B}, [x0], x1 - umull v24.8H, v0.8B, v28.8B - umull2 v26.8H, v0.16B, v28.16B - \add v4.8H, v16.8H, v4.8H - srshl v4.8H, v4.8H, v18.8H - \add v6.8H, v16.8H, v6.8H - srshl v6.8H, v6.8H, v18.8H - sqxtun v4.8B, v4.8H - sqxtun2 v4.16B, v6.8H - \add v24.8H, v16.8H, v24.8H - srshl v24.8H, v24.8H, v18.8H - \add v26.8H, v16.8H, v26.8H - srshl v26.8H, v26.8H, v18.8H - sqxtun v24.8B, v24.8H - sqxtun2 v24.16B, v26.8H - st1 {v4.16B}, [x5], x1 - st1 {v24.16B}, [x5], x1 + ld1 {v20.16b}, [x0], x1 + umull v4.8h, v0.8b, v20.8b + umull2 v6.8h, v0.16b, v20.16b + ld1 {v28.16b}, [x0], x1 + umull v24.8h, v0.8b, v28.8b + umull2 v26.8h, v0.16b, v28.16b + \add v4.8h, v16.8h, v4.8h + srshl v4.8h, v4.8h, v18.8h + \add v6.8h, v16.8h, v6.8h + srshl v6.8h, v6.8h, v18.8h + sqxtun v4.8b, v4.8h + sqxtun2 v4.16b, v6.8h + \add v24.8h, v16.8h, v24.8h + srshl v24.8h, v24.8h, v18.8h + \add v26.8h, v16.8h, v26.8h + srshl v26.8h, v26.8h, v18.8h + sqxtun v24.8b, v24.8h + sqxtun2 v24.16b, v26.8h + st1 {v4.16b}, [x5], x1 + st1 {v24.16b}, [x5], x1 b.ne 1b ret .endm .macro weight_8 add - dup v0.8B, w4 + dup v0.8b, w4 1: subs w2, w2, #2 - ld1 {v4.8B}, [x0], x1 - umull v2.8H, v0.8B, v4.8B - ld1 {v6.8B}, [x0], x1 - umull v20.8H, v0.8B, v6.8B - \add v2.8H, v16.8H, v2.8H - srshl v2.8H, v2.8H, v18.8H - sqxtun v2.8B, v2.8H - \add v20.8H, v16.8H, v20.8H - srshl v20.8H, v20.8H, v18.8H - sqxtun v4.8B, v20.8H - st1 {v2.8B}, [x5], x1 - st1 {v4.8B}, [x5], x1 + ld1 {v4.8b}, [x0], x1 + umull v2.8h, v0.8b, v4.8b + ld1 {v6.8b}, [x0], x1 + umull v20.8h, v0.8b, v6.8b + \add v2.8h, v16.8h, v2.8h + srshl v2.8h, v2.8h, v18.8h + sqxtun v2.8b, v2.8h + \add v20.8h, v16.8h, v20.8h + srshl v20.8h, v20.8h, 
v18.8h + sqxtun v4.8b, v20.8h + st1 {v2.8b}, [x5], x1 + st1 {v4.8b}, [x5], x1 b.ne 1b ret .endm .macro weight_4 add - dup v0.8B, w4 + dup v0.8b, w4 1: subs w2, w2, #4 - ld1 {v4.S}[0], [x0], x1 - ld1 {v4.S}[1], [x0], x1 - umull v2.8H, v0.8B, v4.8B + ld1 {v4.s}[0], [x0], x1 + ld1 {v4.s}[1], [x0], x1 + umull v2.8h, v0.8b, v4.8b b.lt 2f - ld1 {v6.S}[0], [x0], x1 - ld1 {v6.S}[1], [x0], x1 - umull v20.8H, v0.8B, v6.8B - \add v2.8H, v16.8H, v2.8H - srshl v2.8H, v2.8H, v18.8H - sqxtun v2.8B, v2.8H - \add v20.8H, v16.8H, v20.8H - srshl v20.8H, v20.8h, v18.8H - sqxtun v4.8B, v20.8H - st1 {v2.S}[0], [x5], x1 - st1 {v2.S}[1], [x5], x1 - st1 {v4.S}[0], [x5], x1 - st1 {v4.S}[1], [x5], x1 + ld1 {v6.s}[0], [x0], x1 + ld1 {v6.s}[1], [x0], x1 + umull v20.8h, v0.8b, v6.8b + \add v2.8h, v16.8h, v2.8h + srshl v2.8h, v2.8h, v18.8h + sqxtun v2.8b, v2.8h + \add v20.8h, v16.8h, v20.8h + srshl v20.8h, v20.8h, v18.8h + sqxtun v4.8b, v20.8h + st1 {v2.s}[0], [x5], x1 + st1 {v2.s}[1], [x5], x1 + st1 {v4.s}[0], [x5], x1 + st1 {v4.s}[1], [x5], x1 b.ne 1b ret -2: \add v2.8H, v16.8H, v2.8H - srshl v2.8H, v2.8H, v18.8H - sqxtun v2.8B, v2.8H - st1 {v2.S}[0], [x5], x1 - st1 {v2.S}[1], [x5], x1 +2: \add v2.8h, v16.8h, v2.8h + srshl v2.8h, v2.8h, v18.8h + sqxtun v2.8b, v2.8h + st1 {v2.s}[0], [x5], x1 + st1 {v2.s}[1], [x5], x1 ret .endm @@ -796,18 +796,18 @@ cmp w3, #1 mov w6, #1 lsl w5, w5, w3 - dup v16.8H, w5 + dup v16.8h, w5 mov x5, x0 b.le 20f sub w6, w6, w3 - dup v18.8H, w6 + dup v18.8h, w6 cmp w4, #0 b.lt 10f weight_\w shadd 10: neg w4, w4 weight_\w shsub 20: neg w6, w3 - dup v18.8H, w6 + dup v18.8h, w6 cmp w4, #0 b.lt 10f weight_\w add @@ -825,7 +825,7 @@ ldr w6, [x4] ccmp w3, #0, #0, ne lsl w2, w2, #2 - mov v24.S[0], w6 + mov v24.s[0], w6 lsl w3, w3, #2 and w8, w6, w6, lsl #16 b.eq 1f @@ -1017,7 +1017,7 @@ ld1 {v16.8h}, [x4], x1 ld1 {v19.8h}, [x9], x1 - transpose_4x8H v18, v16, v17, v19, v26, v27, v28, v29 + transpose_4x8H v18, v16, v17, v19, v26, v27, v28, v29 h264_loop_filter_chroma_intra_10 @@ 
-1045,7 +1045,7 @@ ld1 {v19.4h}, [x4], x1 ld1 {v19.d}[1], [x9], x1 - transpose_4x8H v18, v16, v17, v19, v26, v27, v28, v29 + transpose_4x8H v18, v16, v17, v19, v26, v27, v28, v29 h264_loop_filter_chroma_intra_10 diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/h264qpel_neon.S ffmpeg-5.1.9/libavcodec/aarch64/h264qpel_neon.S --- ffmpeg-5.1.8/libavcodec/aarch64/h264qpel_neon.S 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/aarch64/h264qpel_neon.S 2026-05-05 14:21:58.000000000 +0000 @@ -27,127 +27,127 @@ .macro lowpass_const r movz \r, #20, lsl #16 movk \r, #5 - mov v6.S[0], \r + mov v6.s[0], \r .endm //trashes v0-v5 .macro lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1 - ext v2.8B, \r0\().8B, \r1\().8B, #2 - ext v3.8B, \r0\().8B, \r1\().8B, #3 - uaddl v2.8H, v2.8B, v3.8B - ext v4.8B, \r0\().8B, \r1\().8B, #1 - ext v5.8B, \r0\().8B, \r1\().8B, #4 - uaddl v4.8H, v4.8B, v5.8B - ext v1.8B, \r0\().8B, \r1\().8B, #5 - uaddl \d0\().8H, \r0\().8B, v1.8B - ext v0.8B, \r2\().8B, \r3\().8B, #2 - mla \d0\().8H, v2.8H, v6.H[1] - ext v1.8B, \r2\().8B, \r3\().8B, #3 - uaddl v0.8H, v0.8B, v1.8B - ext v1.8B, \r2\().8B, \r3\().8B, #1 - mls \d0\().8H, v4.8H, v6.H[0] - ext v3.8B, \r2\().8B, \r3\().8B, #4 - uaddl v1.8H, v1.8B, v3.8B - ext v2.8B, \r2\().8B, \r3\().8B, #5 - uaddl \d1\().8H, \r2\().8B, v2.8B - mla \d1\().8H, v0.8H, v6.H[1] - mls \d1\().8H, v1.8H, v6.H[0] + ext v2.8b, \r0\().8b, \r1\().8b, #2 + ext v3.8b, \r0\().8b, \r1\().8b, #3 + uaddl v2.8h, v2.8b, v3.8b + ext v4.8b, \r0\().8b, \r1\().8b, #1 + ext v5.8b, \r0\().8b, \r1\().8b, #4 + uaddl v4.8h, v4.8b, v5.8b + ext v1.8b, \r0\().8b, \r1\().8b, #5 + uaddl \d0\().8h, \r0\().8b, v1.8b + ext v0.8b, \r2\().8b, \r3\().8b, #2 + mla \d0\().8h, v2.8h, v6.h[1] + ext v1.8b, \r2\().8b, \r3\().8b, #3 + uaddl v0.8h, v0.8b, v1.8b + ext v1.8b, \r2\().8b, \r3\().8b, #1 + mls \d0\().8h, v4.8h, v6.h[0] + ext v3.8b, \r2\().8b, \r3\().8b, #4 + uaddl v1.8h, v1.8b, v3.8b + ext v2.8b, \r2\().8b, \r3\().8b, #5 + uaddl \d1\().8h, \r2\().8b, 
v2.8b + mla \d1\().8h, v0.8h, v6.h[1] + mls \d1\().8h, v1.8h, v6.h[0] .if \narrow - sqrshrun \d0\().8B, \d0\().8H, #5 - sqrshrun \d1\().8B, \d1\().8H, #5 + sqrshrun \d0\().8b, \d0\().8h, #5 + sqrshrun \d1\().8b, \d1\().8h, #5 .endif .endm //trashes v0-v4 .macro lowpass_8_v r0, r1, r2, r3, r4, r5, r6, d0, d1, narrow=1 - uaddl v2.8H, \r2\().8B, \r3\().8B - uaddl v0.8H, \r3\().8B, \r4\().8B - uaddl v4.8H, \r1\().8B, \r4\().8B - uaddl v1.8H, \r2\().8B, \r5\().8B - uaddl \d0\().8H, \r0\().8B, \r5\().8B - uaddl \d1\().8H, \r1\().8B, \r6\().8B - mla \d0\().8H, v2.8H, v6.H[1] - mls \d0\().8H, v4.8H, v6.H[0] - mla \d1\().8H, v0.8H, v6.H[1] - mls \d1\().8H, v1.8H, v6.H[0] + uaddl v2.8h, \r2\().8b, \r3\().8b + uaddl v0.8h, \r3\().8b, \r4\().8b + uaddl v4.8h, \r1\().8b, \r4\().8b + uaddl v1.8h, \r2\().8b, \r5\().8b + uaddl \d0\().8h, \r0\().8b, \r5\().8b + uaddl \d1\().8h, \r1\().8b, \r6\().8b + mla \d0\().8h, v2.8h, v6.h[1] + mls \d0\().8h, v4.8h, v6.h[0] + mla \d1\().8h, v0.8h, v6.h[1] + mls \d1\().8h, v1.8h, v6.h[0] .if \narrow - sqrshrun \d0\().8B, \d0\().8H, #5 - sqrshrun \d1\().8B, \d1\().8H, #5 + sqrshrun \d0\().8b, \d0\().8h, #5 + sqrshrun \d1\().8b, \d1\().8h, #5 .endif .endm //trashes v0-v5, v7, v30-v31 .macro lowpass_8H r0, r1 - ext v0.16B, \r0\().16B, \r0\().16B, #2 - ext v1.16B, \r0\().16B, \r0\().16B, #3 - uaddl v0.8H, v0.8B, v1.8B - ext v2.16B, \r0\().16B, \r0\().16B, #1 - ext v3.16B, \r0\().16B, \r0\().16B, #4 - uaddl v2.8H, v2.8B, v3.8B - ext v30.16B, \r0\().16B, \r0\().16B, #5 - uaddl \r0\().8H, \r0\().8B, v30.8B - ext v4.16B, \r1\().16B, \r1\().16B, #2 - mla \r0\().8H, v0.8H, v6.H[1] - ext v5.16B, \r1\().16B, \r1\().16B, #3 - uaddl v4.8H, v4.8B, v5.8B - ext v7.16B, \r1\().16B, \r1\().16B, #1 - mls \r0\().8H, v2.8H, v6.H[0] - ext v0.16B, \r1\().16B, \r1\().16B, #4 - uaddl v7.8H, v7.8B, v0.8B - ext v31.16B, \r1\().16B, \r1\().16B, #5 - uaddl \r1\().8H, \r1\().8B, v31.8B - mla \r1\().8H, v4.8H, v6.H[1] - mls \r1\().8H, v7.8H, v6.H[0] + ext v0.16b, \r0\().16b, 
\r0\().16b, #2 + ext v1.16b, \r0\().16b, \r0\().16b, #3 + uaddl v0.8h, v0.8b, v1.8b + ext v2.16b, \r0\().16b, \r0\().16b, #1 + ext v3.16b, \r0\().16b, \r0\().16b, #4 + uaddl v2.8h, v2.8b, v3.8b + ext v30.16b, \r0\().16b, \r0\().16b, #5 + uaddl \r0\().8h, \r0\().8b, v30.8b + ext v4.16b, \r1\().16b, \r1\().16b, #2 + mla \r0\().8h, v0.8h, v6.h[1] + ext v5.16b, \r1\().16b, \r1\().16b, #3 + uaddl v4.8h, v4.8b, v5.8b + ext v7.16b, \r1\().16b, \r1\().16b, #1 + mls \r0\().8h, v2.8h, v6.h[0] + ext v0.16b, \r1\().16b, \r1\().16b, #4 + uaddl v7.8h, v7.8b, v0.8b + ext v31.16b, \r1\().16b, \r1\().16b, #5 + uaddl \r1\().8h, \r1\().8b, v31.8b + mla \r1\().8h, v4.8h, v6.h[1] + mls \r1\().8h, v7.8h, v6.h[0] .endm // trashes v2-v5, v30 .macro lowpass_8_1 r0, r1, d0, narrow=1 - ext v2.8B, \r0\().8B, \r1\().8B, #2 - ext v3.8B, \r0\().8B, \r1\().8B, #3 - uaddl v2.8H, v2.8B, v3.8B - ext v4.8B, \r0\().8B, \r1\().8B, #1 - ext v5.8B, \r0\().8B, \r1\().8B, #4 - uaddl v4.8H, v4.8B, v5.8B - ext v30.8B, \r0\().8B, \r1\().8B, #5 - uaddl \d0\().8H, \r0\().8B, v30.8B - mla \d0\().8H, v2.8H, v6.H[1] - mls \d0\().8H, v4.8H, v6.H[0] + ext v2.8b, \r0\().8b, \r1\().8b, #2 + ext v3.8b, \r0\().8b, \r1\().8b, #3 + uaddl v2.8h, v2.8b, v3.8b + ext v4.8b, \r0\().8b, \r1\().8b, #1 + ext v5.8b, \r0\().8b, \r1\().8b, #4 + uaddl v4.8h, v4.8b, v5.8b + ext v30.8b, \r0\().8b, \r1\().8b, #5 + uaddl \d0\().8h, \r0\().8b, v30.8b + mla \d0\().8h, v2.8h, v6.h[1] + mls \d0\().8h, v4.8h, v6.h[0] .if \narrow - sqrshrun \d0\().8B, \d0\().8H, #5 + sqrshrun \d0\().8b, \d0\().8h, #5 .endif .endm // trashed v0-v7 .macro lowpass_8.16 r0, r1, r2, r3, r4, r5 - saddl v5.4S, \r2\().4H, \r3\().4H - saddl2 v1.4S, \r2\().8H, \r3\().8H - saddl v6.4S, \r1\().4H, \r4\().4H - saddl2 v2.4S, \r1\().8H, \r4\().8H - saddl v0.4S, \r0\().4H, \r5\().4H - saddl2 v4.4S, \r0\().8H, \r5\().8H - - shl v3.4S, v5.4S, #4 - shl v5.4S, v5.4S, #2 - shl v7.4S, v6.4S, #2 - add v5.4S, v5.4S, v3.4S - add v6.4S, v6.4S, v7.4S - - shl v3.4S, v1.4S, #4 - shl 
v1.4S, v1.4S, #2 - shl v7.4S, v2.4S, #2 - add v1.4S, v1.4S, v3.4S - add v2.4S, v2.4S, v7.4S - - add v5.4S, v5.4S, v0.4S - sub v5.4S, v5.4S, v6.4S + saddl v5.4s, \r2\().4h, \r3\().4h + saddl2 v1.4s, \r2\().8h, \r3\().8h + saddl v6.4s, \r1\().4h, \r4\().4h + saddl2 v2.4s, \r1\().8h, \r4\().8h + saddl v0.4s, \r0\().4h, \r5\().4h + saddl2 v4.4s, \r0\().8h, \r5\().8h + + shl v3.4s, v5.4s, #4 + shl v5.4s, v5.4s, #2 + shl v7.4s, v6.4s, #2 + add v5.4s, v5.4s, v3.4s + add v6.4s, v6.4s, v7.4s + + shl v3.4s, v1.4s, #4 + shl v1.4s, v1.4s, #2 + shl v7.4s, v2.4s, #2 + add v1.4s, v1.4s, v3.4s + add v2.4s, v2.4s, v7.4s + + add v5.4s, v5.4s, v0.4s + sub v5.4s, v5.4s, v6.4s - add v1.4S, v1.4S, v4.4S - sub v1.4S, v1.4S, v2.4S + add v1.4s, v1.4s, v4.4s + sub v1.4s, v1.4s, v2.4s - rshrn v5.4H, v5.4S, #10 - rshrn2 v5.8H, v1.4S, #10 + rshrn v5.4h, v5.4s, #10 + rshrn2 v5.8h, v1.4s, #10 - sqxtun \r0\().8B, v5.8H + sqxtun \r0\().8b, v5.8h .endm function put_h264_qpel16_h_lowpass_neon_packed @@ -176,19 +176,19 @@ endfunc function \type\()_h264_qpel8_h_lowpass_neon -1: ld1 {v28.8B, v29.8B}, [x1], x2 - ld1 {v16.8B, v17.8B}, [x1], x2 +1: ld1 {v28.8b, v29.8b}, [x1], x2 + ld1 {v16.8b, v17.8b}, [x1], x2 subs x12, x12, #2 lowpass_8 v28, v29, v16, v17, v28, v16 .ifc \type,avg - ld1 {v2.8B}, [x0], x3 - ld1 {v3.8B}, [x0] - urhadd v28.8B, v28.8B, v2.8B - urhadd v16.8B, v16.8B, v3.8B + ld1 {v2.8b}, [x0], x3 + ld1 {v3.8b}, [x0] + urhadd v28.8b, v28.8b, v2.8b + urhadd v16.8b, v16.8b, v3.8b sub x0, x0, x3 .endif - st1 {v28.8B}, [x0], x3 - st1 {v16.8B}, [x0], x3 + st1 {v28.8b}, [x0], x3 + st1 {v16.8b}, [x0], x3 b.ne 1b ret endfunc @@ -213,23 +213,23 @@ endfunc function \type\()_h264_qpel8_h_lowpass_l2_neon -1: ld1 {v26.8B, v27.8B}, [x1], x2 - ld1 {v16.8B, v17.8B}, [x1], x2 - ld1 {v28.8B}, [x3], x2 - ld1 {v29.8B}, [x3], x2 +1: ld1 {v26.8b, v27.8b}, [x1], x2 + ld1 {v16.8b, v17.8b}, [x1], x2 + ld1 {v28.8b}, [x3], x2 + ld1 {v29.8b}, [x3], x2 subs x12, x12, #2 lowpass_8 v26, v27, v16, v17, v26, v27 - urhadd 
v26.8B, v26.8B, v28.8B - urhadd v27.8B, v27.8B, v29.8B + urhadd v26.8b, v26.8b, v28.8b + urhadd v27.8b, v27.8b, v29.8b .ifc \type,avg - ld1 {v2.8B}, [x0], x2 - ld1 {v3.8B}, [x0] - urhadd v26.8B, v26.8B, v2.8B - urhadd v27.8B, v27.8B, v3.8B + ld1 {v2.8b}, [x0], x2 + ld1 {v3.8b}, [x0] + urhadd v26.8b, v26.8b, v2.8b + urhadd v27.8b, v27.8b, v3.8b sub x0, x0, x2 .endif - st1 {v26.8B}, [x0], x2 - st1 {v27.8B}, [x0], x2 + st1 {v26.8b}, [x0], x2 + st1 {v27.8b}, [x0], x2 b.ne 1b ret endfunc @@ -270,52 +270,52 @@ endfunc function \type\()_h264_qpel8_v_lowpass_neon - ld1 {v16.8B}, [x1], x3 - ld1 {v17.8B}, [x1], x3 - ld1 {v18.8B}, [x1], x3 - ld1 {v19.8B}, [x1], x3 - ld1 {v20.8B}, [x1], x3 - ld1 {v21.8B}, [x1], x3 - ld1 {v22.8B}, [x1], x3 - ld1 {v23.8B}, [x1], x3 - ld1 {v24.8B}, [x1], x3 - ld1 {v25.8B}, [x1], x3 - ld1 {v26.8B}, [x1], x3 - ld1 {v27.8B}, [x1], x3 - ld1 {v28.8B}, [x1] + ld1 {v16.8b}, [x1], x3 + ld1 {v17.8b}, [x1], x3 + ld1 {v18.8b}, [x1], x3 + ld1 {v19.8b}, [x1], x3 + ld1 {v20.8b}, [x1], x3 + ld1 {v21.8b}, [x1], x3 + ld1 {v22.8b}, [x1], x3 + ld1 {v23.8b}, [x1], x3 + ld1 {v24.8b}, [x1], x3 + ld1 {v25.8b}, [x1], x3 + ld1 {v26.8b}, [x1], x3 + ld1 {v27.8b}, [x1], x3 + ld1 {v28.8b}, [x1] lowpass_8_v v16, v17, v18, v19, v20, v21, v22, v16, v17 lowpass_8_v v18, v19, v20, v21, v22, v23, v24, v18, v19 lowpass_8_v v20, v21, v22, v23, v24, v25, v26, v20, v21 lowpass_8_v v22, v23, v24, v25, v26, v27, v28, v22, v23 .ifc \type,avg - ld1 {v24.8B}, [x0], x2 - ld1 {v25.8B}, [x0], x2 - ld1 {v26.8B}, [x0], x2 - urhadd v16.8B, v16.8B, v24.8B - ld1 {v27.8B}, [x0], x2 - urhadd v17.8B, v17.8B, v25.8B - ld1 {v28.8B}, [x0], x2 - urhadd v18.8B, v18.8B, v26.8B - ld1 {v29.8B}, [x0], x2 - urhadd v19.8B, v19.8B, v27.8B - ld1 {v30.8B}, [x0], x2 - urhadd v20.8B, v20.8B, v28.8B - ld1 {v31.8B}, [x0], x2 - urhadd v21.8B, v21.8B, v29.8B - urhadd v22.8B, v22.8B, v30.8B - urhadd v23.8B, v23.8B, v31.8B + ld1 {v24.8b}, [x0], x2 + ld1 {v25.8b}, [x0], x2 + ld1 {v26.8b}, [x0], x2 + urhadd v16.8b, v16.8b, 
v24.8b + ld1 {v27.8b}, [x0], x2 + urhadd v17.8b, v17.8b, v25.8b + ld1 {v28.8b}, [x0], x2 + urhadd v18.8b, v18.8b, v26.8b + ld1 {v29.8b}, [x0], x2 + urhadd v19.8b, v19.8b, v27.8b + ld1 {v30.8b}, [x0], x2 + urhadd v20.8b, v20.8b, v28.8b + ld1 {v31.8b}, [x0], x2 + urhadd v21.8b, v21.8b, v29.8b + urhadd v22.8b, v22.8b, v30.8b + urhadd v23.8b, v23.8b, v31.8b sub x0, x0, x2, lsl #3 .endif - st1 {v16.8B}, [x0], x2 - st1 {v17.8B}, [x0], x2 - st1 {v18.8B}, [x0], x2 - st1 {v19.8B}, [x0], x2 - st1 {v20.8B}, [x0], x2 - st1 {v21.8B}, [x0], x2 - st1 {v22.8B}, [x0], x2 - st1 {v23.8B}, [x0], x2 + st1 {v16.8b}, [x0], x2 + st1 {v17.8b}, [x0], x2 + st1 {v18.8b}, [x0], x2 + st1 {v19.8b}, [x0], x2 + st1 {v20.8b}, [x0], x2 + st1 {v21.8b}, [x0], x2 + st1 {v22.8b}, [x0], x2 + st1 {v23.8b}, [x0], x2 ret endfunc @@ -343,70 +343,70 @@ endfunc function \type\()_h264_qpel8_v_lowpass_l2_neon - ld1 {v16.8B}, [x1], x3 - ld1 {v17.8B}, [x1], x3 - ld1 {v18.8B}, [x1], x3 - ld1 {v19.8B}, [x1], x3 - ld1 {v20.8B}, [x1], x3 - ld1 {v21.8B}, [x1], x3 - ld1 {v22.8B}, [x1], x3 - ld1 {v23.8B}, [x1], x3 - ld1 {v24.8B}, [x1], x3 - ld1 {v25.8B}, [x1], x3 - ld1 {v26.8B}, [x1], x3 - ld1 {v27.8B}, [x1], x3 - ld1 {v28.8B}, [x1] + ld1 {v16.8b}, [x1], x3 + ld1 {v17.8b}, [x1], x3 + ld1 {v18.8b}, [x1], x3 + ld1 {v19.8b}, [x1], x3 + ld1 {v20.8b}, [x1], x3 + ld1 {v21.8b}, [x1], x3 + ld1 {v22.8b}, [x1], x3 + ld1 {v23.8b}, [x1], x3 + ld1 {v24.8b}, [x1], x3 + ld1 {v25.8b}, [x1], x3 + ld1 {v26.8b}, [x1], x3 + ld1 {v27.8b}, [x1], x3 + ld1 {v28.8b}, [x1] lowpass_8_v v16, v17, v18, v19, v20, v21, v22, v16, v17 lowpass_8_v v18, v19, v20, v21, v22, v23, v24, v18, v19 lowpass_8_v v20, v21, v22, v23, v24, v25, v26, v20, v21 lowpass_8_v v22, v23, v24, v25, v26, v27, v28, v22, v23 - ld1 {v24.8B}, [x12], x2 - ld1 {v25.8B}, [x12], x2 - ld1 {v26.8B}, [x12], x2 - ld1 {v27.8B}, [x12], x2 - ld1 {v28.8B}, [x12], x2 - urhadd v16.8B, v24.8B, v16.8B - urhadd v17.8B, v25.8B, v17.8B - ld1 {v29.8B}, [x12], x2 - urhadd v18.8B, v26.8B, v18.8B - 
urhadd v19.8B, v27.8B, v19.8B - ld1 {v30.8B}, [x12], x2 - urhadd v20.8B, v28.8B, v20.8B - urhadd v21.8B, v29.8B, v21.8B - ld1 {v31.8B}, [x12], x2 - urhadd v22.8B, v30.8B, v22.8B - urhadd v23.8B, v31.8B, v23.8B + ld1 {v24.8b}, [x12], x2 + ld1 {v25.8b}, [x12], x2 + ld1 {v26.8b}, [x12], x2 + ld1 {v27.8b}, [x12], x2 + ld1 {v28.8b}, [x12], x2 + urhadd v16.8b, v24.8b, v16.8b + urhadd v17.8b, v25.8b, v17.8b + ld1 {v29.8b}, [x12], x2 + urhadd v18.8b, v26.8b, v18.8b + urhadd v19.8b, v27.8b, v19.8b + ld1 {v30.8b}, [x12], x2 + urhadd v20.8b, v28.8b, v20.8b + urhadd v21.8b, v29.8b, v21.8b + ld1 {v31.8b}, [x12], x2 + urhadd v22.8b, v30.8b, v22.8b + urhadd v23.8b, v31.8b, v23.8b .ifc \type,avg - ld1 {v24.8B}, [x0], x3 - ld1 {v25.8B}, [x0], x3 - ld1 {v26.8B}, [x0], x3 - urhadd v16.8B, v16.8B, v24.8B - ld1 {v27.8B}, [x0], x3 - urhadd v17.8B, v17.8B, v25.8B - ld1 {v28.8B}, [x0], x3 - urhadd v18.8B, v18.8B, v26.8B - ld1 {v29.8B}, [x0], x3 - urhadd v19.8B, v19.8B, v27.8B - ld1 {v30.8B}, [x0], x3 - urhadd v20.8B, v20.8B, v28.8B - ld1 {v31.8B}, [x0], x3 - urhadd v21.8B, v21.8B, v29.8B - urhadd v22.8B, v22.8B, v30.8B - urhadd v23.8B, v23.8B, v31.8B + ld1 {v24.8b}, [x0], x3 + ld1 {v25.8b}, [x0], x3 + ld1 {v26.8b}, [x0], x3 + urhadd v16.8b, v16.8b, v24.8b + ld1 {v27.8b}, [x0], x3 + urhadd v17.8b, v17.8b, v25.8b + ld1 {v28.8b}, [x0], x3 + urhadd v18.8b, v18.8b, v26.8b + ld1 {v29.8b}, [x0], x3 + urhadd v19.8b, v19.8b, v27.8b + ld1 {v30.8b}, [x0], x3 + urhadd v20.8b, v20.8b, v28.8b + ld1 {v31.8b}, [x0], x3 + urhadd v21.8b, v21.8b, v29.8b + urhadd v22.8b, v22.8b, v30.8b + urhadd v23.8b, v23.8b, v31.8b sub x0, x0, x3, lsl #3 .endif - st1 {v16.8B}, [x0], x3 - st1 {v17.8B}, [x0], x3 - st1 {v18.8B}, [x0], x3 - st1 {v19.8B}, [x0], x3 - st1 {v20.8B}, [x0], x3 - st1 {v21.8B}, [x0], x3 - st1 {v22.8B}, [x0], x3 - st1 {v23.8B}, [x0], x3 + st1 {v16.8b}, [x0], x3 + st1 {v17.8b}, [x0], x3 + st1 {v18.8b}, [x0], x3 + st1 {v19.8b}, [x0], x3 + st1 {v20.8b}, [x0], x3 + st1 {v21.8b}, [x0], x3 + st1 {v22.8b}, 
[x0], x3 + st1 {v23.8b}, [x0], x3 ret endfunc @@ -417,19 +417,19 @@ function put_h264_qpel8_hv_lowpass_neon_top lowpass_const w12 - ld1 {v16.8H}, [x1], x3 - ld1 {v17.8H}, [x1], x3 - ld1 {v18.8H}, [x1], x3 - ld1 {v19.8H}, [x1], x3 - ld1 {v20.8H}, [x1], x3 - ld1 {v21.8H}, [x1], x3 - ld1 {v22.8H}, [x1], x3 - ld1 {v23.8H}, [x1], x3 - ld1 {v24.8H}, [x1], x3 - ld1 {v25.8H}, [x1], x3 - ld1 {v26.8H}, [x1], x3 - ld1 {v27.8H}, [x1], x3 - ld1 {v28.8H}, [x1] + ld1 {v16.8h}, [x1], x3 + ld1 {v17.8h}, [x1], x3 + ld1 {v18.8h}, [x1], x3 + ld1 {v19.8h}, [x1], x3 + ld1 {v20.8h}, [x1], x3 + ld1 {v21.8h}, [x1], x3 + ld1 {v22.8h}, [x1], x3 + ld1 {v23.8h}, [x1], x3 + ld1 {v24.8h}, [x1], x3 + ld1 {v25.8h}, [x1], x3 + ld1 {v26.8h}, [x1], x3 + ld1 {v27.8h}, [x1], x3 + ld1 {v28.8h}, [x1] lowpass_8H v16, v17 lowpass_8H v18, v19 lowpass_8H v20, v21 @@ -458,33 +458,33 @@ mov x10, x30 bl put_h264_qpel8_hv_lowpass_neon_top .ifc \type,avg - ld1 {v0.8B}, [x0], x2 - ld1 {v1.8B}, [x0], x2 - ld1 {v2.8B}, [x0], x2 - urhadd v16.8B, v16.8B, v0.8B - ld1 {v3.8B}, [x0], x2 - urhadd v17.8B, v17.8B, v1.8B - ld1 {v4.8B}, [x0], x2 - urhadd v18.8B, v18.8B, v2.8B - ld1 {v5.8B}, [x0], x2 - urhadd v19.8B, v19.8B, v3.8B - ld1 {v6.8B}, [x0], x2 - urhadd v20.8B, v20.8B, v4.8B - ld1 {v7.8B}, [x0], x2 - urhadd v21.8B, v21.8B, v5.8B - urhadd v22.8B, v22.8B, v6.8B - urhadd v23.8B, v23.8B, v7.8B + ld1 {v0.8b}, [x0], x2 + ld1 {v1.8b}, [x0], x2 + ld1 {v2.8b}, [x0], x2 + urhadd v16.8b, v16.8b, v0.8b + ld1 {v3.8b}, [x0], x2 + urhadd v17.8b, v17.8b, v1.8b + ld1 {v4.8b}, [x0], x2 + urhadd v18.8b, v18.8b, v2.8b + ld1 {v5.8b}, [x0], x2 + urhadd v19.8b, v19.8b, v3.8b + ld1 {v6.8b}, [x0], x2 + urhadd v20.8b, v20.8b, v4.8b + ld1 {v7.8b}, [x0], x2 + urhadd v21.8b, v21.8b, v5.8b + urhadd v22.8b, v22.8b, v6.8b + urhadd v23.8b, v23.8b, v7.8b sub x0, x0, x2, lsl #3 .endif - st1 {v16.8B}, [x0], x2 - st1 {v17.8B}, [x0], x2 - st1 {v18.8B}, [x0], x2 - st1 {v19.8B}, [x0], x2 - st1 {v20.8B}, [x0], x2 - st1 {v21.8B}, [x0], x2 - st1 {v22.8B}, 
[x0], x2 - st1 {v23.8B}, [x0], x2 + st1 {v16.8b}, [x0], x2 + st1 {v17.8b}, [x0], x2 + st1 {v18.8b}, [x0], x2 + st1 {v19.8b}, [x0], x2 + st1 {v20.8b}, [x0], x2 + st1 {v21.8b}, [x0], x2 + st1 {v22.8b}, [x0], x2 + st1 {v23.8b}, [x0], x2 ret x10 endfunc @@ -498,45 +498,45 @@ mov x10, x30 bl put_h264_qpel8_hv_lowpass_neon_top - ld1 {v0.8B, v1.8B}, [x2], #16 - ld1 {v2.8B, v3.8B}, [x2], #16 - urhadd v0.8B, v0.8B, v16.8B - urhadd v1.8B, v1.8B, v17.8B - ld1 {v4.8B, v5.8B}, [x2], #16 - urhadd v2.8B, v2.8B, v18.8B - urhadd v3.8B, v3.8B, v19.8B - ld1 {v6.8B, v7.8B}, [x2], #16 - urhadd v4.8B, v4.8B, v20.8B - urhadd v5.8B, v5.8B, v21.8B - urhadd v6.8B, v6.8B, v22.8B - urhadd v7.8B, v7.8B, v23.8B + ld1 {v0.8b, v1.8b}, [x2], #16 + ld1 {v2.8b, v3.8b}, [x2], #16 + urhadd v0.8b, v0.8b, v16.8b + urhadd v1.8b, v1.8b, v17.8b + ld1 {v4.8b, v5.8b}, [x2], #16 + urhadd v2.8b, v2.8b, v18.8b + urhadd v3.8b, v3.8b, v19.8b + ld1 {v6.8b, v7.8b}, [x2], #16 + urhadd v4.8b, v4.8b, v20.8b + urhadd v5.8b, v5.8b, v21.8b + urhadd v6.8b, v6.8b, v22.8b + urhadd v7.8b, v7.8b, v23.8b .ifc \type,avg - ld1 {v16.8B}, [x0], x3 - ld1 {v17.8B}, [x0], x3 - ld1 {v18.8B}, [x0], x3 - urhadd v0.8B, v0.8B, v16.8B - ld1 {v19.8B}, [x0], x3 - urhadd v1.8B, v1.8B, v17.8B - ld1 {v20.8B}, [x0], x3 - urhadd v2.8B, v2.8B, v18.8B - ld1 {v21.8B}, [x0], x3 - urhadd v3.8B, v3.8B, v19.8B - ld1 {v22.8B}, [x0], x3 - urhadd v4.8B, v4.8B, v20.8B - ld1 {v23.8B}, [x0], x3 - urhadd v5.8B, v5.8B, v21.8B - urhadd v6.8B, v6.8B, v22.8B - urhadd v7.8B, v7.8B, v23.8B + ld1 {v16.8b}, [x0], x3 + ld1 {v17.8b}, [x0], x3 + ld1 {v18.8b}, [x0], x3 + urhadd v0.8b, v0.8b, v16.8b + ld1 {v19.8b}, [x0], x3 + urhadd v1.8b, v1.8b, v17.8b + ld1 {v20.8b}, [x0], x3 + urhadd v2.8b, v2.8b, v18.8b + ld1 {v21.8b}, [x0], x3 + urhadd v3.8b, v3.8b, v19.8b + ld1 {v22.8b}, [x0], x3 + urhadd v4.8b, v4.8b, v20.8b + ld1 {v23.8b}, [x0], x3 + urhadd v5.8b, v5.8b, v21.8b + urhadd v6.8b, v6.8b, v22.8b + urhadd v7.8b, v7.8b, v23.8b sub x0, x0, x3, lsl #3 .endif - st1 {v0.8B}, 
[x0], x3 - st1 {v1.8B}, [x0], x3 - st1 {v2.8B}, [x0], x3 - st1 {v3.8B}, [x0], x3 - st1 {v4.8B}, [x0], x3 - st1 {v5.8B}, [x0], x3 - st1 {v6.8B}, [x0], x3 - st1 {v7.8B}, [x0], x3 + st1 {v0.8b}, [x0], x3 + st1 {v1.8b}, [x0], x3 + st1 {v2.8b}, [x0], x3 + st1 {v3.8b}, [x0], x3 + st1 {v4.8b}, [x0], x3 + st1 {v5.8b}, [x0], x3 + st1 {v6.8b}, [x0], x3 + st1 {v7.8b}, [x0], x3 ret x10 endfunc @@ -580,8 +580,8 @@ endfunc .endm - h264_qpel16_hv put - h264_qpel16_hv avg + h264_qpel16_hv put + h264_qpel16_hv avg .macro h264_qpel8 type function ff_\type\()_h264_qpel8_mc10_neon, export=1 @@ -759,8 +759,8 @@ endfunc .endm - h264_qpel8 put - h264_qpel8 avg + h264_qpel8 put + h264_qpel8 avg .macro h264_qpel16 type function ff_\type\()_h264_qpel16_mc10_neon, export=1 @@ -931,5 +931,5 @@ endfunc .endm - h264_qpel16 put - h264_qpel16 avg + h264_qpel16 put + h264_qpel16 avg diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/hevcdsp_idct_neon.S ffmpeg-5.1.9/libavcodec/aarch64/hevcdsp_idct_neon.S --- ffmpeg-5.1.8/libavcodec/aarch64/hevcdsp_idct_neon.S 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/aarch64/hevcdsp_idct_neon.S 2026-05-05 15:50:52.000000000 +0000 @@ -38,10 +38,10 @@ endconst .macro clip10 in1, in2, c1, c2 - smax \in1, \in1, \c1 - smax \in2, \in2, \c1 - smin \in1, \in1, \c2 - smin \in2, \in2, \c2 + smax \in1, \in1, \c1 + smax \in2, \in2, \c1 + smin \in1, \in1, \c2 + smin \in2, \in2, \c2 .endm function ff_hevc_add_residual_4x4_8_neon, export=1 @@ -50,13 +50,13 @@ ld1 {v2.s}[1], [x0], x2 ld1 {v2.s}[2], [x0], x2 ld1 {v2.s}[3], [x0], x2 - sub x0, x0, x2, lsl #2 - uxtl v6.8h, v2.8b - uxtl2 v7.8h, v2.16b - sqadd v0.8h, v0.8h, v6.8h - sqadd v1.8h, v1.8h, v7.8h - sqxtun v0.8b, v0.8h - sqxtun2 v0.16b, v1.8h + sub x0, x0, x2, lsl #2 + uxtl v6.8h, v2.8b + uxtl2 v7.8h, v2.16b + sqadd v0.8h, v0.8h, v6.8h + sqadd v1.8h, v1.8h, v7.8h + sqxtun v0.8b, v0.8h + sqxtun2 v0.16b, v1.8h st1 {v0.s}[0], [x0], x2 st1 {v0.s}[1], [x0], x2 st1 {v0.s}[2], [x0], x2 @@ -70,12 +70,12 @@ ld1 {v2.d}[0], 
[x12], x2 ld1 {v2.d}[1], [x12], x2 ld1 {v3.d}[0], [x12], x2 - sqadd v0.8h, v0.8h, v2.8h + sqadd v0.8h, v0.8h, v2.8h ld1 {v3.d}[1], [x12], x2 - movi v4.8h, #0 - sqadd v1.8h, v1.8h, v3.8h - mvni v5.8h, #0xFC, lsl #8 // movi #0x3FF - clip10 v0.8h, v1.8h, v4.8h, v5.8h + movi v4.8h, #0 + sqadd v1.8h, v1.8h, v3.8h + mvni v5.8h, #0xFC, lsl #8 // movi #0x3FF + clip10 v0.8h, v1.8h, v4.8h, v5.8h st1 {v0.d}[0], [x0], x2 st1 {v0.d}[1], [x0], x2 st1 {v1.d}[0], [x0], x2 @@ -85,48 +85,48 @@ function ff_hevc_add_residual_8x8_8_neon, export=1 add x12, x0, x2 - add x2, x2, x2 - mov x3, #8 -1: subs x3, x3, #2 + add x2, x2, x2 + mov x3, #8 +1: subs x3, x3, #2 ld1 {v2.d}[0], [x0] ld1 {v2.d}[1], [x12] - uxtl v3.8h, v2.8b + uxtl v3.8h, v2.8b ld1 {v0.8h-v1.8h}, [x1], #32 - uxtl2 v2.8h, v2.16b - sqadd v0.8h, v0.8h, v3.8h - sqadd v1.8h, v1.8h, v2.8h - sqxtun v0.8b, v0.8h - sqxtun2 v0.16b, v1.8h + uxtl2 v2.8h, v2.16b + sqadd v0.8h, v0.8h, v3.8h + sqadd v1.8h, v1.8h, v2.8h + sqxtun v0.8b, v0.8h + sqxtun2 v0.16b, v1.8h st1 {v0.d}[0], [x0], x2 st1 {v0.d}[1], [x12], x2 - bne 1b + bne 1b ret endfunc function ff_hevc_add_residual_8x8_10_neon, export=1 add x12, x0, x2 - add x2, x2, x2 - mov x3, #8 - movi v4.8h, #0 - mvni v5.8h, #0xFC, lsl #8 // movi #0x3FF -1: subs x3, x3, #2 + add x2, x2, x2 + mov x3, #8 + movi v4.8h, #0 + mvni v5.8h, #0xFC, lsl #8 // movi #0x3FF +1: subs x3, x3, #2 ld1 {v0.8h-v1.8h}, [x1], #32 ld1 {v2.8h}, [x0] - sqadd v0.8h, v0.8h, v2.8h + sqadd v0.8h, v0.8h, v2.8h ld1 {v3.8h}, [x12] - sqadd v1.8h, v1.8h, v3.8h - clip10 v0.8h, v1.8h, v4.8h, v5.8h + sqadd v1.8h, v1.8h, v3.8h + clip10 v0.8h, v1.8h, v4.8h, v5.8h st1 {v0.8h}, [x0], x2 st1 {v1.8h}, [x12], x2 - bne 1b + bne 1b ret endfunc function ff_hevc_add_residual_16x16_8_neon, export=1 - mov x3, #16 + mov x3, #16 add x12, x0, x2 - add x2, x2, x2 -1: subs x3, x3, #2 + add x2, x2, x2 +1: subs x3, x3, #2 ld1 {v16.16b}, [x0] ld1 {v0.8h-v3.8h}, [x1], #64 ld1 {v19.16b}, [x12] @@ -134,47 +134,47 @@ uxtl2 v18.8h, v16.16b uxtl v20.8h, 
v19.8b uxtl2 v21.8h, v19.16b - sqadd v0.8h, v0.8h, v17.8h - sqadd v1.8h, v1.8h, v18.8h - sqadd v2.8h, v2.8h, v20.8h - sqadd v3.8h, v3.8h, v21.8h - sqxtun v0.8b, v0.8h + sqadd v0.8h, v0.8h, v17.8h + sqadd v1.8h, v1.8h, v18.8h + sqadd v2.8h, v2.8h, v20.8h + sqadd v3.8h, v3.8h, v21.8h + sqxtun v0.8b, v0.8h sqxtun2 v0.16b, v1.8h - sqxtun v1.8b, v2.8h + sqxtun v1.8b, v2.8h sqxtun2 v1.16b, v3.8h st1 {v0.16b}, [x0], x2 st1 {v1.16b}, [x12], x2 - bne 1b + bne 1b ret endfunc function ff_hevc_add_residual_16x16_10_neon, export=1 - mov x3, #16 + mov x3, #16 movi v20.8h, #0 mvni v21.8h, #0xFC, lsl #8 // movi #0x3FF add x12, x0, x2 - add x2, x2, x2 -1: subs x3, x3, #2 + add x2, x2, x2 +1: subs x3, x3, #2 ld1 {v16.8h-v17.8h}, [x0] ld1 {v0.8h-v3.8h}, [x1], #64 - sqadd v0.8h, v0.8h, v16.8h + sqadd v0.8h, v0.8h, v16.8h ld1 {v18.8h-v19.8h}, [x12] - sqadd v1.8h, v1.8h, v17.8h - sqadd v2.8h, v2.8h, v18.8h - sqadd v3.8h, v3.8h, v19.8h - clip10 v0.8h, v1.8h, v20.8h, v21.8h - clip10 v2.8h, v3.8h, v20.8h, v21.8h + sqadd v1.8h, v1.8h, v17.8h + sqadd v2.8h, v2.8h, v18.8h + sqadd v3.8h, v3.8h, v19.8h + clip10 v0.8h, v1.8h, v20.8h, v21.8h + clip10 v2.8h, v3.8h, v20.8h, v21.8h st1 {v0.8h-v1.8h}, [x0], x2 st1 {v2.8h-v3.8h}, [x12], x2 - bne 1b + bne 1b ret endfunc function ff_hevc_add_residual_32x32_8_neon, export=1 add x12, x0, x2 - add x2, x2, x2 - mov x3, #32 -1: subs x3, x3, #2 + add x2, x2, x2 + mov x3, #32 +1: subs x3, x3, #2 ld1 {v20.16b, v21.16b}, [x0] uxtl v16.8h, v20.8b uxtl2 v17.8h, v20.16b @@ -187,43 +187,43 @@ uxtl2 v21.8h, v22.16b uxtl v22.8h, v23.8b uxtl2 v23.8h, v23.16b - sqadd v0.8h, v0.8h, v16.8h - sqadd v1.8h, v1.8h, v17.8h - sqadd v2.8h, v2.8h, v18.8h - sqadd v3.8h, v3.8h, v19.8h - sqadd v4.8h, v4.8h, v20.8h - sqadd v5.8h, v5.8h, v21.8h - sqadd v6.8h, v6.8h, v22.8h - sqadd v7.8h, v7.8h, v23.8h - sqxtun v0.8b, v0.8h + sqadd v0.8h, v0.8h, v16.8h + sqadd v1.8h, v1.8h, v17.8h + sqadd v2.8h, v2.8h, v18.8h + sqadd v3.8h, v3.8h, v19.8h + sqadd v4.8h, v4.8h, v20.8h + sqadd v5.8h, 
v5.8h, v21.8h + sqadd v6.8h, v6.8h, v22.8h + sqadd v7.8h, v7.8h, v23.8h + sqxtun v0.8b, v0.8h sqxtun2 v0.16b, v1.8h - sqxtun v1.8b, v2.8h + sqxtun v1.8b, v2.8h sqxtun2 v1.16b, v3.8h - sqxtun v2.8b, v4.8h + sqxtun v2.8b, v4.8h sqxtun2 v2.16b, v5.8h st1 {v0.16b, v1.16b}, [x0], x2 - sqxtun v3.8b, v6.8h + sqxtun v3.8b, v6.8h sqxtun2 v3.16b, v7.8h st1 {v2.16b, v3.16b}, [x12], x2 - bne 1b + bne 1b ret endfunc function ff_hevc_add_residual_32x32_10_neon, export=1 - mov x3, #32 + mov x3, #32 movi v20.8h, #0 mvni v21.8h, #0xFC, lsl #8 // movi #0x3FF -1: subs x3, x3, #1 +1: subs x3, x3, #1 ld1 {v0.8h-v3.8h}, [x1], #64 ld1 {v16.8h-v19.8h}, [x0] - sqadd v0.8h, v0.8h, v16.8h - sqadd v1.8h, v1.8h, v17.8h - sqadd v2.8h, v2.8h, v18.8h - sqadd v3.8h, v3.8h, v19.8h - clip10 v0.8h, v1.8h, v20.8h, v21.8h - clip10 v2.8h, v3.8h, v20.8h, v21.8h + sqadd v0.8h, v0.8h, v16.8h + sqadd v1.8h, v1.8h, v17.8h + sqadd v2.8h, v2.8h, v18.8h + sqadd v3.8h, v3.8h, v19.8h + clip10 v0.8h, v1.8h, v20.8h, v21.8h + clip10 v2.8h, v3.8h, v20.8h, v21.8h st1 {v0.8h-v3.8h}, [x0], x2 - bne 1b + bne 1b ret endfunc @@ -246,19 +246,19 @@ // uses and clobbers v28-v31 as temp registers .macro tr_4x4_8 in0, in1, in2, in3, out0, out1, out2, out3, p1, p2 - sshll\p1 v28.4s, \in0, #6 - mov v29.16b, v28.16b - smull\p1 v30.4s, \in1, v0.h[1] - smull\p1 v31.4s, \in1, v0.h[3] - smlal\p2 v28.4s, \in2, v0.h[0] //e0 - smlsl\p2 v29.4s, \in2, v0.h[0] //e1 - smlal\p2 v30.4s, \in3, v0.h[3] //o0 - smlsl\p2 v31.4s, \in3, v0.h[1] //o1 - - add \out0, v28.4s, v30.4s - add \out1, v29.4s, v31.4s - sub \out2, v29.4s, v31.4s - sub \out3, v28.4s, v30.4s + sshll\p1 v28.4s, \in0, #6 + mov v29.16b, v28.16b + smull\p1 v30.4s, \in1, v0.h[1] + smull\p1 v31.4s, \in1, v0.h[3] + smlal\p2 v28.4s, \in2, v0.h[0] //e0 + smlsl\p2 v29.4s, \in2, v0.h[0] //e1 + smlal\p2 v30.4s, \in3, v0.h[3] //o0 + smlsl\p2 v31.4s, \in3, v0.h[1] //o1 + + add \out0, v28.4s, v30.4s + add \out1, v29.4s, v31.4s + sub \out2, v29.4s, v31.4s + sub \out3, v28.4s, v30.4s .endm .macro 
transpose8_4x4 r0, r1, r2, r3 @@ -325,11 +325,11 @@ .macro idct_8x8 bitdepth function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1 //x0 - coeffs - mov x1, x0 + mov x1, x0 ld1 {v16.8h-v19.8h}, [x1], #64 ld1 {v20.8h-v23.8h}, [x1] - movrel x1, trans + movrel x1, trans ld1 {v0.8h}, [x1] tr_8x4 7, v16,.4h, v17,.4h, v18,.4h, v19,.4h, v20,.4h, v21,.4h, v22,.4h, v23,.4h @@ -342,7 +342,7 @@ transpose_8x8 v16, v17, v18, v19, v20, v21, v22, v23 - mov x1, x0 + mov x1, x0 st1 {v16.8h-v19.8h}, [x1], #64 st1 {v20.8h-v23.8h}, [x1] @@ -351,8 +351,8 @@ .endm .macro butterfly e, o, tmp_p, tmp_m - add \tmp_p, \e, \o - sub \tmp_m, \e, \o + add \tmp_p, \e, \o + sub \tmp_m, \e, \o .endm .macro tr16_8x4 in0, in1, in2, in3, offset @@ -381,7 +381,7 @@ butterfly v25.4s, v29.4s, v17.4s, v22.4s butterfly v26.4s, v30.4s, v18.4s, v21.4s butterfly v27.4s, v31.4s, v19.4s, v20.4s - add x4, sp, #\offset + add x4, sp, #\offset st1 {v16.4s-v19.4s}, [x4], #64 st1 {v20.4s-v23.4s}, [x4] .endm @@ -398,14 +398,14 @@ .endm .macro add_member in, t0, t1, t2, t3, t4, t5, t6, t7, op0, op1, op2, op3, op4, op5, op6, op7, p - sum_sub v21.4s, \in, \t0, \op0, \p - sum_sub v22.4s, \in, \t1, \op1, \p - sum_sub v23.4s, \in, \t2, \op2, \p - sum_sub v24.4s, \in, \t3, \op3, \p - sum_sub v25.4s, \in, \t4, \op4, \p - sum_sub v26.4s, \in, \t5, \op5, \p - sum_sub v27.4s, \in, \t6, \op6, \p - sum_sub v28.4s, \in, \t7, \op7, \p + sum_sub v21.4s, \in, \t0, \op0, \p + sum_sub v22.4s, \in, \t1, \op1, \p + sum_sub v23.4s, \in, \t2, \op2, \p + sum_sub v24.4s, \in, \t3, \op3, \p + sum_sub v25.4s, \in, \t4, \op4, \p + sum_sub v26.4s, \in, \t5, \op5, \p + sum_sub v27.4s, \in, \t6, \op6, \p + sum_sub v28.4s, \in, \t7, \op7, \p .endm .macro butterfly16 in0, in1, in2, in3, in4, in5, in6, in7 @@ -473,20 +473,20 @@ .macro tr_16x4 name, shift, offset, step function func_tr_16x4_\name - mov x1, x5 - add x3, x5, #(\step * 64) - mov x2, #(\step * 128) + mov x1, x5 + add x3, x5, #(\step * 64) + mov x2, #(\step * 128) load16 v16.d, v17.d, v18.d, 
v19.d - movrel x1, trans + movrel x1, trans ld1 {v0.8h}, [x1] tr16_8x4 v16, v17, v18, v19, \offset - add x1, x5, #(\step * 32) - add x3, x5, #(\step * 3 *32) - mov x2, #(\step * 128) + add x1, x5, #(\step * 32) + add x3, x5, #(\step * 3 *32) + mov x2, #(\step * 128) load16 v20.d, v17.d, v18.d, v19.d - movrel x1, trans, 16 + movrel x1, trans, 16 ld1 {v1.8h}, [x1] smull v21.4s, v20.4h, v1.h[0] smull v22.4s, v20.4h, v1.h[1] @@ -505,16 +505,16 @@ add_member v19.4h, v1.h[6], v1.h[3], v1.h[0], v1.h[2], v1.h[5], v1.h[7], v1.h[4], v1.h[1], +, -, +, -, +, +, -, + add_member v19.8h, v1.h[7], v1.h[6], v1.h[5], v1.h[4], v1.h[3], v1.h[2], v1.h[1], v1.h[0], +, -, +, -, +, -, +, -, 2 - add x4, sp, #\offset + add x4, sp, #\offset ld1 {v16.4s-v19.4s}, [x4], #64 butterfly16 v16.4s, v21.4s, v17.4s, v22.4s, v18.4s, v23.4s, v19.4s, v24.4s scale v29, v30, v31, v24, v20.4s, v16.4s, v21.4s, v17.4s, v22.4s, v18.4s, v23.4s, v19.4s, \shift transpose16_4x4_2 v29, v30, v31, v24 - mov x1, x6 - add x3, x6, #(24 +3*32) - mov x2, #32 - mov x4, #-32 + mov x1, x6 + add x3, x6, #(24 +3*32) + mov x2, #32 + mov x4, #-32 store16 v29.d, v30.d, v31.d, v24.d, x4 add x4, sp, #(\offset + 64) @@ -523,10 +523,10 @@ scale v29, v30, v31, v20, v20.4s, v16.4s, v25.4s, v17.4s, v26.4s, v18.4s, v27.4s, v19.4s, \shift transpose16_4x4_2 v29, v30, v31, v20 - add x1, x6, #8 - add x3, x6, #(16 + 3 * 32) - mov x2, #32 - mov x4, #-32 + add x1, x6, #8 + add x3, x6, #(16 + 3 * 32) + mov x2, #32 + mov x4, #-32 store16 v29.d, v30.d, v31.d, v20.d, x4 ret @@ -539,21 +539,21 @@ mov x15, x30 // allocate a temp buffer - sub sp, sp, #640 + sub sp, sp, #640 .irp i, 0, 1, 2, 3 - add x5, x0, #(8 * \i) - add x6, sp, #(8 * \i * 16) + add x5, x0, #(8 * \i) + add x6, sp, #(8 * \i * 16) bl func_tr_16x4_firstpass .endr .irp i, 0, 1, 2, 3 - add x5, sp, #(8 * \i) - add x6, x0, #(8 * \i * 16) + add x5, sp, #(8 * \i) + add x6, x0, #(8 * \i * 16) bl func_tr_16x4_secondpass_\bitdepth .endr - add sp, sp, #640 + add sp, sp, #640 mov x30, x15 ret @@ 
-573,35 +573,35 @@ // void ff_hevc_idct_NxN_dc_DEPTH_neon(int16_t *coeffs) .macro idct_dc size, bitdepth function ff_hevc_idct_\size\()x\size\()_dc_\bitdepth\()_neon, export=1 - ld1r {v4.8h}, [x0] - srshr v4.8h, v4.8h, #1 - srshr v0.8h, v4.8h, #(14 - \bitdepth) - srshr v1.8h, v4.8h, #(14 - \bitdepth) + ld1r {v4.8h}, [x0] + srshr v4.8h, v4.8h, #1 + srshr v0.8h, v4.8h, #(14 - \bitdepth) + srshr v1.8h, v4.8h, #(14 - \bitdepth) .if \size > 4 - srshr v2.8h, v4.8h, #(14 - \bitdepth) - srshr v3.8h, v4.8h, #(14 - \bitdepth) + srshr v2.8h, v4.8h, #(14 - \bitdepth) + srshr v3.8h, v4.8h, #(14 - \bitdepth) .if \size > 16 /* dc 32x32 */ - mov x2, #4 + mov x2, #4 1: - subs x2, x2, #1 + subs x2, x2, #1 .endif add x12, x0, #64 mov x13, #128 .if \size > 8 /* dc 16x16 */ - st1 {v0.8h-v3.8h}, [x0], x13 - st1 {v0.8h-v3.8h}, [x12], x13 - st1 {v0.8h-v3.8h}, [x0], x13 - st1 {v0.8h-v3.8h}, [x12], x13 - st1 {v0.8h-v3.8h}, [x0], x13 - st1 {v0.8h-v3.8h}, [x12], x13 + st1 {v0.8h-v3.8h}, [x0], x13 + st1 {v0.8h-v3.8h}, [x12], x13 + st1 {v0.8h-v3.8h}, [x0], x13 + st1 {v0.8h-v3.8h}, [x12], x13 + st1 {v0.8h-v3.8h}, [x0], x13 + st1 {v0.8h-v3.8h}, [x12], x13 .endif /* dc 8x8 */ - st1 {v0.8h-v3.8h}, [x0], x13 - st1 {v0.8h-v3.8h}, [x12], x13 + st1 {v0.8h-v3.8h}, [x0], x13 + st1 {v0.8h-v3.8h}, [x12], x13 .if \size > 16 /* dc 32x32 */ bne 1b .endif .else /* dc 4x4 */ - st1 {v0.8h-v1.8h}, [x0] + st1 {v0.8h-v1.8h}, [x0] .endif ret endfunc diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/hpeldsp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/hpeldsp_neon.S --- ffmpeg-5.1.8/libavcodec/aarch64/hpeldsp_neon.S 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/aarch64/hpeldsp_neon.S 2026-05-05 14:21:58.000000000 +0000 @@ -26,295 +26,295 @@ .if \avg mov x12, x0 .endif -1: ld1 {v0.16B}, [x1], x2 - ld1 {v1.16B}, [x1], x2 - ld1 {v2.16B}, [x1], x2 - ld1 {v3.16B}, [x1], x2 - .if \avg - ld1 {v4.16B}, [x12], x2 - urhadd v0.16B, v0.16B, v4.16B - ld1 {v5.16B}, [x12], x2 - urhadd v1.16B, v1.16B, v5.16B - ld1 {v6.16B}, 
[x12], x2 - urhadd v2.16B, v2.16B, v6.16B - ld1 {v7.16B}, [x12], x2 - urhadd v3.16B, v3.16B, v7.16B +1: ld1 {v0.16b}, [x1], x2 + ld1 {v1.16b}, [x1], x2 + ld1 {v2.16b}, [x1], x2 + ld1 {v3.16b}, [x1], x2 + .if \avg + ld1 {v4.16b}, [x12], x2 + urhadd v0.16b, v0.16b, v4.16b + ld1 {v5.16b}, [x12], x2 + urhadd v1.16b, v1.16b, v5.16b + ld1 {v6.16b}, [x12], x2 + urhadd v2.16b, v2.16b, v6.16b + ld1 {v7.16b}, [x12], x2 + urhadd v3.16b, v3.16b, v7.16b .endif subs w3, w3, #4 - st1 {v0.16B}, [x0], x2 - st1 {v1.16B}, [x0], x2 - st1 {v2.16B}, [x0], x2 - st1 {v3.16B}, [x0], x2 + st1 {v0.16b}, [x0], x2 + st1 {v1.16b}, [x0], x2 + st1 {v2.16b}, [x0], x2 + st1 {v3.16b}, [x0], x2 b.ne 1b ret .endm .macro pixels16_x2 rnd=1, avg=0 -1: ld1 {v0.16B, v1.16B}, [x1], x2 - ld1 {v2.16B, v3.16B}, [x1], x2 +1: ld1 {v0.16b, v1.16b}, [x1], x2 + ld1 {v2.16b, v3.16b}, [x1], x2 subs w3, w3, #2 - ext v1.16B, v0.16B, v1.16B, #1 - avg v0.16B, v0.16B, v1.16B - ext v3.16B, v2.16B, v3.16B, #1 - avg v2.16B, v2.16B, v3.16B - .if \avg - ld1 {v1.16B}, [x0], x2 - ld1 {v3.16B}, [x0] - urhadd v0.16B, v0.16B, v1.16B - urhadd v2.16B, v2.16B, v3.16B + ext v1.16b, v0.16b, v1.16b, #1 + avg v0.16b, v0.16b, v1.16b + ext v3.16b, v2.16b, v3.16b, #1 + avg v2.16b, v2.16b, v3.16b + .if \avg + ld1 {v1.16b}, [x0], x2 + ld1 {v3.16b}, [x0] + urhadd v0.16b, v0.16b, v1.16b + urhadd v2.16b, v2.16b, v3.16b sub x0, x0, x2 .endif - st1 {v0.16B}, [x0], x2 - st1 {v2.16B}, [x0], x2 + st1 {v0.16b}, [x0], x2 + st1 {v2.16b}, [x0], x2 b.ne 1b ret .endm .macro pixels16_y2 rnd=1, avg=0 sub w3, w3, #2 - ld1 {v0.16B}, [x1], x2 - ld1 {v1.16B}, [x1], x2 + ld1 {v0.16b}, [x1], x2 + ld1 {v1.16b}, [x1], x2 1: subs w3, w3, #2 - avg v2.16B, v0.16B, v1.16B - ld1 {v0.16B}, [x1], x2 - avg v3.16B, v0.16B, v1.16B - ld1 {v1.16B}, [x1], x2 - .if \avg - ld1 {v4.16B}, [x0], x2 - ld1 {v5.16B}, [x0] - urhadd v2.16B, v2.16B, v4.16B - urhadd v3.16B, v3.16B, v5.16B + avg v2.16b, v0.16b, v1.16b + ld1 {v0.16b}, [x1], x2 + avg v3.16b, v0.16b, v1.16b + ld1 {v1.16b}, [x1], 
x2 + .if \avg + ld1 {v4.16b}, [x0], x2 + ld1 {v5.16b}, [x0] + urhadd v2.16b, v2.16b, v4.16b + urhadd v3.16b, v3.16b, v5.16b sub x0, x0, x2 .endif - st1 {v2.16B}, [x0], x2 - st1 {v3.16B}, [x0], x2 + st1 {v2.16b}, [x0], x2 + st1 {v3.16b}, [x0], x2 b.ne 1b - avg v2.16B, v0.16B, v1.16B - ld1 {v0.16B}, [x1], x2 - avg v3.16B, v0.16B, v1.16B - .if \avg - ld1 {v4.16B}, [x0], x2 - ld1 {v5.16B}, [x0] - urhadd v2.16B, v2.16B, v4.16B - urhadd v3.16B, v3.16B, v5.16B + avg v2.16b, v0.16b, v1.16b + ld1 {v0.16b}, [x1], x2 + avg v3.16b, v0.16b, v1.16b + .if \avg + ld1 {v4.16b}, [x0], x2 + ld1 {v5.16b}, [x0] + urhadd v2.16b, v2.16b, v4.16b + urhadd v3.16b, v3.16b, v5.16b sub x0, x0, x2 .endif - st1 {v2.16B}, [x0], x2 - st1 {v3.16B}, [x0], x2 + st1 {v2.16b}, [x0], x2 + st1 {v3.16b}, [x0], x2 ret .endm .macro pixels16_xy2 rnd=1, avg=0 sub w3, w3, #2 - ld1 {v0.16B, v1.16B}, [x1], x2 - ld1 {v4.16B, v5.16B}, [x1], x2 + ld1 {v0.16b, v1.16b}, [x1], x2 + ld1 {v4.16b, v5.16b}, [x1], x2 NRND movi v26.8H, #1 - ext v1.16B, v0.16B, v1.16B, #1 - ext v5.16B, v4.16B, v5.16B, #1 - uaddl v16.8H, v0.8B, v1.8B - uaddl2 v20.8H, v0.16B, v1.16B - uaddl v18.8H, v4.8B, v5.8B - uaddl2 v22.8H, v4.16B, v5.16B + ext v1.16b, v0.16b, v1.16b, #1 + ext v5.16b, v4.16b, v5.16b, #1 + uaddl v16.8h, v0.8b, v1.8b + uaddl2 v20.8h, v0.16b, v1.16b + uaddl v18.8h, v4.8b, v5.8b + uaddl2 v22.8h, v4.16b, v5.16b 1: subs w3, w3, #2 - ld1 {v0.16B, v1.16B}, [x1], x2 - add v24.8H, v16.8H, v18.8H + ld1 {v0.16b, v1.16b}, [x1], x2 + add v24.8h, v16.8h, v18.8h NRND add v24.8H, v24.8H, v26.8H - ext v30.16B, v0.16B, v1.16B, #1 - add v1.8H, v20.8H, v22.8H - mshrn v28.8B, v24.8H, #2 + ext v30.16b, v0.16b, v1.16b, #1 + add v1.8h, v20.8h, v22.8h + mshrn v28.8b, v24.8h, #2 NRND add v1.8H, v1.8H, v26.8H - mshrn2 v28.16B, v1.8H, #2 + mshrn2 v28.16b, v1.8h, #2 .if \avg - ld1 {v16.16B}, [x0] - urhadd v28.16B, v28.16B, v16.16B + ld1 {v16.16b}, [x0] + urhadd v28.16b, v28.16b, v16.16b .endif - uaddl v16.8H, v0.8B, v30.8B - ld1 {v2.16B, v3.16B}, [x1], 
x2 - uaddl2 v20.8H, v0.16B, v30.16B - st1 {v28.16B}, [x0], x2 - add v24.8H, v16.8H, v18.8H + uaddl v16.8h, v0.8b, v30.8b + ld1 {v2.16b, v3.16b}, [x1], x2 + uaddl2 v20.8h, v0.16b, v30.16b + st1 {v28.16b}, [x0], x2 + add v24.8h, v16.8h, v18.8h NRND add v24.8H, v24.8H, v26.8H - ext v3.16B, v2.16B, v3.16B, #1 - add v0.8H, v20.8H, v22.8H - mshrn v30.8B, v24.8H, #2 + ext v3.16b, v2.16b, v3.16b, #1 + add v0.8h, v20.8h, v22.8h + mshrn v30.8b, v24.8h, #2 NRND add v0.8H, v0.8H, v26.8H - mshrn2 v30.16B, v0.8H, #2 + mshrn2 v30.16b, v0.8h, #2 .if \avg - ld1 {v18.16B}, [x0] - urhadd v30.16B, v30.16B, v18.16B + ld1 {v18.16b}, [x0] + urhadd v30.16b, v30.16b, v18.16b .endif - uaddl v18.8H, v2.8B, v3.8B - uaddl2 v22.8H, v2.16B, v3.16B - st1 {v30.16B}, [x0], x2 + uaddl v18.8h, v2.8b, v3.8b + uaddl2 v22.8h, v2.16b, v3.16b + st1 {v30.16b}, [x0], x2 b.gt 1b - ld1 {v0.16B, v1.16B}, [x1], x2 - add v24.8H, v16.8H, v18.8H + ld1 {v0.16b, v1.16b}, [x1], x2 + add v24.8h, v16.8h, v18.8h NRND add v24.8H, v24.8H, v26.8H - ext v30.16B, v0.16B, v1.16B, #1 - add v1.8H, v20.8H, v22.8H - mshrn v28.8B, v24.8H, #2 + ext v30.16b, v0.16b, v1.16b, #1 + add v1.8h, v20.8h, v22.8h + mshrn v28.8b, v24.8h, #2 NRND add v1.8H, v1.8H, v26.8H - mshrn2 v28.16B, v1.8H, #2 + mshrn2 v28.16b, v1.8h, #2 .if \avg - ld1 {v16.16B}, [x0] - urhadd v28.16B, v28.16B, v16.16B + ld1 {v16.16b}, [x0] + urhadd v28.16b, v28.16b, v16.16b .endif - uaddl v16.8H, v0.8B, v30.8B - uaddl2 v20.8H, v0.16B, v30.16B - st1 {v28.16B}, [x0], x2 - add v24.8H, v16.8H, v18.8H + uaddl v16.8h, v0.8b, v30.8b + uaddl2 v20.8h, v0.16b, v30.16b + st1 {v28.16b}, [x0], x2 + add v24.8h, v16.8h, v18.8h NRND add v24.8H, v24.8H, v26.8H - add v0.8H, v20.8H, v22.8H - mshrn v30.8B, v24.8H, #2 + add v0.8h, v20.8h, v22.8h + mshrn v30.8b, v24.8h, #2 NRND add v0.8H, v0.8H, v26.8H - mshrn2 v30.16B, v0.8H, #2 + mshrn2 v30.16b, v0.8h, #2 .if \avg - ld1 {v18.16B}, [x0] - urhadd v30.16B, v30.16B, v18.16B + ld1 {v18.16b}, [x0] + urhadd v30.16b, v30.16b, v18.16b .endif - st1 
{v30.16B}, [x0], x2 + st1 {v30.16b}, [x0], x2 ret .endm .macro pixels8 rnd=1, avg=0 -1: ld1 {v0.8B}, [x1], x2 - ld1 {v1.8B}, [x1], x2 - ld1 {v2.8B}, [x1], x2 - ld1 {v3.8B}, [x1], x2 - .if \avg - ld1 {v4.8B}, [x0], x2 - urhadd v0.8B, v0.8B, v4.8B - ld1 {v5.8B}, [x0], x2 - urhadd v1.8B, v1.8B, v5.8B - ld1 {v6.8B}, [x0], x2 - urhadd v2.8B, v2.8B, v6.8B - ld1 {v7.8B}, [x0], x2 - urhadd v3.8B, v3.8B, v7.8B +1: ld1 {v0.8b}, [x1], x2 + ld1 {v1.8b}, [x1], x2 + ld1 {v2.8b}, [x1], x2 + ld1 {v3.8b}, [x1], x2 + .if \avg + ld1 {v4.8b}, [x0], x2 + urhadd v0.8b, v0.8b, v4.8b + ld1 {v5.8b}, [x0], x2 + urhadd v1.8b, v1.8b, v5.8b + ld1 {v6.8b}, [x0], x2 + urhadd v2.8b, v2.8b, v6.8b + ld1 {v7.8b}, [x0], x2 + urhadd v3.8b, v3.8b, v7.8b sub x0, x0, x2, lsl #2 .endif subs w3, w3, #4 - st1 {v0.8B}, [x0], x2 - st1 {v1.8B}, [x0], x2 - st1 {v2.8B}, [x0], x2 - st1 {v3.8B}, [x0], x2 + st1 {v0.8b}, [x0], x2 + st1 {v1.8b}, [x0], x2 + st1 {v2.8b}, [x0], x2 + st1 {v3.8b}, [x0], x2 b.ne 1b ret .endm .macro pixels8_x2 rnd=1, avg=0 -1: ld1 {v0.8B, v1.8B}, [x1], x2 - ext v1.8B, v0.8B, v1.8B, #1 - ld1 {v2.8B, v3.8B}, [x1], x2 - ext v3.8B, v2.8B, v3.8B, #1 +1: ld1 {v0.8b, v1.8b}, [x1], x2 + ext v1.8b, v0.8b, v1.8b, #1 + ld1 {v2.8b, v3.8b}, [x1], x2 + ext v3.8b, v2.8b, v3.8b, #1 subs w3, w3, #2 - avg v0.8B, v0.8B, v1.8B - avg v2.8B, v2.8B, v3.8B + avg v0.8b, v0.8b, v1.8b + avg v2.8b, v2.8b, v3.8b .if \avg - ld1 {v4.8B}, [x0], x2 - ld1 {v5.8B}, [x0] - urhadd v0.8B, v0.8B, v4.8B - urhadd v2.8B, v2.8B, v5.8B + ld1 {v4.8b}, [x0], x2 + ld1 {v5.8b}, [x0] + urhadd v0.8b, v0.8b, v4.8b + urhadd v2.8b, v2.8b, v5.8b sub x0, x0, x2 .endif - st1 {v0.8B}, [x0], x2 - st1 {v2.8B}, [x0], x2 + st1 {v0.8b}, [x0], x2 + st1 {v2.8b}, [x0], x2 b.ne 1b ret .endm .macro pixels8_y2 rnd=1, avg=0 sub w3, w3, #2 - ld1 {v0.8B}, [x1], x2 - ld1 {v1.8B}, [x1], x2 + ld1 {v0.8b}, [x1], x2 + ld1 {v1.8b}, [x1], x2 1: subs w3, w3, #2 - avg v4.8B, v0.8B, v1.8B - ld1 {v0.8B}, [x1], x2 - avg v5.8B, v0.8B, v1.8B - ld1 {v1.8B}, [x1], x2 - .if 
\avg - ld1 {v2.8B}, [x0], x2 - ld1 {v3.8B}, [x0] - urhadd v4.8B, v4.8B, v2.8B - urhadd v5.8B, v5.8B, v3.8B + avg v4.8b, v0.8b, v1.8b + ld1 {v0.8b}, [x1], x2 + avg v5.8b, v0.8b, v1.8b + ld1 {v1.8b}, [x1], x2 + .if \avg + ld1 {v2.8b}, [x0], x2 + ld1 {v3.8b}, [x0] + urhadd v4.8b, v4.8b, v2.8b + urhadd v5.8b, v5.8b, v3.8b sub x0, x0, x2 .endif - st1 {v4.8B}, [x0], x2 - st1 {v5.8B}, [x0], x2 + st1 {v4.8b}, [x0], x2 + st1 {v5.8b}, [x0], x2 b.ne 1b - avg v4.8B, v0.8B, v1.8B - ld1 {v0.8B}, [x1], x2 - avg v5.8B, v0.8B, v1.8B - .if \avg - ld1 {v2.8B}, [x0], x2 - ld1 {v3.8B}, [x0] - urhadd v4.8B, v4.8B, v2.8B - urhadd v5.8B, v5.8B, v3.8B + avg v4.8b, v0.8b, v1.8b + ld1 {v0.8b}, [x1], x2 + avg v5.8b, v0.8b, v1.8b + .if \avg + ld1 {v2.8b}, [x0], x2 + ld1 {v3.8b}, [x0] + urhadd v4.8b, v4.8b, v2.8b + urhadd v5.8b, v5.8b, v3.8b sub x0, x0, x2 .endif - st1 {v4.8B}, [x0], x2 - st1 {v5.8B}, [x0], x2 + st1 {v4.8b}, [x0], x2 + st1 {v5.8b}, [x0], x2 ret .endm .macro pixels8_xy2 rnd=1, avg=0 sub w3, w3, #2 - ld1 {v0.16B}, [x1], x2 - ld1 {v1.16B}, [x1], x2 + ld1 {v0.16b}, [x1], x2 + ld1 {v1.16b}, [x1], x2 NRND movi v19.8H, #1 - ext v4.16B, v0.16B, v4.16B, #1 - ext v6.16B, v1.16B, v6.16B, #1 - uaddl v16.8H, v0.8B, v4.8B - uaddl v17.8H, v1.8B, v6.8B + ext v4.16b, v0.16b, v4.16b, #1 + ext v6.16b, v1.16b, v6.16b, #1 + uaddl v16.8h, v0.8b, v4.8b + uaddl v17.8h, v1.8b, v6.8b 1: subs w3, w3, #2 - ld1 {v0.16B}, [x1], x2 - add v18.8H, v16.8H, v17.8H - ext v4.16B, v0.16B, v4.16B, #1 + ld1 {v0.16b}, [x1], x2 + add v18.8h, v16.8h, v17.8h + ext v4.16b, v0.16b, v4.16b, #1 NRND add v18.8H, v18.8H, v19.8H - uaddl v16.8H, v0.8B, v4.8B - mshrn v5.8B, v18.8H, #2 - ld1 {v1.16B}, [x1], x2 - add v18.8H, v16.8H, v17.8H + uaddl v16.8h, v0.8b, v4.8b + mshrn v5.8b, v18.8h, #2 + ld1 {v1.16b}, [x1], x2 + add v18.8h, v16.8h, v17.8h .if \avg - ld1 {v7.8B}, [x0] - urhadd v5.8B, v5.8B, v7.8B + ld1 {v7.8b}, [x0] + urhadd v5.8b, v5.8b, v7.8b .endif NRND add v18.8H, v18.8H, v19.8H - st1 {v5.8B}, [x0], x2 - mshrn v7.8B, 
v18.8H, #2 + st1 {v5.8b}, [x0], x2 + mshrn v7.8b, v18.8h, #2 .if \avg - ld1 {v5.8B}, [x0] - urhadd v7.8B, v7.8B, v5.8B + ld1 {v5.8b}, [x0] + urhadd v7.8b, v7.8b, v5.8b .endif - ext v6.16B, v1.16B, v6.16B, #1 - uaddl v17.8H, v1.8B, v6.8B - st1 {v7.8B}, [x0], x2 + ext v6.16b, v1.16b, v6.16b, #1 + uaddl v17.8h, v1.8b, v6.8b + st1 {v7.8b}, [x0], x2 b.gt 1b - ld1 {v0.16B}, [x1], x2 - add v18.8H, v16.8H, v17.8H - ext v4.16B, v0.16B, v4.16B, #1 + ld1 {v0.16b}, [x1], x2 + add v18.8h, v16.8h, v17.8h + ext v4.16b, v0.16b, v4.16b, #1 NRND add v18.8H, v18.8H, v19.8H - uaddl v16.8H, v0.8B, v4.8B - mshrn v5.8B, v18.8H, #2 - add v18.8H, v16.8H, v17.8H + uaddl v16.8h, v0.8b, v4.8b + mshrn v5.8b, v18.8h, #2 + add v18.8h, v16.8h, v17.8h .if \avg - ld1 {v7.8B}, [x0] - urhadd v5.8B, v5.8B, v7.8B + ld1 {v7.8b}, [x0] + urhadd v5.8b, v5.8b, v7.8b .endif NRND add v18.8H, v18.8H, v19.8H - st1 {v5.8B}, [x0], x2 - mshrn v7.8B, v18.8H, #2 + st1 {v5.8b}, [x0], x2 + mshrn v7.8b, v18.8h, #2 .if \avg - ld1 {v5.8B}, [x0] - urhadd v7.8B, v7.8B, v5.8B + ld1 {v5.8b}, [x0] + urhadd v7.8b, v7.8b, v5.8b .endif - st1 {v7.8B}, [x0], x2 + st1 {v7.8b}, [x0], x2 ret .endm diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/me_cmp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/me_cmp_neon.S --- ffmpeg-5.1.8/libavcodec/aarch64/me_cmp_neon.S 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/aarch64/me_cmp_neon.S 2026-05-05 15:50:52.000000000 +0000 @@ -27,7 +27,7 @@ // x3 ptrdiff_t stride // w4 int h cmp w4, #4 // if h < 4, jump to completion section - movi v18.4S, #0 // clear result accumulator + movi v18.4s, #0 // clear result accumulator b.lt 2f 1: ld1 {v0.16b}, [x1], x3 // load pix1 diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/neon.S ffmpeg-5.1.9/libavcodec/aarch64/neon.S --- ffmpeg-5.1.8/libavcodec/aarch64/neon.S 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/aarch64/neon.S 2026-05-05 15:50:52.000000000 +0000 @@ -17,146 +17,146 @@ */ .macro transpose_8x8B r0, r1, r2, r3, r4, r5, r6, r7, r8, r9 - 
trn1 \r8\().8B, \r0\().8B, \r1\().8B - trn2 \r9\().8B, \r0\().8B, \r1\().8B - trn1 \r1\().8B, \r2\().8B, \r3\().8B - trn2 \r3\().8B, \r2\().8B, \r3\().8B - trn1 \r0\().8B, \r4\().8B, \r5\().8B - trn2 \r5\().8B, \r4\().8B, \r5\().8B - trn1 \r2\().8B, \r6\().8B, \r7\().8B - trn2 \r7\().8B, \r6\().8B, \r7\().8B - - trn1 \r4\().4H, \r0\().4H, \r2\().4H - trn2 \r2\().4H, \r0\().4H, \r2\().4H - trn1 \r6\().4H, \r5\().4H, \r7\().4H - trn2 \r7\().4H, \r5\().4H, \r7\().4H - trn1 \r5\().4H, \r9\().4H, \r3\().4H - trn2 \r9\().4H, \r9\().4H, \r3\().4H - trn1 \r3\().4H, \r8\().4H, \r1\().4H - trn2 \r8\().4H, \r8\().4H, \r1\().4H - - trn1 \r0\().2S, \r3\().2S, \r4\().2S - trn2 \r4\().2S, \r3\().2S, \r4\().2S + trn1 \r8\().8b, \r0\().8b, \r1\().8b + trn2 \r9\().8b, \r0\().8b, \r1\().8b + trn1 \r1\().8b, \r2\().8b, \r3\().8b + trn2 \r3\().8b, \r2\().8b, \r3\().8b + trn1 \r0\().8b, \r4\().8b, \r5\().8b + trn2 \r5\().8b, \r4\().8b, \r5\().8b + trn1 \r2\().8b, \r6\().8b, \r7\().8b + trn2 \r7\().8b, \r6\().8b, \r7\().8b + + trn1 \r4\().4h, \r0\().4h, \r2\().4h + trn2 \r2\().4h, \r0\().4h, \r2\().4h + trn1 \r6\().4h, \r5\().4h, \r7\().4h + trn2 \r7\().4h, \r5\().4h, \r7\().4h + trn1 \r5\().4h, \r9\().4h, \r3\().4h + trn2 \r9\().4h, \r9\().4h, \r3\().4h + trn1 \r3\().4h, \r8\().4h, \r1\().4h + trn2 \r8\().4h, \r8\().4h, \r1\().4h + + trn1 \r0\().2s, \r3\().2s, \r4\().2s + trn2 \r4\().2s, \r3\().2s, \r4\().2s - trn1 \r1\().2S, \r5\().2S, \r6\().2S - trn2 \r5\().2S, \r5\().2S, \r6\().2S + trn1 \r1\().2s, \r5\().2s, \r6\().2s + trn2 \r5\().2s, \r5\().2s, \r6\().2s - trn2 \r6\().2S, \r8\().2S, \r2\().2S - trn1 \r2\().2S, \r8\().2S, \r2\().2S + trn2 \r6\().2s, \r8\().2s, \r2\().2s + trn1 \r2\().2s, \r8\().2s, \r2\().2s - trn1 \r3\().2S, \r9\().2S, \r7\().2S - trn2 \r7\().2S, \r9\().2S, \r7\().2S + trn1 \r3\().2s, \r9\().2s, \r7\().2s + trn2 \r7\().2s, \r9\().2s, \r7\().2s .endm .macro transpose_8x16B r0, r1, r2, r3, r4, r5, r6, r7, t0, t1 - trn1 \t0\().16B, \r0\().16B, \r1\().16B - trn2 
\t1\().16B, \r0\().16B, \r1\().16B - trn1 \r1\().16B, \r2\().16B, \r3\().16B - trn2 \r3\().16B, \r2\().16B, \r3\().16B - trn1 \r0\().16B, \r4\().16B, \r5\().16B - trn2 \r5\().16B, \r4\().16B, \r5\().16B - trn1 \r2\().16B, \r6\().16B, \r7\().16B - trn2 \r7\().16B, \r6\().16B, \r7\().16B - - trn1 \r4\().8H, \r0\().8H, \r2\().8H - trn2 \r2\().8H, \r0\().8H, \r2\().8H - trn1 \r6\().8H, \r5\().8H, \r7\().8H - trn2 \r7\().8H, \r5\().8H, \r7\().8H - trn1 \r5\().8H, \t1\().8H, \r3\().8H - trn2 \t1\().8H, \t1\().8H, \r3\().8H - trn1 \r3\().8H, \t0\().8H, \r1\().8H - trn2 \t0\().8H, \t0\().8H, \r1\().8H - - trn1 \r0\().4S, \r3\().4S, \r4\().4S - trn2 \r4\().4S, \r3\().4S, \r4\().4S + trn1 \t0\().16b, \r0\().16b, \r1\().16b + trn2 \t1\().16b, \r0\().16b, \r1\().16b + trn1 \r1\().16b, \r2\().16b, \r3\().16b + trn2 \r3\().16b, \r2\().16b, \r3\().16b + trn1 \r0\().16b, \r4\().16b, \r5\().16b + trn2 \r5\().16b, \r4\().16b, \r5\().16b + trn1 \r2\().16b, \r6\().16b, \r7\().16b + trn2 \r7\().16b, \r6\().16b, \r7\().16b + + trn1 \r4\().8h, \r0\().8h, \r2\().8h + trn2 \r2\().8h, \r0\().8h, \r2\().8h + trn1 \r6\().8h, \r5\().8h, \r7\().8h + trn2 \r7\().8h, \r5\().8h, \r7\().8h + trn1 \r5\().8h, \t1\().8h, \r3\().8h + trn2 \t1\().8h, \t1\().8h, \r3\().8h + trn1 \r3\().8h, \t0\().8h, \r1\().8h + trn2 \t0\().8h, \t0\().8h, \r1\().8h + + trn1 \r0\().4s, \r3\().4s, \r4\().4s + trn2 \r4\().4s, \r3\().4s, \r4\().4s - trn1 \r1\().4S, \r5\().4S, \r6\().4S - trn2 \r5\().4S, \r5\().4S, \r6\().4S + trn1 \r1\().4s, \r5\().4s, \r6\().4s + trn2 \r5\().4s, \r5\().4s, \r6\().4s - trn2 \r6\().4S, \t0\().4S, \r2\().4S - trn1 \r2\().4S, \t0\().4S, \r2\().4S + trn2 \r6\().4s, \t0\().4s, \r2\().4s + trn1 \r2\().4s, \t0\().4s, \r2\().4s - trn1 \r3\().4S, \t1\().4S, \r7\().4S - trn2 \r7\().4S, \t1\().4S, \r7\().4S + trn1 \r3\().4s, \t1\().4s, \r7\().4s + trn2 \r7\().4s, \t1\().4s, \r7\().4s .endm .macro transpose_4x16B r0, r1, r2, r3, t4, t5, t6, t7 - trn1 \t4\().16B, \r0\().16B, \r1\().16B - trn2 \t5\().16B, 
\r0\().16B, \r1\().16B - trn1 \t6\().16B, \r2\().16B, \r3\().16B - trn2 \t7\().16B, \r2\().16B, \r3\().16B - - trn1 \r0\().8H, \t4\().8H, \t6\().8H - trn2 \r2\().8H, \t4\().8H, \t6\().8H - trn1 \r1\().8H, \t5\().8H, \t7\().8H - trn2 \r3\().8H, \t5\().8H, \t7\().8H + trn1 \t4\().16b, \r0\().16b, \r1\().16b + trn2 \t5\().16b, \r0\().16b, \r1\().16b + trn1 \t6\().16b, \r2\().16b, \r3\().16b + trn2 \t7\().16b, \r2\().16b, \r3\().16b + + trn1 \r0\().8h, \t4\().8h, \t6\().8h + trn2 \r2\().8h, \t4\().8h, \t6\().8h + trn1 \r1\().8h, \t5\().8h, \t7\().8h + trn2 \r3\().8h, \t5\().8h, \t7\().8h .endm .macro transpose_4x8B r0, r1, r2, r3, t4, t5, t6, t7 - trn1 \t4\().8B, \r0\().8B, \r1\().8B - trn2 \t5\().8B, \r0\().8B, \r1\().8B - trn1 \t6\().8B, \r2\().8B, \r3\().8B - trn2 \t7\().8B, \r2\().8B, \r3\().8B - - trn1 \r0\().4H, \t4\().4H, \t6\().4H - trn2 \r2\().4H, \t4\().4H, \t6\().4H - trn1 \r1\().4H, \t5\().4H, \t7\().4H - trn2 \r3\().4H, \t5\().4H, \t7\().4H + trn1 \t4\().8b, \r0\().8b, \r1\().8b + trn2 \t5\().8b, \r0\().8b, \r1\().8b + trn1 \t6\().8b, \r2\().8b, \r3\().8b + trn2 \t7\().8b, \r2\().8b, \r3\().8b + + trn1 \r0\().4h, \t4\().4h, \t6\().4h + trn2 \r2\().4h, \t4\().4h, \t6\().4h + trn1 \r1\().4h, \t5\().4h, \t7\().4h + trn2 \r3\().4h, \t5\().4h, \t7\().4h .endm .macro transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7 - trn1 \r4\().4H, \r0\().4H, \r1\().4H - trn2 \r5\().4H, \r0\().4H, \r1\().4H - trn1 \r6\().4H, \r2\().4H, \r3\().4H - trn2 \r7\().4H, \r2\().4H, \r3\().4H - - trn1 \r0\().2S, \r4\().2S, \r6\().2S - trn2 \r2\().2S, \r4\().2S, \r6\().2S - trn1 \r1\().2S, \r5\().2S, \r7\().2S - trn2 \r3\().2S, \r5\().2S, \r7\().2S + trn1 \r4\().4h, \r0\().4h, \r1\().4h + trn2 \r5\().4h, \r0\().4h, \r1\().4h + trn1 \r6\().4h, \r2\().4h, \r3\().4h + trn2 \r7\().4h, \r2\().4h, \r3\().4h + + trn1 \r0\().2s, \r4\().2s, \r6\().2s + trn2 \r2\().2s, \r4\().2s, \r6\().2s + trn1 \r1\().2s, \r5\().2s, \r7\().2s + trn2 \r3\().2s, \r5\().2s, \r7\().2s .endm .macro transpose_4x8H r0, 
r1, r2, r3, t4, t5, t6, t7 - trn1 \t4\().8H, \r0\().8H, \r1\().8H - trn2 \t5\().8H, \r0\().8H, \r1\().8H - trn1 \t6\().8H, \r2\().8H, \r3\().8H - trn2 \t7\().8H, \r2\().8H, \r3\().8H - - trn1 \r0\().4S, \t4\().4S, \t6\().4S - trn2 \r2\().4S, \t4\().4S, \t6\().4S - trn1 \r1\().4S, \t5\().4S, \t7\().4S - trn2 \r3\().4S, \t5\().4S, \t7\().4S + trn1 \t4\().8h, \r0\().8h, \r1\().8h + trn2 \t5\().8h, \r0\().8h, \r1\().8h + trn1 \t6\().8h, \r2\().8h, \r3\().8h + trn2 \t7\().8h, \r2\().8h, \r3\().8h + + trn1 \r0\().4s, \t4\().4s, \t6\().4s + trn2 \r2\().4s, \t4\().4s, \t6\().4s + trn1 \r1\().4s, \t5\().4s, \t7\().4s + trn2 \r3\().4s, \t5\().4s, \t7\().4s .endm .macro transpose_8x8H r0, r1, r2, r3, r4, r5, r6, r7, r8, r9 - trn1 \r8\().8H, \r0\().8H, \r1\().8H - trn2 \r9\().8H, \r0\().8H, \r1\().8H - trn1 \r1\().8H, \r2\().8H, \r3\().8H - trn2 \r3\().8H, \r2\().8H, \r3\().8H - trn1 \r0\().8H, \r4\().8H, \r5\().8H - trn2 \r5\().8H, \r4\().8H, \r5\().8H - trn1 \r2\().8H, \r6\().8H, \r7\().8H - trn2 \r7\().8H, \r6\().8H, \r7\().8H - - trn1 \r4\().4S, \r0\().4S, \r2\().4S - trn2 \r2\().4S, \r0\().4S, \r2\().4S - trn1 \r6\().4S, \r5\().4S, \r7\().4S - trn2 \r7\().4S, \r5\().4S, \r7\().4S - trn1 \r5\().4S, \r9\().4S, \r3\().4S - trn2 \r9\().4S, \r9\().4S, \r3\().4S - trn1 \r3\().4S, \r8\().4S, \r1\().4S - trn2 \r8\().4S, \r8\().4S, \r1\().4S - - trn1 \r0\().2D, \r3\().2D, \r4\().2D - trn2 \r4\().2D, \r3\().2D, \r4\().2D + trn1 \r8\().8h, \r0\().8h, \r1\().8h + trn2 \r9\().8h, \r0\().8h, \r1\().8h + trn1 \r1\().8h, \r2\().8h, \r3\().8h + trn2 \r3\().8h, \r2\().8h, \r3\().8h + trn1 \r0\().8h, \r4\().8h, \r5\().8h + trn2 \r5\().8h, \r4\().8h, \r5\().8h + trn1 \r2\().8h, \r6\().8h, \r7\().8h + trn2 \r7\().8h, \r6\().8h, \r7\().8h + + trn1 \r4\().4s, \r0\().4s, \r2\().4s + trn2 \r2\().4s, \r0\().4s, \r2\().4s + trn1 \r6\().4s, \r5\().4s, \r7\().4s + trn2 \r7\().4s, \r5\().4s, \r7\().4s + trn1 \r5\().4s, \r9\().4s, \r3\().4s + trn2 \r9\().4s, \r9\().4s, \r3\().4s + trn1 \r3\().4s, 
\r8\().4s, \r1\().4s + trn2 \r8\().4s, \r8\().4s, \r1\().4s + + trn1 \r0\().2d, \r3\().2d, \r4\().2d + trn2 \r4\().2d, \r3\().2d, \r4\().2d - trn1 \r1\().2D, \r5\().2D, \r6\().2D - trn2 \r5\().2D, \r5\().2D, \r6\().2D + trn1 \r1\().2d, \r5\().2d, \r6\().2d + trn2 \r5\().2d, \r5\().2d, \r6\().2d - trn2 \r6\().2D, \r8\().2D, \r2\().2D - trn1 \r2\().2D, \r8\().2D, \r2\().2D + trn2 \r6\().2d, \r8\().2d, \r2\().2d + trn1 \r2\().2d, \r8\().2d, \r2\().2d - trn1 \r3\().2D, \r9\().2D, \r7\().2D - trn2 \r7\().2D, \r9\().2D, \r7\().2D + trn1 \r3\().2d, \r9\().2d, \r7\().2d + trn2 \r7\().2d, \r9\().2d, \r7\().2d .endm diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/opusdsp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/opusdsp_neon.S --- ffmpeg-5.1.8/libavcodec/aarch64/opusdsp_neon.S 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/aarch64/opusdsp_neon.S 2026-05-05 14:21:58.000000000 +0000 @@ -33,81 +33,81 @@ endconst function ff_opus_deemphasis_neon, export=1 - movrel x4, tab_st - ld1 {v4.4s}, [x4] - movrel x4, tab_x0 - ld1 {v5.4s}, [x4] - movrel x4, tab_x1 - ld1 {v6.4s}, [x4] - movrel x4, tab_x2 - ld1 {v7.4s}, [x4] + movrel x4, tab_st + ld1 {v4.4s}, [x4] + movrel x4, tab_x0 + ld1 {v5.4s}, [x4] + movrel x4, tab_x1 + ld1 {v6.4s}, [x4] + movrel x4, tab_x2 + ld1 {v7.4s}, [x4] - fmul v0.4s, v4.4s, v0.s[0] + fmul v0.4s, v4.4s, v0.s[0] -1: ld1 {v1.4s, v2.4s}, [x1], #32 +1: ld1 {v1.4s, v2.4s}, [x1], #32 - fmla v0.4s, v5.4s, v1.s[0] - fmul v3.4s, v7.4s, v2.s[2] + fmla v0.4s, v5.4s, v1.s[0] + fmul v3.4s, v7.4s, v2.s[2] - fmla v0.4s, v6.4s, v1.s[1] - fmla v3.4s, v6.4s, v2.s[1] + fmla v0.4s, v6.4s, v1.s[1] + fmla v3.4s, v6.4s, v2.s[1] - fmla v0.4s, v7.4s, v1.s[2] - fmla v3.4s, v5.4s, v2.s[0] + fmla v0.4s, v7.4s, v1.s[2] + fmla v3.4s, v5.4s, v2.s[0] - fadd v1.4s, v1.4s, v0.4s - fadd v2.4s, v2.4s, v3.4s + fadd v1.4s, v1.4s, v0.4s + fadd v2.4s, v2.4s, v3.4s - fmla v2.4s, v4.4s, v1.s[3] + fmla v2.4s, v4.4s, v1.s[3] - st1 {v1.4s, v2.4s}, [x0], #32 - fmul v0.4s, v4.4s, v2.s[3] + st1 {v1.4s, 
v2.4s}, [x0], #32 + fmul v0.4s, v4.4s, v2.s[3] - subs w2, w2, #8 - b.gt 1b + subs w2, w2, #8 + b.gt 1b - mov s0, v2.s[3] + mov s0, v2.s[3] ret endfunc function ff_opus_postfilter_neon, export=1 - ld1 {v0.4s}, [x2] - dup v1.4s, v0.s[1] - dup v2.4s, v0.s[2] - dup v0.4s, v0.s[0] - - add w1, w1, #2 - sub x1, x0, x1, lsl #2 - - ld1 {v3.4s}, [x1] - fmul v3.4s, v3.4s, v2.4s - -1: add x1, x1, #4 - ld1 {v4.4s}, [x1] - add x1, x1, #4 - ld1 {v5.4s}, [x1] - add x1, x1, #4 - ld1 {v6.4s}, [x1] - add x1, x1, #4 - ld1 {v7.4s}, [x1] - - fmla v3.4s, v7.4s, v2.4s - fadd v6.4s, v6.4s, v4.4s - - ld1 {v4.4s}, [x0] - fmla v4.4s, v5.4s, v0.4s - - fmul v6.4s, v6.4s, v1.4s - fadd v6.4s, v6.4s, v3.4s + ld1 {v0.4s}, [x2] + dup v1.4s, v0.s[1] + dup v2.4s, v0.s[2] + dup v0.4s, v0.s[0] + + add w1, w1, #2 + sub x1, x0, x1, lsl #2 + + ld1 {v3.4s}, [x1] + fmul v3.4s, v3.4s, v2.4s + +1: add x1, x1, #4 + ld1 {v4.4s}, [x1] + add x1, x1, #4 + ld1 {v5.4s}, [x1] + add x1, x1, #4 + ld1 {v6.4s}, [x1] + add x1, x1, #4 + ld1 {v7.4s}, [x1] + + fmla v3.4s, v7.4s, v2.4s + fadd v6.4s, v6.4s, v4.4s + + ld1 {v4.4s}, [x0] + fmla v4.4s, v5.4s, v0.4s + + fmul v6.4s, v6.4s, v1.4s + fadd v6.4s, v6.4s, v3.4s - fadd v4.4s, v4.4s, v6.4s - fmul v3.4s, v7.4s, v2.4s + fadd v4.4s, v4.4s, v6.4s + fmul v3.4s, v7.4s, v2.4s - st1 {v4.4s}, [x0], #16 + st1 {v4.4s}, [x0], #16 - subs w3, w3, #4 - b.gt 1b + subs w3, w3, #4 + b.gt 1b ret endfunc diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/sbrdsp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/sbrdsp_neon.S --- ffmpeg-5.1.8/libavcodec/aarch64/sbrdsp_neon.S 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/aarch64/sbrdsp_neon.S 2026-03-16 18:10:00.000000000 +0000 @@ -46,49 +46,49 @@ add x3, x0, #192*4 add x4, x0, #256*4 mov x5, #64 -1: ld1 {v0.4S}, [x0] - ld1 {v1.4S}, [x1], #16 - fadd v0.4S, v0.4S, v1.4S - ld1 {v2.4S}, [x2], #16 - fadd v0.4S, v0.4S, v2.4S - ld1 {v3.4S}, [x3], #16 - fadd v0.4S, v0.4S, v3.4S - ld1 {v4.4S}, [x4], #16 - fadd v0.4S, v0.4S, v4.4S - st1 {v0.4S}, [x0], #16 
+1: ld1 {v0.4s}, [x0] + ld1 {v1.4s}, [x1], #16 + fadd v0.4s, v0.4s, v1.4s + ld1 {v2.4s}, [x2], #16 + fadd v0.4s, v0.4s, v2.4s + ld1 {v3.4s}, [x3], #16 + fadd v0.4s, v0.4s, v3.4s + ld1 {v4.4s}, [x4], #16 + fadd v0.4s, v0.4s, v4.4s + st1 {v0.4s}, [x0], #16 subs x5, x5, #4 b.gt 1b ret endfunc function ff_sbr_sum_square_neon, export=1 - movi v0.4S, #0 -1: ld1 {v1.4S}, [x0], #16 - fmla v0.4S, v1.4S, v1.4S + movi v0.4s, #0 +1: ld1 {v1.4s}, [x0], #16 + fmla v0.4s, v1.4s, v1.4s subs w1, w1, #2 b.gt 1b - faddp v0.4S, v0.4S, v0.4S - faddp v0.4S, v0.4S, v0.4S + faddp v0.4s, v0.4s, v0.4s + faddp v0.4s, v0.4s, v0.4s ret endfunc function ff_sbr_neg_odd_64_neon, export=1 mov x1, x0 - movi v5.4S, #1<<7, lsl #24 - ld2 {v0.4S, v1.4S}, [x0], #32 - eor v1.16B, v1.16B, v5.16B - ld2 {v2.4S, v3.4S}, [x0], #32 + movi v5.4s, #1<<7, lsl #24 + ld2 {v0.4s, v1.4s}, [x0], #32 + eor v1.16b, v1.16b, v5.16b + ld2 {v2.4s, v3.4s}, [x0], #32 .rept 3 - st2 {v0.4S, v1.4S}, [x1], #32 - eor v3.16B, v3.16B, v5.16B - ld2 {v0.4S, v1.4S}, [x0], #32 - st2 {v2.4S, v3.4S}, [x1], #32 - eor v1.16B, v1.16B, v5.16B - ld2 {v2.4S, v3.4S}, [x0], #32 + st2 {v0.4s, v1.4s}, [x1], #32 + eor v3.16b, v3.16b, v5.16b + ld2 {v0.4s, v1.4s}, [x0], #32 + st2 {v2.4s, v3.4s}, [x1], #32 + eor v1.16b, v1.16b, v5.16b + ld2 {v2.4s, v3.4s}, [x0], #32 .endr - eor v3.16B, v3.16B, v5.16B - st2 {v0.4S, v1.4S}, [x1], #32 - st2 {v2.4S, v3.4S}, [x1], #32 + eor v3.16b, v3.16b, v5.16b + st2 {v0.4s, v1.4s}, [x1], #32 + st2 {v2.4s, v3.4s}, [x1], #32 ret endfunc @@ -97,26 +97,26 @@ add x2, x0, #64*4 mov x3, #-16 mov x4, #-4 - movi v6.4S, #1<<7, lsl #24 - ld1 {v0.2S}, [x0], #8 - st1 {v0.2S}, [x2], #8 + movi v6.4s, #1<<7, lsl #24 + ld1 {v0.2s}, [x0], #8 + st1 {v0.2s}, [x2], #8 .rept 7 - ld1 {v1.4S}, [x1], x3 - ld1 {v2.4S}, [x0], #16 - eor v1.16B, v1.16B, v6.16B - rev64 v1.4S, v1.4S - ext v1.16B, v1.16B, v1.16B, #8 - st2 {v1.4S, v2.4S}, [x2], #32 + ld1 {v1.4s}, [x1], x3 + ld1 {v2.4s}, [x0], #16 + eor v1.16b, v1.16b, v6.16b + rev64 v1.4s, v1.4s + ext 
v1.16b, v1.16b, v1.16b, #8 + st2 {v1.4s, v2.4s}, [x2], #32 .endr add x1, x1, #8 - ld1 {v1.2S}, [x1], x4 - ld1 {v2.2S}, [x0], #8 - ld1 {v1.S}[3], [x1] - ld1 {v2.S}[2], [x0] - eor v1.16B, v1.16B, v6.16B - rev64 v1.4S, v1.4S - st2 {v1.2S, v2.2S}, [x2], #16 - st2 {v1.S, v2.S}[2], [x2] + ld1 {v1.2s}, [x1], x4 + ld1 {v2.2s}, [x0], #8 + ld1 {v1.s}[3], [x1] + ld1 {v2.s}[2], [x0] + eor v1.16b, v1.16b, v6.16b + rev64 v1.4s, v1.4s + st2 {v1.2s, v2.2s}, [x2], #16 + st2 {v1.s, v2.s}[2], [x2] ret endfunc @@ -124,13 +124,13 @@ add x2, x1, #60*4 mov x3, #-16 mov x4, #32 - movi v6.4S, #1<<7, lsl #24 -1: ld1 {v0.4S}, [x2], x3 - ld1 {v1.4S}, [x1], #16 - eor v0.16B, v0.16B, v6.16B - rev64 v0.4S, v0.4S - ext v0.16B, v0.16B, v0.16B, #8 - st2 {v0.4S, v1.4S}, [x0], #32 + movi v6.4s, #1<<7, lsl #24 +1: ld1 {v0.4s}, [x2], x3 + ld1 {v1.4s}, [x1], #16 + eor v0.16b, v0.16b, v6.16b + rev64 v0.4s, v0.4s + ext v0.16b, v0.16b, v0.16b, #8 + st2 {v0.4s, v1.4s}, [x0], #32 subs x4, x4, #4 b.gt 1b ret @@ -141,13 +141,13 @@ add x2, x0, #60*4 mov x3, #-32 mov x4, #32 - movi v2.4S, #1<<7, lsl #24 -1: ld2 {v0.4S, v1.4S}, [x1], x3 - eor v0.16B, v0.16B, v2.16B - rev64 v1.4S, v1.4S - ext v1.16B, v1.16B, v1.16B, #8 - st1 {v0.4S}, [x2] - st1 {v1.4S}, [x0], #16 + movi v2.4s, #1<<7, lsl #24 +1: ld2 {v0.4s, v1.4s}, [x1], x3 + eor v0.16b, v0.16b, v2.16b + rev64 v1.4s, v1.4s + ext v1.16b, v1.16b, v1.16b, #8 + st1 {v0.4s}, [x2] + st1 {v1.4s}, [x0], #16 sub x2, x2, #16 subs x4, x4, #4 b.gt 1b @@ -159,16 +159,16 @@ add x3, x0, #124*4 mov x4, #64 mov x5, #-16 -1: ld1 {v0.4S}, [x1], #16 - ld1 {v1.4S}, [x2], x5 - rev64 v2.4S, v0.4S - ext v2.16B, v2.16B, v2.16B, #8 - rev64 v3.4S, v1.4S - ext v3.16B, v3.16B, v3.16B, #8 - fadd v1.4S, v1.4S, v2.4S - fsub v0.4S, v0.4S, v3.4S - st1 {v0.4S}, [x0], #16 - st1 {v1.4S}, [x3], x5 +1: ld1 {v0.4s}, [x1], #16 + ld1 {v1.4s}, [x2], x5 + rev64 v2.4s, v0.4s + ext v2.16b, v2.16b, v2.16b, #8 + rev64 v3.4s, v1.4s + ext v3.16b, v3.16b, v3.16b, #8 + fadd v1.4s, v1.4s, v2.4s + fsub v0.4s, v0.4s, 
v3.4s + st1 {v0.4s}, [x0], #16 + st1 {v1.4s}, [x3], x5 subs x4, x4, #4 b.gt 1b ret @@ -178,32 +178,32 @@ sxtw x4, w4 sxtw x5, w5 movrel x6, factors - ld1 {v7.4S}, [x6] - dup v1.4S, v0.S[0] - mov v2.8B, v1.8B - mov v2.S[2], v7.S[0] - mov v2.S[3], v7.S[0] - fmul v1.4S, v1.4S, v2.4S - ld1 {v0.D}[0], [x3] - ld1 {v0.D}[1], [x2] - fmul v0.4S, v0.4S, v1.4S - fmul v1.4S, v0.4S, v7.4S - rev64 v0.4S, v0.4S + ld1 {v7.4s}, [x6] + dup v1.4s, v0.s[0] + mov v2.8b, v1.8b + mov v2.s[2], v7.s[0] + mov v2.s[3], v7.s[0] + fmul v1.4s, v1.4s, v2.4s + ld1 {v0.d}[0], [x3] + ld1 {v0.d}[1], [x2] + fmul v0.4s, v0.4s, v1.4s + fmul v1.4s, v0.4s, v7.4s + rev64 v0.4s, v0.4s sub x7, x5, x4 add x0, x0, x4, lsl #3 add x1, x1, x4, lsl #3 sub x1, x1, #16 -1: ld1 {v2.4S}, [x1], #16 - ld1 {v3.2S}, [x1] - fmul v4.4S, v2.4S, v1.4S - fmul v5.4S, v2.4S, v0.4S - faddp v4.4S, v4.4S, v4.4S - faddp v5.4S, v5.4S, v5.4S - faddp v4.4S, v4.4S, v4.4S - faddp v5.4S, v5.4S, v5.4S - mov v4.S[1], v5.S[0] - fadd v4.2S, v4.2S, v3.2S - st1 {v4.2S}, [x0], #8 +1: ld1 {v2.4s}, [x1], #16 + ld1 {v3.2s}, [x1] + fmul v4.4s, v2.4s, v1.4s + fmul v5.4s, v2.4s, v0.4s + faddp v4.4s, v4.4s, v4.4s + faddp v5.4s, v5.4s, v5.4s + faddp v4.4s, v4.4s, v4.4s + faddp v5.4s, v5.4s, v5.4s + mov v4.s[1], v5.s[0] + fadd v4.2s, v4.2s, v3.2s + st1 {v4.2s}, [x0], #8 sub x1, x1, #8 subs x7, x7, #1 b.gt 1b @@ -215,10 +215,10 @@ sxtw x4, w4 mov x5, #40*2*4 add x1, x1, x4, lsl #3 -1: ld1 {v0.2S}, [x1], x5 - ld1 {v1.S}[0], [x2], #4 - fmul v2.4S, v0.4S, v1.S[0] - st1 {v2.2S}, [x0], #8 +1: ld1 {v0.2s}, [x1], x5 + ld1 {v1.s}[0], [x2], #4 + fmul v2.4s, v0.4s, v1.s[0] + st1 {v2.2s}, [x0], #8 subs x3, x3, #1 b.gt 1b ret @@ -227,46 +227,46 @@ function ff_sbr_autocorrelate_neon, export=1 mov x2, #38 movrel x3, factors - ld1 {v0.4S}, [x3] - movi v1.4S, #0 - movi v2.4S, #0 - movi v3.4S, #0 - ld1 {v4.2S}, [x0], #8 - ld1 {v5.2S}, [x0], #8 - fmul v16.2S, v4.2S, v4.2S - fmul v17.2S, v5.2S, v4.S[0] - fmul v18.2S, v5.2S, v4.S[1] -1: ld1 {v5.D}[1], [x0], #8 - fmla v1.2S, 
v4.2S, v4.2S - fmla v2.4S, v5.4S, v4.S[0] - fmla v3.4S, v5.4S, v4.S[1] - mov v4.D[0], v5.D[0] - mov v5.D[0], v5.D[1] + ld1 {v0.4s}, [x3] + movi v1.4s, #0 + movi v2.4s, #0 + movi v3.4s, #0 + ld1 {v4.2s}, [x0], #8 + ld1 {v5.2s}, [x0], #8 + fmul v16.2s, v4.2s, v4.2s + fmul v17.2s, v5.2s, v4.s[0] + fmul v18.2s, v5.2s, v4.s[1] +1: ld1 {v5.d}[1], [x0], #8 + fmla v1.2s, v4.2s, v4.2s + fmla v2.4s, v5.4s, v4.s[0] + fmla v3.4s, v5.4s, v4.s[1] + mov v4.d[0], v5.d[0] + mov v5.d[0], v5.d[1] subs x2, x2, #1 b.gt 1b - fmul v19.2S, v4.2S, v4.2S - fmul v20.2S, v5.2S, v4.S[0] - fmul v21.2S, v5.2S, v4.S[1] - fadd v22.4S, v2.4S, v20.4S - fsub v22.4S, v22.4S, v17.4S - fadd v23.4S, v3.4S, v21.4S - fsub v23.4S, v23.4S, v18.4S - rev64 v23.4S, v23.4S - fmul v23.4S, v23.4S, v0.4S - fadd v22.4S, v22.4S, v23.4S - st1 {v22.4S}, [x1], #16 - fadd v23.2S, v1.2S, v19.2S - fsub v23.2S, v23.2S, v16.2S - faddp v23.2S, v23.2S, v23.2S - st1 {v23.S}[0], [x1] + fmul v19.2s, v4.2s, v4.2s + fmul v20.2s, v5.2s, v4.s[0] + fmul v21.2s, v5.2s, v4.s[1] + fadd v22.4s, v2.4s, v20.4s + fsub v22.4s, v22.4s, v17.4s + fadd v23.4s, v3.4s, v21.4s + fsub v23.4s, v23.4s, v18.4s + rev64 v23.4s, v23.4s + fmul v23.4s, v23.4s, v0.4s + fadd v22.4s, v22.4s, v23.4s + st1 {v22.4s}, [x1], #16 + fadd v23.2s, v1.2s, v19.2s + fsub v23.2s, v23.2s, v16.2s + faddp v23.2s, v23.2s, v23.2s + st1 {v23.s}[0], [x1] add x1, x1, #8 - rev64 v3.2S, v3.2S - fmul v3.2S, v3.2S, v0.2S - fadd v2.2S, v2.2S, v3.2S - st1 {v2.2S}, [x1] + rev64 v3.2s, v3.2s + fmul v3.2s, v3.2s, v0.2s + fadd v2.2s, v2.2s, v3.2s + st1 {v2.2s}, [x1] add x1, x1, #16 - faddp v1.2S, v1.2S, v1.2S - st1 {v1.S}[0], [x1] + faddp v1.2s, v1.2s, v1.2s + st1 {v1.s}[0], [x1] ret endfunc @@ -278,25 +278,25 @@ 1: and x3, x3, #0x1ff add x8, x7, x3, lsl #3 add x3, x3, #2 - ld1 {v2.4S}, [x0] - ld1 {v3.2S}, [x1], #8 - ld1 {v4.2S}, [x2], #8 - ld1 {v5.4S}, [x8] - mov v6.16B, v2.16B - zip1 v3.4S, v3.4S, v3.4S - zip1 v4.4S, v4.4S, v4.4S - fmla v6.4S, v1.4S, v3.4S - fmla v2.4S, v5.4S, v4.4S - 
fcmeq v7.4S, v3.4S, #0 - bif v2.16B, v6.16B, v7.16B - st1 {v2.4S}, [x0], #16 + ld1 {v2.4s}, [x0] + ld1 {v3.2s}, [x1], #8 + ld1 {v4.2s}, [x2], #8 + ld1 {v5.4s}, [x8] + mov v6.16b, v2.16b + zip1 v3.4s, v3.4s, v3.4s + zip1 v4.4s, v4.4s, v4.4s + fmla v6.4s, v1.4s, v3.4s + fmla v2.4s, v5.4s, v4.4s + fcmeq v7.4s, v3.4s, #0 + bif v2.16b, v6.16b, v7.16b + st1 {v2.4s}, [x0], #16 subs x5, x5, #2 b.gt 1b .endm function ff_sbr_hf_apply_noise_0_neon, export=1 movrel x9, phi_noise_0 - ld1 {v1.4S}, [x9] + ld1 {v1.4s}, [x9] apply_noise_common ret endfunc @@ -305,14 +305,14 @@ movrel x9, phi_noise_1 and x4, x4, #1 add x9, x9, x4, lsl #4 - ld1 {v1.4S}, [x9] + ld1 {v1.4s}, [x9] apply_noise_common ret endfunc function ff_sbr_hf_apply_noise_2_neon, export=1 movrel x9, phi_noise_2 - ld1 {v1.4S}, [x9] + ld1 {v1.4s}, [x9] apply_noise_common ret endfunc @@ -321,7 +321,7 @@ movrel x9, phi_noise_3 and x4, x4, #1 add x9, x9, x4, lsl #4 - ld1 {v1.4S}, [x9] + ld1 {v1.4s}, [x9] apply_noise_common ret endfunc diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/simple_idct_neon.S ffmpeg-5.1.9/libavcodec/aarch64/simple_idct_neon.S --- ffmpeg-5.1.8/libavcodec/aarch64/simple_idct_neon.S 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/aarch64/simple_idct_neon.S 2026-03-16 18:10:00.000000000 +0000 @@ -54,7 +54,7 @@ prfm pldl1keep, [\data] mov x10, x30 movrel x3, idct_coeff_neon - ld1 {v0.2D}, [x3] + ld1 {v0.2d}, [x3] .endm .macro idct_end @@ -74,146 +74,146 @@ .endm .macro idct_col4_top y1, y2, y3, y4, i, l - smull\i v7.4S, \y3\l, z2 - smull\i v16.4S, \y3\l, z6 - smull\i v17.4S, \y2\l, z1 - add v19.4S, v23.4S, v7.4S - smull\i v18.4S, \y2\l, z3 - add v20.4S, v23.4S, v16.4S - smull\i v5.4S, \y2\l, z5 - sub v21.4S, v23.4S, v16.4S - smull\i v6.4S, \y2\l, z7 - sub v22.4S, v23.4S, v7.4S - - smlal\i v17.4S, \y4\l, z3 - smlsl\i v18.4S, \y4\l, z7 - smlsl\i v5.4S, \y4\l, z1 - smlsl\i v6.4S, \y4\l, z5 + smull\i v7.4s, \y3\l, z2 + smull\i v16.4s, \y3\l, z6 + smull\i v17.4s, \y2\l, z1 + add v19.4s, v23.4s, 
v7.4s + smull\i v18.4s, \y2\l, z3 + add v20.4s, v23.4s, v16.4s + smull\i v5.4s, \y2\l, z5 + sub v21.4s, v23.4s, v16.4s + smull\i v6.4s, \y2\l, z7 + sub v22.4s, v23.4s, v7.4s + + smlal\i v17.4s, \y4\l, z3 + smlsl\i v18.4s, \y4\l, z7 + smlsl\i v5.4s, \y4\l, z1 + smlsl\i v6.4s, \y4\l, z5 .endm .macro idct_row4_neon y1, y2, y3, y4, pass - ld1 {\y1\().2D,\y2\().2D}, [x2], #32 - movi v23.4S, #1<<2, lsl #8 - orr v5.16B, \y1\().16B, \y2\().16B - ld1 {\y3\().2D,\y4\().2D}, [x2], #32 - orr v6.16B, \y3\().16B, \y4\().16B - orr v5.16B, v5.16B, v6.16B - mov x3, v5.D[1] - smlal v23.4S, \y1\().4H, z4 + ld1 {\y1\().2d,\y2\().2d}, [x2], #32 + movi v23.4s, #1<<2, lsl #8 + orr v5.16b, \y1\().16b, \y2\().16b + ld1 {\y3\().2d,\y4\().2d}, [x2], #32 + orr v6.16b, \y3\().16b, \y4\().16b + orr v5.16b, v5.16b, v6.16b + mov x3, v5.d[1] + smlal v23.4s, \y1\().4h, z4 - idct_col4_top \y1, \y2, \y3, \y4, 1, .4H + idct_col4_top \y1, \y2, \y3, \y4, 1, .4h cmp x3, #0 b.eq \pass\()f - smull2 v7.4S, \y1\().8H, z4 - smlal2 v17.4S, \y2\().8H, z5 - smlsl2 v18.4S, \y2\().8H, z1 - smull2 v16.4S, \y3\().8H, z2 - smlal2 v5.4S, \y2\().8H, z7 - add v19.4S, v19.4S, v7.4S - sub v20.4S, v20.4S, v7.4S - sub v21.4S, v21.4S, v7.4S - add v22.4S, v22.4S, v7.4S - smlal2 v6.4S, \y2\().8H, z3 - smull2 v7.4S, \y3\().8H, z6 - smlal2 v17.4S, \y4\().8H, z7 - smlsl2 v18.4S, \y4\().8H, z5 - smlal2 v5.4S, \y4\().8H, z3 - smlsl2 v6.4S, \y4\().8H, z1 - add v19.4S, v19.4S, v7.4S - sub v20.4S, v20.4S, v16.4S - add v21.4S, v21.4S, v16.4S - sub v22.4S, v22.4S, v7.4S + smull2 v7.4s, \y1\().8h, z4 + smlal2 v17.4s, \y2\().8h, z5 + smlsl2 v18.4s, \y2\().8h, z1 + smull2 v16.4s, \y3\().8h, z2 + smlal2 v5.4s, \y2\().8h, z7 + add v19.4s, v19.4s, v7.4s + sub v20.4s, v20.4s, v7.4s + sub v21.4s, v21.4s, v7.4s + add v22.4s, v22.4s, v7.4s + smlal2 v6.4s, \y2\().8h, z3 + smull2 v7.4s, \y3\().8h, z6 + smlal2 v17.4s, \y4\().8h, z7 + smlsl2 v18.4s, \y4\().8h, z5 + smlal2 v5.4s, \y4\().8h, z3 + smlsl2 v6.4s, \y4\().8h, z1 + add v19.4s, v19.4s, v7.4s 
+ sub v20.4s, v20.4s, v16.4s + add v21.4s, v21.4s, v16.4s + sub v22.4s, v22.4s, v7.4s \pass: add \y3\().4S, v19.4S, v17.4S - add \y4\().4S, v20.4S, v18.4S - shrn \y1\().4H, \y3\().4S, #ROW_SHIFT - shrn \y2\().4H, \y4\().4S, #ROW_SHIFT - add v7.4S, v21.4S, v5.4S - add v16.4S, v22.4S, v6.4S - shrn \y3\().4H, v7.4S, #ROW_SHIFT - shrn \y4\().4H, v16.4S, #ROW_SHIFT - sub v22.4S, v22.4S, v6.4S - sub v19.4S, v19.4S, v17.4S - sub v21.4S, v21.4S, v5.4S - shrn2 \y1\().8H, v22.4S, #ROW_SHIFT - sub v20.4S, v20.4S, v18.4S - shrn2 \y2\().8H, v21.4S, #ROW_SHIFT - shrn2 \y3\().8H, v20.4S, #ROW_SHIFT - shrn2 \y4\().8H, v19.4S, #ROW_SHIFT - - trn1 v16.8H, \y1\().8H, \y2\().8H - trn2 v17.8H, \y1\().8H, \y2\().8H - trn1 v18.8H, \y3\().8H, \y4\().8H - trn2 v19.8H, \y3\().8H, \y4\().8H - trn1 \y1\().4S, v16.4S, v18.4S - trn1 \y2\().4S, v17.4S, v19.4S - trn2 \y3\().4S, v16.4S, v18.4S - trn2 \y4\().4S, v17.4S, v19.4S + add \y4\().4s, v20.4s, v18.4s + shrn \y1\().4h, \y3\().4s, #ROW_SHIFT + shrn \y2\().4h, \y4\().4s, #ROW_SHIFT + add v7.4s, v21.4s, v5.4s + add v16.4s, v22.4s, v6.4s + shrn \y3\().4h, v7.4s, #ROW_SHIFT + shrn \y4\().4h, v16.4s, #ROW_SHIFT + sub v22.4s, v22.4s, v6.4s + sub v19.4s, v19.4s, v17.4s + sub v21.4s, v21.4s, v5.4s + shrn2 \y1\().8h, v22.4s, #ROW_SHIFT + sub v20.4s, v20.4s, v18.4s + shrn2 \y2\().8h, v21.4s, #ROW_SHIFT + shrn2 \y3\().8h, v20.4s, #ROW_SHIFT + shrn2 \y4\().8h, v19.4s, #ROW_SHIFT + + trn1 v16.8h, \y1\().8h, \y2\().8h + trn2 v17.8h, \y1\().8h, \y2\().8h + trn1 v18.8h, \y3\().8h, \y4\().8h + trn2 v19.8h, \y3\().8h, \y4\().8h + trn1 \y1\().4s, v16.4s, v18.4s + trn1 \y2\().4s, v17.4s, v19.4s + trn2 \y3\().4s, v16.4s, v18.4s + trn2 \y4\().4s, v17.4s, v19.4s .endm .macro declare_idct_col4_neon i, l function idct_col4_neon\i - dup v23.4H, z4c + dup v23.4h, z4c .if \i == 1 - add v23.4H, v23.4H, v24.4H + add v23.4h, v23.4h, v24.4h .else - mov v5.D[0], v24.D[1] - add v23.4H, v23.4H, v5.4H + mov v5.d[0], v24.d[1] + add v23.4h, v23.4h, v5.4h .endif - smull v23.4S, 
v23.4H, z4 + smull v23.4s, v23.4h, z4 idct_col4_top v24, v25, v26, v27, \i, \l - mov x4, v28.D[\i - 1] - mov x5, v29.D[\i - 1] + mov x4, v28.d[\i - 1] + mov x5, v29.d[\i - 1] cmp x4, #0 b.eq 1f - smull\i v7.4S, v28\l, z4 - add v19.4S, v19.4S, v7.4S - sub v20.4S, v20.4S, v7.4S - sub v21.4S, v21.4S, v7.4S - add v22.4S, v22.4S, v7.4S + smull\i v7.4s, v28\l, z4 + add v19.4s, v19.4s, v7.4s + sub v20.4s, v20.4s, v7.4s + sub v21.4s, v21.4s, v7.4s + add v22.4s, v22.4s, v7.4s -1: mov x4, v30.D[\i - 1] +1: mov x4, v30.d[\i - 1] cmp x5, #0 b.eq 2f - smlal\i v17.4S, v29\l, z5 - smlsl\i v18.4S, v29\l, z1 - smlal\i v5.4S, v29\l, z7 - smlal\i v6.4S, v29\l, z3 + smlal\i v17.4s, v29\l, z5 + smlsl\i v18.4s, v29\l, z1 + smlal\i v5.4s, v29\l, z7 + smlal\i v6.4s, v29\l, z3 -2: mov x5, v31.D[\i - 1] +2: mov x5, v31.d[\i - 1] cmp x4, #0 b.eq 3f - smull\i v7.4S, v30\l, z6 - smull\i v16.4S, v30\l, z2 - add v19.4S, v19.4S, v7.4S - sub v22.4S, v22.4S, v7.4S - sub v20.4S, v20.4S, v16.4S - add v21.4S, v21.4S, v16.4S + smull\i v7.4s, v30\l, z6 + smull\i v16.4s, v30\l, z2 + add v19.4s, v19.4s, v7.4s + sub v22.4s, v22.4s, v7.4s + sub v20.4s, v20.4s, v16.4s + add v21.4s, v21.4s, v16.4s 3: cmp x5, #0 b.eq 4f - smlal\i v17.4S, v31\l, z7 - smlsl\i v18.4S, v31\l, z5 - smlal\i v5.4S, v31\l, z3 - smlsl\i v6.4S, v31\l, z1 - -4: addhn v7.4H, v19.4S, v17.4S - addhn2 v7.8H, v20.4S, v18.4S - subhn v18.4H, v20.4S, v18.4S - subhn2 v18.8H, v19.4S, v17.4S - - addhn v16.4H, v21.4S, v5.4S - addhn2 v16.8H, v22.4S, v6.4S - subhn v17.4H, v22.4S, v6.4S - subhn2 v17.8H, v21.4S, v5.4S + smlal\i v17.4s, v31\l, z7 + smlsl\i v18.4s, v31\l, z5 + smlal\i v5.4s, v31\l, z3 + smlsl\i v6.4s, v31\l, z1 + +4: addhn v7.4h, v19.4s, v17.4s + addhn2 v7.8h, v20.4s, v18.4s + subhn v18.4h, v20.4s, v18.4s + subhn2 v18.8h, v19.4s, v17.4s + + addhn v16.4h, v21.4s, v5.4s + addhn2 v16.8h, v22.4s, v6.4s + subhn v17.4h, v22.4s, v6.4s + subhn2 v17.8h, v21.4s, v5.4s ret endfunc @@ -229,33 +229,33 @@ idct_row4_neon v28, v29, v30, v31, 2 bl 
idct_col4_neon1 - sqshrun v1.8B, v7.8H, #COL_SHIFT-16 - sqshrun2 v1.16B, v16.8H, #COL_SHIFT-16 - sqshrun v3.8B, v17.8H, #COL_SHIFT-16 - sqshrun2 v3.16B, v18.8H, #COL_SHIFT-16 + sqshrun v1.8b, v7.8h, #COL_SHIFT-16 + sqshrun2 v1.16b, v16.8h, #COL_SHIFT-16 + sqshrun v3.8b, v17.8h, #COL_SHIFT-16 + sqshrun2 v3.16b, v18.8h, #COL_SHIFT-16 bl idct_col4_neon2 - sqshrun v2.8B, v7.8H, #COL_SHIFT-16 - sqshrun2 v2.16B, v16.8H, #COL_SHIFT-16 - sqshrun v4.8B, v17.8H, #COL_SHIFT-16 - sqshrun2 v4.16B, v18.8H, #COL_SHIFT-16 - - zip1 v16.4S, v1.4S, v2.4S - zip2 v17.4S, v1.4S, v2.4S - - st1 {v16.D}[0], [x0], x1 - st1 {v16.D}[1], [x0], x1 - - zip1 v18.4S, v3.4S, v4.4S - zip2 v19.4S, v3.4S, v4.4S - - st1 {v17.D}[0], [x0], x1 - st1 {v17.D}[1], [x0], x1 - st1 {v18.D}[0], [x0], x1 - st1 {v18.D}[1], [x0], x1 - st1 {v19.D}[0], [x0], x1 - st1 {v19.D}[1], [x0], x1 + sqshrun v2.8b, v7.8h, #COL_SHIFT-16 + sqshrun2 v2.16b, v16.8h, #COL_SHIFT-16 + sqshrun v4.8b, v17.8h, #COL_SHIFT-16 + sqshrun2 v4.16b, v18.8h, #COL_SHIFT-16 + + zip1 v16.4s, v1.4s, v2.4s + zip2 v17.4s, v1.4s, v2.4s + + st1 {v16.d}[0], [x0], x1 + st1 {v16.d}[1], [x0], x1 + + zip1 v18.4s, v3.4s, v4.4s + zip2 v19.4s, v3.4s, v4.4s + + st1 {v17.d}[0], [x0], x1 + st1 {v17.d}[1], [x0], x1 + st1 {v18.d}[0], [x0], x1 + st1 {v18.d}[1], [x0], x1 + st1 {v19.d}[0], [x0], x1 + st1 {v19.d}[1], [x0], x1 idct_end endfunc @@ -267,59 +267,59 @@ idct_row4_neon v28, v29, v30, v31, 2 bl idct_col4_neon1 - sshr v1.8H, v7.8H, #COL_SHIFT-16 - sshr v2.8H, v16.8H, #COL_SHIFT-16 - sshr v3.8H, v17.8H, #COL_SHIFT-16 - sshr v4.8H, v18.8H, #COL_SHIFT-16 + sshr v1.8h, v7.8h, #COL_SHIFT-16 + sshr v2.8h, v16.8h, #COL_SHIFT-16 + sshr v3.8h, v17.8h, #COL_SHIFT-16 + sshr v4.8h, v18.8h, #COL_SHIFT-16 bl idct_col4_neon2 - sshr v7.8H, v7.8H, #COL_SHIFT-16 - sshr v16.8H, v16.8H, #COL_SHIFT-16 - sshr v17.8H, v17.8H, #COL_SHIFT-16 - sshr v18.8H, v18.8H, #COL_SHIFT-16 + sshr v7.8h, v7.8h, #COL_SHIFT-16 + sshr v16.8h, v16.8h, #COL_SHIFT-16 + sshr v17.8h, v17.8h, #COL_SHIFT-16 + 
sshr v18.8h, v18.8h, #COL_SHIFT-16 mov x9, x0 - ld1 {v19.D}[0], [x0], x1 - zip1 v23.2D, v1.2D, v7.2D - zip2 v24.2D, v1.2D, v7.2D - ld1 {v19.D}[1], [x0], x1 - zip1 v25.2D, v2.2D, v16.2D - zip2 v26.2D, v2.2D, v16.2D - ld1 {v20.D}[0], [x0], x1 - zip1 v27.2D, v3.2D, v17.2D - zip2 v28.2D, v3.2D, v17.2D - ld1 {v20.D}[1], [x0], x1 - zip1 v29.2D, v4.2D, v18.2D - zip2 v30.2D, v4.2D, v18.2D - ld1 {v21.D}[0], [x0], x1 - uaddw v23.8H, v23.8H, v19.8B - uaddw2 v24.8H, v24.8H, v19.16B - ld1 {v21.D}[1], [x0], x1 - sqxtun v23.8B, v23.8H - sqxtun2 v23.16B, v24.8H - ld1 {v22.D}[0], [x0], x1 - uaddw v24.8H, v25.8H, v20.8B - uaddw2 v25.8H, v26.8H, v20.16B - ld1 {v22.D}[1], [x0], x1 - sqxtun v24.8B, v24.8H - sqxtun2 v24.16B, v25.8H - st1 {v23.D}[0], [x9], x1 - uaddw v25.8H, v27.8H, v21.8B - uaddw2 v26.8H, v28.8H, v21.16B - st1 {v23.D}[1], [x9], x1 - sqxtun v25.8B, v25.8H - sqxtun2 v25.16B, v26.8H - st1 {v24.D}[0], [x9], x1 - uaddw v26.8H, v29.8H, v22.8B - uaddw2 v27.8H, v30.8H, v22.16B - st1 {v24.D}[1], [x9], x1 - sqxtun v26.8B, v26.8H - sqxtun2 v26.16B, v27.8H - st1 {v25.D}[0], [x9], x1 - st1 {v25.D}[1], [x9], x1 - st1 {v26.D}[0], [x9], x1 - st1 {v26.D}[1], [x9], x1 + ld1 {v19.d}[0], [x0], x1 + zip1 v23.2d, v1.2d, v7.2d + zip2 v24.2d, v1.2d, v7.2d + ld1 {v19.d}[1], [x0], x1 + zip1 v25.2d, v2.2d, v16.2d + zip2 v26.2d, v2.2d, v16.2d + ld1 {v20.d}[0], [x0], x1 + zip1 v27.2d, v3.2d, v17.2d + zip2 v28.2d, v3.2d, v17.2d + ld1 {v20.d}[1], [x0], x1 + zip1 v29.2d, v4.2d, v18.2d + zip2 v30.2d, v4.2d, v18.2d + ld1 {v21.d}[0], [x0], x1 + uaddw v23.8h, v23.8h, v19.8b + uaddw2 v24.8h, v24.8h, v19.16b + ld1 {v21.d}[1], [x0], x1 + sqxtun v23.8b, v23.8h + sqxtun2 v23.16b, v24.8h + ld1 {v22.d}[0], [x0], x1 + uaddw v24.8h, v25.8h, v20.8b + uaddw2 v25.8h, v26.8h, v20.16b + ld1 {v22.d}[1], [x0], x1 + sqxtun v24.8b, v24.8h + sqxtun2 v24.16b, v25.8h + st1 {v23.d}[0], [x9], x1 + uaddw v25.8h, v27.8h, v21.8b + uaddw2 v26.8h, v28.8h, v21.16b + st1 {v23.d}[1], [x9], x1 + sqxtun v25.8b, v25.8h + sqxtun2 v25.16b, 
v26.8h + st1 {v24.d}[0], [x9], x1 + uaddw v26.8h, v29.8h, v22.8b + uaddw2 v27.8h, v30.8h, v22.16b + st1 {v24.d}[1], [x9], x1 + sqxtun v26.8b, v26.8h + sqxtun2 v26.16b, v27.8h + st1 {v25.d}[0], [x9], x1 + st1 {v25.d}[1], [x9], x1 + st1 {v26.d}[0], [x9], x1 + st1 {v26.d}[1], [x9], x1 idct_end endfunc @@ -333,30 +333,30 @@ sub x2, x2, #128 bl idct_col4_neon1 - sshr v1.8H, v7.8H, #COL_SHIFT-16 - sshr v2.8H, v16.8H, #COL_SHIFT-16 - sshr v3.8H, v17.8H, #COL_SHIFT-16 - sshr v4.8H, v18.8H, #COL_SHIFT-16 + sshr v1.8h, v7.8h, #COL_SHIFT-16 + sshr v2.8h, v16.8h, #COL_SHIFT-16 + sshr v3.8h, v17.8h, #COL_SHIFT-16 + sshr v4.8h, v18.8h, #COL_SHIFT-16 bl idct_col4_neon2 - sshr v7.8H, v7.8H, #COL_SHIFT-16 - sshr v16.8H, v16.8H, #COL_SHIFT-16 - sshr v17.8H, v17.8H, #COL_SHIFT-16 - sshr v18.8H, v18.8H, #COL_SHIFT-16 - - zip1 v23.2D, v1.2D, v7.2D - zip2 v24.2D, v1.2D, v7.2D - st1 {v23.2D,v24.2D}, [x2], #32 - zip1 v25.2D, v2.2D, v16.2D - zip2 v26.2D, v2.2D, v16.2D - st1 {v25.2D,v26.2D}, [x2], #32 - zip1 v27.2D, v3.2D, v17.2D - zip2 v28.2D, v3.2D, v17.2D - st1 {v27.2D,v28.2D}, [x2], #32 - zip1 v29.2D, v4.2D, v18.2D - zip2 v30.2D, v4.2D, v18.2D - st1 {v29.2D,v30.2D}, [x2], #32 + sshr v7.8h, v7.8h, #COL_SHIFT-16 + sshr v16.8h, v16.8h, #COL_SHIFT-16 + sshr v17.8h, v17.8h, #COL_SHIFT-16 + sshr v18.8h, v18.8h, #COL_SHIFT-16 + + zip1 v23.2d, v1.2d, v7.2d + zip2 v24.2d, v1.2d, v7.2d + st1 {v23.2d,v24.2d}, [x2], #32 + zip1 v25.2d, v2.2d, v16.2d + zip2 v26.2d, v2.2d, v16.2d + st1 {v25.2d,v26.2d}, [x2], #32 + zip1 v27.2d, v3.2d, v17.2d + zip2 v28.2d, v3.2d, v17.2d + st1 {v27.2d,v28.2d}, [x2], #32 + zip1 v29.2d, v4.2d, v18.2d + zip2 v30.2d, v4.2d, v18.2d + st1 {v29.2d,v30.2d}, [x2], #32 idct_end endfunc diff -Nru ffmpeg-5.1.8/libavcodec/aarch64/vp8dsp_neon.S ffmpeg-5.1.9/libavcodec/aarch64/vp8dsp_neon.S --- ffmpeg-5.1.8/libavcodec/aarch64/vp8dsp_neon.S 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/aarch64/vp8dsp_neon.S 2026-03-16 18:10:00.000000000 +0000 @@ -330,32 +330,32 @@ // 
v17: hev // convert to signed value: - eor v3.16b, v3.16b, v21.16b // PS0 = P0 ^ 0x80 - eor v4.16b, v4.16b, v21.16b // QS0 = Q0 ^ 0x80 + eor v3.16b, v3.16b, v21.16b // PS0 = P0 ^ 0x80 + eor v4.16b, v4.16b, v21.16b // QS0 = Q0 ^ 0x80 - movi v20.8h, #3 - ssubl v18.8h, v4.8b, v3.8b // QS0 - PS0 - ssubl2 v19.8h, v4.16b, v3.16b // (widened to 16bit) - eor v2.16b, v2.16b, v21.16b // PS1 = P1 ^ 0x80 - eor v5.16b, v5.16b, v21.16b // QS1 = Q1 ^ 0x80 - mul v18.8h, v18.8h, v20.8h // w = 3 * (QS0 - PS0) - mul v19.8h, v19.8h, v20.8h - - sqsub v20.16b, v2.16b, v5.16b // clamp(PS1-QS1) - movi v22.16b, #4 - movi v23.16b, #3 + movi v20.8h, #3 + ssubl v18.8h, v4.8b, v3.8b // QS0 - PS0 + ssubl2 v19.8h, v4.16b, v3.16b // (widened to 16bit) + eor v2.16b, v2.16b, v21.16b // PS1 = P1 ^ 0x80 + eor v5.16b, v5.16b, v21.16b // QS1 = Q1 ^ 0x80 + mul v18.8h, v18.8h, v20.8h // w = 3 * (QS0 - PS0) + mul v19.8h, v19.8h, v20.8h + + sqsub v20.16b, v2.16b, v5.16b // clamp(PS1-QS1) + movi v22.16b, #4 + movi v23.16b, #3 .if \inner - and v20.16b, v20.16b, v17.16b // if(hev) w += clamp(PS1-QS1) + and v20.16b, v20.16b, v17.16b // if(hev) w += clamp(PS1-QS1) .endif - saddw v18.8h, v18.8h, v20.8b // w += clamp(PS1-QS1) - saddw2 v19.8h, v19.8h, v20.16b - sqxtn v18.8b, v18.8h // narrow result back into v18 - sqxtn2 v18.16b, v19.8h + saddw v18.8h, v18.8h, v20.8b // w += clamp(PS1-QS1) + saddw2 v19.8h, v19.8h, v20.16b + sqxtn v18.8b, v18.8h // narrow result back into v18 + sqxtn2 v18.16b, v19.8h .if !\inner && !\simple - eor v1.16b, v1.16b, v21.16b // PS2 = P2 ^ 0x80 - eor v6.16b, v6.16b, v21.16b // QS2 = Q2 ^ 0x80 + eor v1.16b, v1.16b, v21.16b // PS2 = P2 ^ 0x80 + eor v6.16b, v6.16b, v21.16b // QS2 = Q2 ^ 0x80 .endif - and v18.16b, v18.16b, v16.16b // w &= normal_limit + and v18.16b, v18.16b, v16.16b // w &= normal_limit // registers used at this point.. 
// v0 -> P3 (don't corrupt) @@ -375,44 +375,44 @@ // P0 = s2u(PS0 + c2); .if \simple - sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4) - sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3) - sshr v19.16b, v19.16b, #3 // c1 >>= 3 - sshr v20.16b, v20.16b, #3 // c2 >>= 3 - sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1) - sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2) - eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80 - eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80 - eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80 - eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80 + sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4) + sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3) + sshr v19.16b, v19.16b, #3 // c1 >>= 3 + sshr v20.16b, v20.16b, #3 // c2 >>= 3 + sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1) + sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2) + eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80 + eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80 + eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80 + eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80 .elseif \inner // the !is4tap case of filter_common, only used for inner blocks // c3 = ((c1&~hev) + 1) >> 1; // Q1 = s2u(QS1 - c3); // P1 = s2u(PS1 + c3); - sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4) - sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3) - sshr v19.16b, v19.16b, #3 // c1 >>= 3 - sshr v20.16b, v20.16b, #3 // c2 >>= 3 - sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1) - sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2) - bic v19.16b, v19.16b, v17.16b // c1 & ~hev - eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80 - srshr v19.16b, v19.16b, #1 // c3 >>= 1 - eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80 - sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-c3) - sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+c3) - eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80 - eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80 + sqadd v19.16b, v18.16b, v22.16b // c1 = 
clamp((w&hev)+4) + sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3) + sshr v19.16b, v19.16b, #3 // c1 >>= 3 + sshr v20.16b, v20.16b, #3 // c2 >>= 3 + sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1) + sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2) + bic v19.16b, v19.16b, v17.16b // c1 & ~hev + eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80 + srshr v19.16b, v19.16b, #1 // c3 >>= 1 + eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80 + sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-c3) + sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+c3) + eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80 + eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80 .else - and v20.16b, v18.16b, v17.16b // w & hev - sqadd v19.16b, v20.16b, v22.16b // c1 = clamp((w&hev)+4) - sqadd v20.16b, v20.16b, v23.16b // c2 = clamp((w&hev)+3) - sshr v19.16b, v19.16b, #3 // c1 >>= 3 - sshr v20.16b, v20.16b, #3 // c2 >>= 3 - bic v18.16b, v18.16b, v17.16b // w &= ~hev - sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1) - sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2) + and v20.16b, v18.16b, v17.16b // w & hev + sqadd v19.16b, v20.16b, v22.16b // c1 = clamp((w&hev)+4) + sqadd v20.16b, v20.16b, v23.16b // c2 = clamp((w&hev)+3) + sshr v19.16b, v19.16b, #3 // c1 >>= 3 + sshr v20.16b, v20.16b, #3 // c2 >>= 3 + bic v18.16b, v18.16b, v17.16b // w &= ~hev + sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1) + sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2) // filter_mbedge: // a = clamp((27*w + 63) >> 7); @@ -424,35 +424,35 @@ // a = clamp((9*w + 63) >> 7); // Q2 = s2u(QS2 - a); // P2 = s2u(PS2 + a); - movi v17.8h, #63 - sshll v22.8h, v18.8b, #3 - sshll2 v23.8h, v18.16b, #3 - saddw v22.8h, v22.8h, v18.8b - saddw2 v23.8h, v23.8h, v18.16b - add v16.8h, v17.8h, v22.8h - add v17.8h, v17.8h, v23.8h // 9*w + 63 - add v19.8h, v16.8h, v22.8h - add v20.8h, v17.8h, v23.8h // 18*w + 63 - add v22.8h, v19.8h, v22.8h - add v23.8h, v20.8h, v23.8h // 27*w + 63 - sqshrn v16.8b, v16.8h, #7 - sqshrn2 
v16.16b, v17.8h, #7 // clamp(( 9*w + 63)>>7) - sqshrn v19.8b, v19.8h, #7 - sqshrn2 v19.16b, v20.8h, #7 // clamp((18*w + 63)>>7) - sqshrn v22.8b, v22.8h, #7 - sqshrn2 v22.16b, v23.8h, #7 // clamp((27*w + 63)>>7) - sqadd v1.16b, v1.16b, v16.16b // PS2 = clamp(PS2+a) - sqsub v6.16b, v6.16b, v16.16b // QS2 = clamp(QS2-a) - sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+a) - sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-a) - sqadd v3.16b, v3.16b, v22.16b // PS0 = clamp(PS0+a) - sqsub v4.16b, v4.16b, v22.16b // QS0 = clamp(QS0-a) - eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80 - eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80 - eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80 - eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80 - eor v1.16b, v1.16b, v21.16b // P2 = PS2 ^ 0x80 - eor v6.16b, v6.16b, v21.16b // Q2 = QS2 ^ 0x80 + movi v17.8h, #63 + sshll v22.8h, v18.8b, #3 + sshll2 v23.8h, v18.16b, #3 + saddw v22.8h, v22.8h, v18.8b + saddw2 v23.8h, v23.8h, v18.16b + add v16.8h, v17.8h, v22.8h + add v17.8h, v17.8h, v23.8h // 9*w + 63 + add v19.8h, v16.8h, v22.8h + add v20.8h, v17.8h, v23.8h // 18*w + 63 + add v22.8h, v19.8h, v22.8h + add v23.8h, v20.8h, v23.8h // 27*w + 63 + sqshrn v16.8b, v16.8h, #7 + sqshrn2 v16.16b, v17.8h, #7 // clamp(( 9*w + 63)>>7) + sqshrn v19.8b, v19.8h, #7 + sqshrn2 v19.16b, v20.8h, #7 // clamp((18*w + 63)>>7) + sqshrn v22.8b, v22.8h, #7 + sqshrn2 v22.16b, v23.8h, #7 // clamp((27*w + 63)>>7) + sqadd v1.16b, v1.16b, v16.16b // PS2 = clamp(PS2+a) + sqsub v6.16b, v6.16b, v16.16b // QS2 = clamp(QS2-a) + sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+a) + sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-a) + sqadd v3.16b, v3.16b, v22.16b // PS0 = clamp(PS0+a) + sqsub v4.16b, v4.16b, v22.16b // QS0 = clamp(QS0-a) + eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80 + eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80 + eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80 + eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80 + eor v1.16b, v1.16b, v21.16b // P2 = PS2 ^ 0x80 + 
eor v6.16b, v6.16b, v21.16b // Q2 = QS2 ^ 0x80 .endif .endm @@ -507,48 +507,48 @@ sub x0, x0, x2, lsl #2 sub x1, x1, x2, lsl #2 // Load pixels: - ld1 {v0.d}[0], [x0], x2 // P3 - ld1 {v0.d}[1], [x1], x2 // P3 - ld1 {v1.d}[0], [x0], x2 // P2 - ld1 {v1.d}[1], [x1], x2 // P2 - ld1 {v2.d}[0], [x0], x2 // P1 - ld1 {v2.d}[1], [x1], x2 // P1 - ld1 {v3.d}[0], [x0], x2 // P0 - ld1 {v3.d}[1], [x1], x2 // P0 - ld1 {v4.d}[0], [x0], x2 // Q0 - ld1 {v4.d}[1], [x1], x2 // Q0 - ld1 {v5.d}[0], [x0], x2 // Q1 - ld1 {v5.d}[1], [x1], x2 // Q1 - ld1 {v6.d}[0], [x0], x2 // Q2 - ld1 {v6.d}[1], [x1], x2 // Q2 - ld1 {v7.d}[0], [x0] // Q3 - ld1 {v7.d}[1], [x1] // Q3 + ld1 {v0.d}[0], [x0], x2 // P3 + ld1 {v0.d}[1], [x1], x2 // P3 + ld1 {v1.d}[0], [x0], x2 // P2 + ld1 {v1.d}[1], [x1], x2 // P2 + ld1 {v2.d}[0], [x0], x2 // P1 + ld1 {v2.d}[1], [x1], x2 // P1 + ld1 {v3.d}[0], [x0], x2 // P0 + ld1 {v3.d}[1], [x1], x2 // P0 + ld1 {v4.d}[0], [x0], x2 // Q0 + ld1 {v4.d}[1], [x1], x2 // Q0 + ld1 {v5.d}[0], [x0], x2 // Q1 + ld1 {v5.d}[1], [x1], x2 // Q1 + ld1 {v6.d}[0], [x0], x2 // Q2 + ld1 {v6.d}[1], [x1], x2 // Q2 + ld1 {v7.d}[0], [x0] // Q3 + ld1 {v7.d}[1], [x1] // Q3 - dup v22.16b, w3 // flim_E - dup v23.16b, w4 // flim_I + dup v22.16b, w3 // flim_E + dup v23.16b, w4 // flim_I vp8_loop_filter inner=\inner, hev_thresh=w5 // back up to P2: u,v -= stride * 6 - sub x0, x0, x2, lsl #2 - sub x1, x1, x2, lsl #2 - sub x0, x0, x2, lsl #1 - sub x1, x1, x2, lsl #1 + sub x0, x0, x2, lsl #2 + sub x1, x1, x2, lsl #2 + sub x0, x0, x2, lsl #1 + sub x1, x1, x2, lsl #1 // Store pixels: - st1 {v1.d}[0], [x0], x2 // P2 - st1 {v1.d}[1], [x1], x2 // P2 - st1 {v2.d}[0], [x0], x2 // P1 - st1 {v2.d}[1], [x1], x2 // P1 - st1 {v3.d}[0], [x0], x2 // P0 - st1 {v3.d}[1], [x1], x2 // P0 - st1 {v4.d}[0], [x0], x2 // Q0 - st1 {v4.d}[1], [x1], x2 // Q0 - st1 {v5.d}[0], [x0], x2 // Q1 - st1 {v5.d}[1], [x1], x2 // Q1 - st1 {v6.d}[0], [x0] // Q2 - st1 {v6.d}[1], [x1] // Q2 + st1 {v1.d}[0], [x0], x2 // P2 + st1 {v1.d}[1], [x1], x2 // 
P2 + st1 {v2.d}[0], [x0], x2 // P1 + st1 {v2.d}[1], [x1], x2 // P1 + st1 {v3.d}[0], [x0], x2 // P0 + st1 {v3.d}[1], [x1], x2 // P0 + st1 {v4.d}[0], [x0], x2 // Q0 + st1 {v4.d}[1], [x1], x2 // Q0 + st1 {v5.d}[0], [x0], x2 // Q1 + st1 {v5.d}[1], [x1], x2 // Q1 + st1 {v6.d}[0], [x0] // Q2 + st1 {v6.d}[1], [x1] // Q2 ret endfunc @@ -579,7 +579,7 @@ ld1 {v6.d}[1], [x0], x1 ld1 {v7.d}[1], [x0], x1 - transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31 + transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31 dup v22.16b, w2 // flim_E .if !\simple @@ -590,7 +590,7 @@ sub x0, x0, x1, lsl #4 // backup 16 rows - transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31 + transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31 // Store pixels: st1 {v0.d}[0], [x0], x1 @@ -624,24 +624,24 @@ sub x1, x1, #4 // Load pixels: - ld1 {v0.d}[0], [x0], x2 // load u - ld1 {v0.d}[1], [x1], x2 // load v - ld1 {v1.d}[0], [x0], x2 - ld1 {v1.d}[1], [x1], x2 - ld1 {v2.d}[0], [x0], x2 - ld1 {v2.d}[1], [x1], x2 - ld1 {v3.d}[0], [x0], x2 - ld1 {v3.d}[1], [x1], x2 - ld1 {v4.d}[0], [x0], x2 - ld1 {v4.d}[1], [x1], x2 - ld1 {v5.d}[0], [x0], x2 - ld1 {v5.d}[1], [x1], x2 - ld1 {v6.d}[0], [x0], x2 - ld1 {v6.d}[1], [x1], x2 - ld1 {v7.d}[0], [x0], x2 - ld1 {v7.d}[1], [x1], x2 + ld1 {v0.d}[0], [x0], x2 // load u + ld1 {v0.d}[1], [x1], x2 // load v + ld1 {v1.d}[0], [x0], x2 + ld1 {v1.d}[1], [x1], x2 + ld1 {v2.d}[0], [x0], x2 + ld1 {v2.d}[1], [x1], x2 + ld1 {v3.d}[0], [x0], x2 + ld1 {v3.d}[1], [x1], x2 + ld1 {v4.d}[0], [x0], x2 + ld1 {v4.d}[1], [x1], x2 + ld1 {v5.d}[0], [x0], x2 + ld1 {v5.d}[1], [x1], x2 + ld1 {v6.d}[0], [x0], x2 + ld1 {v6.d}[1], [x1], x2 + ld1 {v7.d}[0], [x0], x2 + ld1 {v7.d}[1], [x1], x2 - transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31 + transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31 dup v22.16b, w3 // flim_E dup v23.16b, w4 // flim_I @@ -651,25 +651,25 @@ sub x0, x0, x2, lsl #3 // backup u 8 rows sub x1, x1, x2, lsl #3 // backup v 8 rows - transpose_8x16B v0, v1, 
v2, v3, v4, v5, v6, v7, v30, v31 + transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31 // Store pixels: - st1 {v0.d}[0], [x0], x2 // load u - st1 {v0.d}[1], [x1], x2 // load v - st1 {v1.d}[0], [x0], x2 - st1 {v1.d}[1], [x1], x2 - st1 {v2.d}[0], [x0], x2 - st1 {v2.d}[1], [x1], x2 - st1 {v3.d}[0], [x0], x2 - st1 {v3.d}[1], [x1], x2 - st1 {v4.d}[0], [x0], x2 - st1 {v4.d}[1], [x1], x2 - st1 {v5.d}[0], [x0], x2 - st1 {v5.d}[1], [x1], x2 - st1 {v6.d}[0], [x0], x2 - st1 {v6.d}[1], [x1], x2 - st1 {v7.d}[0], [x0] - st1 {v7.d}[1], [x1] + st1 {v0.d}[0], [x0], x2 // load u + st1 {v0.d}[1], [x1], x2 // load v + st1 {v1.d}[0], [x0], x2 + st1 {v1.d}[1], [x1], x2 + st1 {v2.d}[0], [x0], x2 + st1 {v2.d}[1], [x1], x2 + st1 {v3.d}[0], [x0], x2 + st1 {v3.d}[1], [x1], x2 + st1 {v4.d}[0], [x0], x2 + st1 {v4.d}[1], [x1], x2 + st1 {v5.d}[0], [x0], x2 + st1 {v5.d}[1], [x1], x2 + st1 {v6.d}[0], [x0], x2 + st1 {v6.d}[1], [x1], x2 + st1 {v7.d}[0], [x0] + st1 {v7.d}[1], [x1] ret diff -Nru ffmpeg-5.1.8/libavcodec/adpcm.c ffmpeg-5.1.9/libavcodec/adpcm.c --- ffmpeg-5.1.8/libavcodec/adpcm.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/adpcm.c 2026-05-05 15:50:55.000000000 +0000 @@ -857,6 +857,8 @@ if(ch <= 0) return 0; + if (buf_size > INT_MAX / 2) + return 0; switch (avctx->codec->id) { /* constant, only check buf_size */ diff -Nru ffmpeg-5.1.8/libavcodec/alsdec.c ffmpeg-5.1.9/libavcodec/alsdec.c --- ffmpeg-5.1.8/libavcodec/alsdec.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/alsdec.c 2026-05-05 15:50:55.000000000 +0000 @@ -1539,8 +1539,12 @@ return AVERROR_INVALIDDATA; } + j = 0; for (i = 0; i < frame_length; ++i) { - ctx->raw_mantissa[c][i] = AV_RB32(larray); + if (ctx->raw_samples[c][i] == 0) { + ctx->raw_mantissa[c][i] = AV_RB32(larray + j); + j += 4; + } } } } @@ -1551,7 +1555,10 @@ if (ctx->raw_samples[c][i] != 0) { //The following logic is taken from Tabel 14.45 and 14.46 from the ISO spec if (av_cmp_sf_ieee754(acf[c], FLOAT_1)) { - nbits[i] = 23 
- av_log2(abs(ctx->raw_samples[c][i])); + int nbit = av_log2(FFABSU(ctx->raw_samples[c][i])); + if (nbit > 23) + return AVERROR_INVALIDDATA; + nbits[i] = 23 - nbit; } else { nbits[i] = 23; } @@ -1625,7 +1632,7 @@ tmp_32 = (sign << 31) | ((e + EXP_BIAS) << 23) | (mantissa); ctx->raw_samples[c][i] = tmp_32; } else { - ctx->raw_samples[c][i] = raw_mantissa[c][i] & 0x007fffffUL; + ctx->raw_samples[c][i] = raw_mantissa[c][i]; } } align_get_bits(gb); @@ -1781,7 +1788,9 @@ } if (sconf->floating) { - read_diff_float_data(ctx, ra_frame); + ret = read_diff_float_data(ctx, ra_frame); + if (ret < 0) + return ret; } if (get_bits_left(gb) < 0) { diff -Nru ffmpeg-5.1.8/libavcodec/arm/int_neon.S ffmpeg-5.1.9/libavcodec/arm/int_neon.S --- ffmpeg-5.1.8/libavcodec/arm/int_neon.S 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/arm/int_neon.S 2026-05-05 14:22:01.000000000 +0000 @@ -48,4 +48,3 @@ vmov.32 r0, d3[0] bx lr endfunc - diff -Nru ffmpeg-5.1.8/libavcodec/av1dec.c ffmpeg-5.1.9/libavcodec/av1dec.c --- ffmpeg-5.1.8/libavcodec/av1dec.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/av1dec.c 2026-05-05 15:50:55.000000000 +0000 @@ -89,12 +89,11 @@ static void read_global_param(AV1DecContext *s, int type, int ref, int idx) { - uint8_t primary_frame, prev_frame; + int primary_frame; uint32_t abs_bits, prec_bits, round, prec_diff, sub, mx; int32_t r, prev_gm_param; primary_frame = s->raw_frame_header->primary_ref_frame; - prev_frame = s->raw_frame_header->ref_frame_idx[primary_frame]; abs_bits = AV1_GM_ABS_ALPHA_BITS; prec_bits = AV1_GM_ALPHA_PREC_BITS; @@ -104,8 +103,10 @@ */ if (s->raw_frame_header->primary_ref_frame == AV1_PRIMARY_REF_NONE) prev_gm_param = s->cur_frame.gm_params[ref][idx]; - else + else { + int prev_frame = s->raw_frame_header->ref_frame_idx[primary_frame]; prev_gm_param = s->ref[prev_frame].gm_params[ref][idx]; + } if (idx < 2) { if (type == AV1_WARP_MODEL_TRANSLATION) { @@ -1042,6 +1043,8 @@ } s->raw_seq = &obu->obu.sequence_header; 
+ s->raw_frame_header = NULL; + raw_tile_group = NULL; ret = set_context_with_sequence(avctx, s->raw_seq); if (ret < 0) { @@ -1091,6 +1094,8 @@ goto end; } + raw_tile_group = NULL; + if (unit->type == AV1_OBU_FRAME) s->raw_frame_header = &obu->obu.frame.header; else @@ -1170,8 +1175,11 @@ } } break; - case AV1_OBU_TILE_LIST: case AV1_OBU_TEMPORAL_DELIMITER: + s->raw_frame_header = NULL; + raw_tile_group = NULL; + // fall-through + case AV1_OBU_TILE_LIST: case AV1_OBU_PADDING: case AV1_OBU_METADATA: break; diff -Nru ffmpeg-5.1.8/libavcodec/bmp.c ffmpeg-5.1.9/libavcodec/bmp.c --- ffmpeg-5.1.8/libavcodec/bmp.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/bmp.c 2026-05-05 15:50:55.000000000 +0000 @@ -129,7 +129,7 @@ rgb[1] = bytestream_get_le32(&buf); rgb[2] = bytestream_get_le32(&buf); if (ihsize > 40) - alpha = bytestream_get_le32(&buf); + alpha = bytestream_get_le32(&buf); } ret = ff_set_dimensions(avctx, width, height > 0 ? height : -(unsigned)height); diff -Nru ffmpeg-5.1.8/libavcodec/cfhd.c ffmpeg-5.1.9/libavcodec/cfhd.c --- ffmpeg-5.1.8/libavcodec/cfhd.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/cfhd.c 2026-05-05 15:50:55.000000000 +0000 @@ -638,7 +638,7 @@ } else av_log(avctx, AV_LOG_DEBUG, "Unknown tag %i data %x\n", tag, data); - if (tag == BitstreamMarker && data == 0xf0f && + if (tag == BitstreamMarker && data == CoefficientSegment && s->coded_format != AV_PIX_FMT_NONE) { int lowpass_height = s->plane[s->channel_num].band[0][0].height; int lowpass_width = s->plane[s->channel_num].band[0][0].width; @@ -705,10 +705,15 @@ if (s->subband_num_actual == 255) goto finish; + + if (tag == BitstreamMarker && data == CoefficientSegment || tag == BandHeader || tag == BandSecondPass || s->peak.level) + if (s->transform_type != s->a_transform_type) + return AVERROR_PATCHWELCOME; + coeff_data = s->plane[s->channel_num].subband[s->subband_num_actual]; /* Lowpass coefficients */ - if (tag == BitstreamMarker && data == 0xf0f) { + if 
(tag == BitstreamMarker && data == CoefficientSegment) { int lowpass_height, lowpass_width, lowpass_a_height, lowpass_a_width; if (!s->a_width || !s->a_height) { diff -Nru ffmpeg-5.1.8/libavcodec/cfhd.h ffmpeg-5.1.9/libavcodec/cfhd.h --- ffmpeg-5.1.8/libavcodec/cfhd.h 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/cfhd.h 2026-05-05 15:50:55.000000000 +0000 @@ -93,6 +93,15 @@ ChannelHeight = 105, }; +enum CFHDSegment { + LowPassSegment = 0x1a4a, + LowPassEndSegment = 0x1b4b, + HighPassSegment = 0x0d0d, + BandSegment = 0x0e0e, + HighPassEndSegment = 0x0c0c, + CoefficientSegment = 0x0f0f, +}; + #define VLC_BITS 9 #define SUBBAND_COUNT 10 #define SUBBAND_COUNT_3D 17 diff -Nru ffmpeg-5.1.8/libavcodec/cfhdenc.c ffmpeg-5.1.9/libavcodec/cfhdenc.c --- ffmpeg-5.1.8/libavcodec/cfhdenc.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/cfhdenc.c 2026-05-05 15:50:55.000000000 +0000 @@ -624,7 +624,7 @@ } bytestream2_put_be16(pby, BitstreamMarker); - bytestream2_put_be16(pby, 0x1a4a); + bytestream2_put_be16(pby, LowPassSegment); pos = bytestream2_tell_p(pby); @@ -650,7 +650,7 @@ bytestream2_put_be16(pby, 16); bytestream2_put_be16(pby, BitstreamMarker); - bytestream2_put_be16(pby, 0x0f0f); + bytestream2_put_be16(pby, CoefficientSegment); for (int i = 0; i < height; i++) { for (int j = 0; j < width; j++) @@ -659,7 +659,7 @@ } bytestream2_put_be16(pby, BitstreamMarker); - bytestream2_put_be16(pby, 0x1b4b); + bytestream2_put_be16(pby, LowPassEndSegment); for (int l = 0; l < 3; l++) { for (int i = 0; i < 3; i++) { @@ -674,7 +674,7 @@ int height = s->plane[p].band[l][0].height; bytestream2_put_be16(pby, BitstreamMarker); - bytestream2_put_be16(pby, 0x0d0d); + bytestream2_put_be16(pby, HighPassSegment); bytestream2_put_be16(pby, WaveletType); bytestream2_put_be16(pby, 3 + 2 * (l == 2)); @@ -711,7 +711,7 @@ int count = 0, padd = 0; bytestream2_put_be16(pby, BitstreamMarker); - bytestream2_put_be16(pby, 0x0e0e); + bytestream2_put_be16(pby, BandSegment); 
bytestream2_put_be16(pby, SubbandNumber); bytestream2_put_be16(pby, i + 1); @@ -781,7 +781,7 @@ } bytestream2_put_be16(pby, BitstreamMarker); - bytestream2_put_be16(pby, 0x0c0c); + bytestream2_put_be16(pby, HighPassEndSegment); } s->plane[p].size = bytestream2_tell_p(pby) - pos; diff -Nru ffmpeg-5.1.8/libavcodec/cljrdec.c ffmpeg-5.1.9/libavcodec/cljrdec.c --- ffmpeg-5.1.8/libavcodec/cljrdec.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/cljrdec.c 2026-05-05 15:50:52.000000000 +0000 @@ -91,4 +91,3 @@ .p.capabilities = AV_CODEC_CAP_DR1, .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE, }; - diff -Nru ffmpeg-5.1.8/libavcodec/dca_xll.c ffmpeg-5.1.9/libavcodec/dca_xll.c --- ffmpeg-5.1.8/libavcodec/dca_xll.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/dca_xll.c 2026-05-05 15:50:55.000000000 +0000 @@ -62,12 +62,16 @@ array[i] = get_linear(gb, n); } -static void get_rice_array(GetBitContext *gb, int32_t *array, int size, int k) +static int get_rice_array(GetBitContext *gb, int32_t *array, int size, int k) { int i; - for (i = 0; i < size; i++) + for (i = 0; i < size && get_bits_left(gb) > k; i++) array[i] = get_rice(gb, k); + + if (i < size) + return AVERROR_INVALIDDATA; + return 0; } static int parse_dmix_coeffs(DCAXllDecoder *s, DCAXllChSet *c) @@ -527,8 +531,10 @@ } else { // Rice codes // Unpack all residuals of part A of segment 0 - get_rice_array(&s->gb, part_a, c->nsamples_part_a[k], - c->bitalloc_part_a[k]); + int ret = get_rice_array(&s->gb, part_a, c->nsamples_part_a[k], + c->bitalloc_part_a[k]); + if (ret < 0) + return ret; if (c->bitalloc_hybrid_linear[k]) { // Hybrid Rice codes @@ -558,7 +564,9 @@ } else { // Rice codes // Unpack all residuals of part B of segment 0 and others - get_rice_array(&s->gb, part_b, nsamples_part_b, c->bitalloc_part_b[k]); + ret = get_rice_array(&s->gb, part_b, nsamples_part_b, c->bitalloc_part_b[k]); + if (ret < 0) + return ret; } } } @@ -1076,6 +1084,7 @@ return AVERROR(ENOMEM); 
memcpy(s->pbr_buffer, data, size); + memset(s->pbr_buffer + size, 0, AV_INPUT_BUFFER_PADDING_SIZE); s->pbr_length = size; s->pbr_delay = delay; return 0; @@ -1130,6 +1139,7 @@ memcpy(s->pbr_buffer + s->pbr_length, data, size); s->pbr_length += size; + memset(s->pbr_buffer + s->pbr_length, 0, AV_INPUT_BUFFER_PADDING_SIZE); // Respect decoding delay after synchronization error if (s->pbr_delay > 0 && --s->pbr_delay) diff -Nru ffmpeg-5.1.8/libavcodec/dfpwmdec.c ffmpeg-5.1.9/libavcodec/dfpwmdec.c --- ffmpeg-5.1.8/libavcodec/dfpwmdec.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/dfpwmdec.c 2026-05-05 15:50:55.000000000 +0000 @@ -106,15 +106,16 @@ { DFPWMState *state = ctx->priv_data; int ret; + uint64_t nb_samples = packet->size * 8LL / ctx->ch_layout.nb_channels; if (packet->size * 8LL % ctx->ch_layout.nb_channels) return AVERROR_PATCHWELCOME; - frame->nb_samples = packet->size * 8LL / ctx->ch_layout.nb_channels; - if (frame->nb_samples <= 0) { + if (nb_samples > INT_MAX || !nb_samples) { av_log(ctx, AV_LOG_ERROR, "invalid number of samples in packet\n"); return AVERROR_INVALIDDATA; } + frame->nb_samples = nb_samples; if ((ret = ff_get_buffer(ctx, frame, 0)) < 0) return ret; diff -Nru ffmpeg-5.1.8/libavcodec/dv_profile.c ffmpeg-5.1.9/libavcodec/dv_profile.c --- ffmpeg-5.1.8/libavcodec/dv_profile.c 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/dv_profile.c 2026-05-05 14:22:01.000000000 +0000 @@ -337,4 +337,3 @@ return p; } - diff -Nru ffmpeg-5.1.8/libavcodec/dvdsub_parser.c ffmpeg-5.1.9/libavcodec/dvdsub_parser.c --- ffmpeg-5.1.8/libavcodec/dvdsub_parser.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/dvdsub_parser.c 2026-05-05 15:50:55.000000000 +0000 @@ -59,7 +59,7 @@ pc->packet = av_malloc(pc->packet_len + AV_INPUT_BUFFER_PADDING_SIZE); } if (pc->packet) { - if (pc->packet_index + buf_size <= pc->packet_len) { + if (buf_size <= pc->packet_len - pc->packet_index) { memcpy(pc->packet + pc->packet_index, buf, 
buf_size); pc->packet_index += buf_size; if (pc->packet_index >= pc->packet_len) { diff -Nru ffmpeg-5.1.8/libavcodec/escape130.c ffmpeg-5.1.9/libavcodec/escape130.c --- ffmpeg-5.1.8/libavcodec/escape130.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/escape130.c 2026-05-05 15:50:55.000000000 +0000 @@ -125,7 +125,7 @@ return AVERROR_INVALIDDATA; } - s->old_y_avg = av_malloc(avctx->width * avctx->height / 4); + s->old_y_avg = av_mallocz(avctx->width * avctx->height / 4); s->buf1 = av_malloc(avctx->width * avctx->height * 3 / 2); s->buf2 = av_malloc(avctx->width * avctx->height * 3 / 2); if (!s->old_y_avg || !s->buf1 || !s->buf2) { diff -Nru ffmpeg-5.1.8/libavcodec/exr.c ffmpeg-5.1.9/libavcodec/exr.c --- ffmpeg-5.1.8/libavcodec/exr.c 2025-11-26 02:41:35.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/exr.c 2026-05-05 15:50:55.000000000 +0000 @@ -631,6 +631,9 @@ max_non_zero - min_non_zero + 1); memset(td->bitmap + max_non_zero + 1, 0, BITMAP_SIZE - max_non_zero - 1); + if (bytestream2_get_bytes_left(&gb) < 4) + return AVERROR_INVALIDDATA; + maxval = reverse_lut(td->bitmap, td->lut); bytestream2_skip(&gb, 4); @@ -1471,7 +1474,8 @@ } // Zero out the end if xmax+1 is not w - memset(ptr_x, 0, axmax); + if (s->desc->flags & AV_PIX_FMT_FLAG_PLANAR || !c) + memset(ptr_x, 0, axmax); channel_buffer[c] += td->channel_line_size; } } @@ -1793,12 +1797,17 @@ } } - s->channels = av_realloc(s->channels, - ++s->nb_channels * sizeof(EXRChannel)); - if (!s->channels) { + av_assert0(s->nb_channels < INT_MAX); // Impossible due to size of the bitstream + EXRChannel *new_channels = av_realloc_array(s->channels, + s->nb_channels + 1, + sizeof(EXRChannel)); + if (!new_channels) { ret = AVERROR(ENOMEM); goto fail; } + s->nb_channels ++; + s->channels = new_channels; + channel = &s->channels[s->nb_channels - 1]; channel->pixel_type = current_pixel_type; channel->xsub = xsub; @@ -1821,7 +1830,7 @@ s->is_luma = 1; } else { avpriv_request_sample(s->avctx, "Uncommon channel 
combination"); - ret = AVERROR(AVERROR_PATCHWELCOME); + ret = AVERROR_PATCHWELCOME; goto fail; } @@ -2214,6 +2223,8 @@ } if (s->is_tile) { + if (s->tile_attr.ySize <= 0 || s->tile_attr.xSize <= 0) + return AVERROR_INVALIDDATA; nb_blocks = ((s->xdelta + s->tile_attr.xSize - 1) / s->tile_attr.xSize) * ((s->ydelta + s->tile_attr.ySize - 1) / s->tile_attr.ySize); } else { /* scanline */ diff -Nru ffmpeg-5.1.8/libavcodec/ffv1_template.c ffmpeg-5.1.9/libavcodec/ffv1_template.c --- ffmpeg-5.1.8/libavcodec/ffv1_template.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/ffv1_template.c 2026-05-05 14:22:01.000000000 +0000 @@ -50,4 +50,3 @@ p->quant_table[1][(LT - T) & 0xFF] + p->quant_table[2][(T - RT) & 0xFF]; } - diff -Nru ffmpeg-5.1.8/libavcodec/ffv1enc_template.c ffmpeg-5.1.9/libavcodec/ffv1enc_template.c --- ffmpeg-5.1.8/libavcodec/ffv1enc_template.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/ffv1enc_template.c 2026-05-05 15:50:52.000000000 +0000 @@ -199,4 +199,3 @@ } return 0; } - diff -Nru ffmpeg-5.1.8/libavcodec/flashsv.c ffmpeg-5.1.9/libavcodec/flashsv.c --- ffmpeg-5.1.8/libavcodec/flashsv.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/flashsv.c 2026-05-05 15:50:55.000000000 +0000 @@ -314,6 +314,9 @@ v_blocks = s->image_height / s->block_height; v_part = s->image_height % s->block_height; + if (h_blocks * v_blocks * 16 > get_bits_left(&gb)) + return AVERROR_INVALIDDATA; + /* the block size could change between frames, make sure the buffer * is large enough, if not, get a larger one */ if (s->block_size < s->block_width * s->block_height) { diff -Nru ffmpeg-5.1.8/libavcodec/golomb.h ffmpeg-5.1.9/libavcodec/golomb.h --- ffmpeg-5.1.8/libavcodec/golomb.h 2025-08-05 00:22:34.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/golomb.h 2026-05-05 15:50:55.000000000 +0000 @@ -455,7 +455,7 @@ buf = get_bits_long(gb, k); return buf + (i << k); - } else if (i == limit - 1) { + } else if (esc_len && i == limit - 1) { buf = 
get_bits_long(gb, esc_len); return buf + 1; @@ -512,7 +512,7 @@ } buf += ((SUINT)i << k); - } else if (i == limit - 1) { + } else if (esc_len && i == limit - 1) { buf = SHOW_UBITS(re, gb, esc_len); LAST_SKIP_BITS(re, gb, esc_len); diff -Nru ffmpeg-5.1.8/libavcodec/h264_direct.c ffmpeg-5.1.9/libavcodec/h264_direct.c --- ffmpeg-5.1.8/libavcodec/h264_direct.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/h264_direct.c 2026-05-05 15:50:55.000000000 +0000 @@ -121,26 +121,30 @@ { H264Ref *const ref1 = &sl->ref_list[1][0]; H264Picture *const cur = h->cur_pic_ptr; - int list, j, field; + int list, field; int sidx = (h->picture_structure & 1) ^ 1; int ref1sidx = (ref1->reference & 1) ^ 1; - for (list = 0; list < sl->list_count; list++) { - cur->ref_count[sidx][list] = sl->ref_count[list]; - for (j = 0; j < sl->ref_count[list]; j++) - cur->ref_poc[sidx][list][j] = 4 * sl->ref_list[list][j].parent->frame_num + - (sl->ref_list[list][j].reference & 3); - } + /* Updates to cur_pic are not safe once ff_thread_finish_setup() has been + * called (other threads may already be reading these fields). 
*/ + if (!h->setup_finished) { + for (list = 0; list < sl->list_count; list++) { + cur->ref_count[sidx][list] = sl->ref_count[list]; + for (int j = 0; j < sl->ref_count[list]; j++) + cur->ref_poc[sidx][list][j] = 4 * sl->ref_list[list][j].parent->frame_num + + (sl->ref_list[list][j].reference & 3); + } - if (h->picture_structure == PICT_FRAME) { - memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0])); - memcpy(cur->ref_poc[1], cur->ref_poc[0], sizeof(cur->ref_poc[0])); - } + if (h->picture_structure == PICT_FRAME) { + memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0])); + memcpy(cur->ref_poc[1], cur->ref_poc[0], sizeof(cur->ref_poc[0])); + } - if (h->current_slice == 0) { - cur->mbaff = FRAME_MBAFF(h); - } else { - av_assert0(cur->mbaff == FRAME_MBAFF(h)); + if (h->current_slice == 0) { + cur->mbaff = FRAME_MBAFF(h); + } else { + av_assert0(cur->mbaff == FRAME_MBAFF(h)); + } } sl->col_fieldoff = 0; diff -Nru ffmpeg-5.1.8/libavcodec/h264_mc_template.c ffmpeg-5.1.9/libavcodec/h264_mc_template.c --- ffmpeg-5.1.8/libavcodec/h264_mc_template.c 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/h264_mc_template.c 2026-05-05 14:22:01.000000000 +0000 @@ -162,4 +162,3 @@ if (USES_LIST(mb_type, 1)) prefetch_motion(h, sl, 1, PIXEL_SHIFT, CHROMA_IDC); } - diff -Nru ffmpeg-5.1.8/libavcodec/h264_parser.c ffmpeg-5.1.9/libavcodec/h264_parser.c --- ffmpeg-5.1.8/libavcodec/h264_parser.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/h264_parser.c 2026-05-05 15:50:55.000000000 +0000 @@ -222,6 +222,9 @@ if (get_bits1(gb)) { // adaptive_ref_pic_marking_mode_flag int i; for (i = 0; i < H264_MAX_MMCO_COUNT; i++) { + if (get_bits_left(gb) < 1) + return AVERROR_INVALIDDATA; + MMCOOpcode opcode = get_ue_golomb_31(gb); if (opcode > (unsigned) MMCO_LONG) { av_log(logctx, AV_LOG_ERROR, @@ -651,8 +654,12 @@ s->dts = av_sat_add64(p->reference_dts, av_rescale(s->dts_ref_dts_delta, num, den)); } - if (p->reference_dts != 
AV_NOPTS_VALUE && s->pts == AV_NOPTS_VALUE) - s->pts = s->dts + av_rescale(s->pts_dts_delta, num, den); + if (p->reference_dts != AV_NOPTS_VALUE && s->pts == AV_NOPTS_VALUE) { + int64_t pts_dts_delta = av_rescale(s->pts_dts_delta, num, den); + uint64_t pts = (uint64_t)s->dts + pts_dts_delta; + if (pts == av_sat_add64(s->dts, pts_dts_delta)) + s->pts = pts; + } if (s->dts_sync_point > 0) p->reference_dts = s->dts; // new reference diff -Nru ffmpeg-5.1.8/libavcodec/h264_refs.c ffmpeg-5.1.9/libavcodec/h264_refs.c --- ffmpeg-5.1.8/libavcodec/h264_refs.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/h264_refs.c 2026-05-05 15:50:55.000000000 +0000 @@ -158,8 +158,7 @@ h->long_ref, 16, 1, h->picture_structure); av_assert0(len <= 32); - if (len < sl->ref_count[list]) - memset(&sl->ref_list[list][len], 0, sizeof(H264Ref) * (sl->ref_count[list] - len)); + memset(&sl->ref_list[list][len], 0, sizeof(H264Ref) * (32 - len)); lens[list] = len; } @@ -179,8 +178,7 @@ h-> long_ref, 16, 1, h->picture_structure); av_assert0(len <= 32); - if (len < sl->ref_count[0]) - memset(&sl->ref_list[0][len], 0, sizeof(H264Ref) * (sl->ref_count[0] - len)); + memset(&sl->ref_list[0][len], 0, sizeof(H264Ref) * (32 - len)); } #ifdef TRACE for (i = 0; i < sl->ref_count[0]; i++) { diff -Nru ffmpeg-5.1.8/libavcodec/h264_slice.c ffmpeg-5.1.9/libavcodec/h264_slice.c --- ffmpeg-5.1.8/libavcodec/h264_slice.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/h264_slice.c 2026-05-05 15:50:55.000000000 +0000 @@ -2086,8 +2086,7 @@ if (sl->slice_type_nos == AV_PICTURE_TYPE_B && !sl->direct_spatial_mv_pred) ff_h264_direct_dist_scale_factor(h, sl); - if (!h->setup_finished) - ff_h264_direct_ref_list_init(h, sl); + ff_h264_direct_ref_list_init(h, sl); if (h->avctx->skip_loop_filter >= AVDISCARD_ALL || (h->avctx->skip_loop_filter >= AVDISCARD_NONKEY && @@ -2116,6 +2115,12 @@ h->ps.pps->chroma_qp_index_offset[1]) + 6 * (h->ps.sps->bit_depth_luma - 8); + // slice_table is uint16_t 
initialized to 0xFFFF as a sentinel. + if (h->current_slice >= 0xFFFE) { + av_log(h->avctx, AV_LOG_ERROR, "Too many slices (%d)\n", h->current_slice + 1); + return AVERROR_PATCHWELCOME; + } + sl->slice_num = ++h->current_slice; if (sl->slice_num) diff -Nru ffmpeg-5.1.8/libavcodec/hevc_cabac.c ffmpeg-5.1.9/libavcodec/hevc_cabac.c --- ffmpeg-5.1.8/libavcodec/hevc_cabac.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/hevc_cabac.c 2026-05-05 15:50:52.000000000 +0000 @@ -1561,4 +1561,3 @@ case 0: lc->pu.mvd.y = 0; break; } } - diff -Nru ffmpeg-5.1.8/libavcodec/imgconvert.c ffmpeg-5.1.9/libavcodec/imgconvert.c --- ffmpeg-5.1.8/libavcodec/imgconvert.c 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/imgconvert.c 2026-05-05 14:22:01.000000000 +0000 @@ -45,4 +45,3 @@ *loss_ptr = loss; return best; } - diff -Nru ffmpeg-5.1.8/libavcodec/imm5.c ffmpeg-5.1.9/libavcodec/imm5.c --- ffmpeg-5.1.8/libavcodec/imm5.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/imm5.c 2026-05-05 15:50:55.000000000 +0000 @@ -139,6 +139,8 @@ } ret = avcodec_receive_frame(codec_avctx, frame); + if (ret == AVERROR(EAGAIN)) + return avpkt->size; if (ret < 0) return ret; diff -Nru ffmpeg-5.1.8/libavcodec/interplayacm.c ffmpeg-5.1.9/libavcodec/interplayacm.c --- ffmpeg-5.1.8/libavcodec/interplayacm.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/interplayacm.c 2026-05-05 15:50:55.000000000 +0000 @@ -434,6 +434,9 @@ unsigned i, ind; int ret; + if (get_bits_left(gb) < s->cols * 5) + return AVERROR_INVALIDDATA; + for (i = 0; i < s->cols; i++) { ind = get_bits(gb, 5); ret = filler_list[ind](s, ind, i); diff -Nru ffmpeg-5.1.8/libavcodec/jpeg2000dec.c ffmpeg-5.1.9/libavcodec/jpeg2000dec.c --- ffmpeg-5.1.8/libavcodec/jpeg2000dec.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/jpeg2000dec.c 2026-05-05 15:50:55.000000000 +0000 @@ -1801,7 +1801,7 @@ while (passno--) { if (bpno < 0 || bpno > 29) { - av_log(s->avctx, AV_LOG_ERROR, 
"bpno became invalid\n"); + av_log(s->avctx, AV_LOG_ERROR, "bpno (%d) became invalid\n", bpno); return AVERROR_INVALIDDATA; } switch(pass_t) { @@ -2060,9 +2060,12 @@ int h = tile->comp[compno].coord[1][1] - \ ff_jpeg2000_ceildiv(s->image_offset_y, s->cdy[compno]); \ int plane = 0; \ + ptrdiff_t dstoffset = 0; \ \ if (planar) \ plane = s->cdef[compno] ? s->cdef[compno]-1 : (s->ncomponents-1); \ + else \ + dstoffset = s->cdef[compno] ? s->cdef[compno] - 1 : compno; \ \ y = tile->comp[compno].coord[1][0] - \ ff_jpeg2000_ceildiv(s->image_offset_y, s->cdy[compno]); \ @@ -2072,7 +2075,7 @@ \ x = tile->comp[compno].coord[0][0] - \ ff_jpeg2000_ceildiv(s->image_offset_x, s->cdx[compno]); \ - dst = line + x * pixelsize + compno*!planar; \ + dst = line + x * pixelsize + dstoffset; \ \ if (codsty->transform == FF_DWT97) { \ for (; x < w; x++) { \ diff -Nru ffmpeg-5.1.8/libavcodec/lcldec.c ffmpeg-5.1.9/libavcodec/lcldec.c --- ffmpeg-5.1.8/libavcodec/lcldec.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/lcldec.c 2026-05-05 15:50:55.000000000 +0000 @@ -175,7 +175,7 @@ int height = avctx->height; // Real image height unsigned int mszh_dlen; unsigned char yq, y1q, uq, vq; - int uqvq, ret; + int ret; unsigned int mthread_inlen, mthread_outlen; unsigned int len = buf_size; int linesize, offset; @@ -304,7 +304,7 @@ for (row = 0; row < height; row++) { pixel_ptr = row * width * 3; yq = encoded[pixel_ptr++]; - uqvq = AV_RL16(encoded+pixel_ptr); + unsigned uqvq = AV_RL16(encoded+pixel_ptr); pixel_ptr += 2; for (col = 1; col < width; col++) { encoded[pixel_ptr] = yq -= encoded[pixel_ptr]; diff -Nru ffmpeg-5.1.8/libavcodec/magicyuv.c ffmpeg-5.1.9/libavcodec/magicyuv.c --- ffmpeg-5.1.8/libavcodec/magicyuv.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/magicyuv.c 2026-05-05 15:50:55.000000000 +0000 @@ -343,7 +343,8 @@ s->llviddsp.add_left_pred(dst, dst, width, 0); dst += stride; } - lefttop = left = dst[0]; + if (1 + interlaced < height) + lefttop = left 
= dst[0]; for (k = 1 + interlaced; k < height; k++) { s->llviddsp.add_median_pred(dst, dst - fake_stride, dst, width, &left, &lefttop); diff -Nru ffmpeg-5.1.8/libavcodec/mdec.c ffmpeg-5.1.9/libavcodec/mdec.c --- ffmpeg-5.1.8/libavcodec/mdec.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/mdec.c 2026-05-05 15:50:55.000000000 +0000 @@ -175,6 +175,9 @@ int buf_size = avpkt->size; int ret; + if (a->mb_width * a->mb_height * 3 > buf_size) + return AVERROR_INVALIDDATA; + if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0) return ret; frame->pict_type = AV_PICTURE_TYPE_I; diff -Nru ffmpeg-5.1.8/libavcodec/mjpegdec.c ffmpeg-5.1.9/libavcodec/mjpegdec.c --- ffmpeg-5.1.8/libavcodec/mjpegdec.c 2025-11-26 02:41:35.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/mjpegdec.c 2026-05-05 15:50:55.000000000 +0000 @@ -150,7 +150,7 @@ if ((ret = init_default_huffman_tables(s)) < 0) return ret; - if (s->extern_huff) { + if (s->extern_huff && avctx->extradata) { av_log(avctx, AV_LOG_INFO, "using external huffman table\n"); if ((ret = init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size * 8)) < 0) return ret; @@ -344,9 +344,11 @@ if (av_image_check_size(width, height, 0, s->avctx) < 0) return AVERROR_INVALIDDATA; - // A valid frame requires at least 1 bit for DC + 1 bit for AC for each 8x8 block. - if (s->buf_size && (width + 7) / 8 * ((height + 7) / 8) > s->buf_size * 4LL) - return AVERROR_INVALIDDATA; + if (!s->progressive && !s->ls) { + // A valid frame requires at least 1 bit for DC + 1 bit for AC for each 8x8 block. 
+ if (s->buf_size && (width + 7) / 8 * ((height + 7) / 8) > s->buf_size * 4LL) + return AVERROR_INVALIDDATA; + } nb_components = get_bits(&s->gb, 8); if (nb_components <= 0 || diff -Nru ffmpeg-5.1.8/libavcodec/mpegaudiodsp_template.c ffmpeg-5.1.9/libavcodec/mpegaudiodsp_template.c --- ffmpeg-5.1.8/libavcodec/mpegaudiodsp_template.c 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/mpegaudiodsp_template.c 2026-05-05 14:22:01.000000000 +0000 @@ -369,4 +369,3 @@ out++; } } - diff -Nru ffmpeg-5.1.8/libavcodec/mpegaudioenc_template.c ffmpeg-5.1.9/libavcodec/mpegaudioenc_template.c --- ffmpeg-5.1.8/libavcodec/mpegaudioenc_template.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/mpegaudioenc_template.c 2026-05-05 15:50:52.000000000 +0000 @@ -783,4 +783,3 @@ { "b", "0" }, { NULL }, }; - diff -Nru ffmpeg-5.1.8/libavcodec/mpegvideo_enc.c ffmpeg-5.1.9/libavcodec/mpegvideo_enc.c --- ffmpeg-5.1.8/libavcodec/mpegvideo_enc.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/mpegvideo_enc.c 2026-05-05 15:50:55.000000000 +0000 @@ -2706,7 +2706,7 @@ bytestream_put_byte(&ptr, 0); /* vmv2 */ } -static void update_mb_info(MpegEncContext *s, int startcode) +static void update_mb_info(MpegEncContext *s) { if (!s->mb_info) return; @@ -2714,14 +2714,6 @@ s->mb_info_size += 12; s->prev_mb_info = s->last_mb_info; } - if (startcode) { - s->prev_mb_info = put_bytes_count(&s->pb, 0); - /* This might have incremented mb_info_size above, and we return without - * actually writing any info into that slot yet. But in that case, - * this will be called again at the start of the after writing the - * start code, actually writing the mb info. 
*/ - return; - } s->last_mb_info = put_bytes_count(&s->pb, 0); if (!s->mb_info_size) @@ -2938,8 +2930,11 @@ case AV_CODEC_ID_H263: case AV_CODEC_ID_H263P: if (CONFIG_H263_ENCODER) { - update_mb_info(s, 1); + if (s->mb_info && put_bytes_count(&s->pb, 0) - s->prev_mb_info >= s->mb_info) + s->mb_info_size += 12; + ff_h263_encode_gob_header(s, mb_y); + s->prev_mb_info = put_bits_count(&s->pb)/8; } break; } @@ -2965,7 +2960,7 @@ s->mb_skipped=0; s->dquant=0; //only for QP_RD - update_mb_info(s, 0); + update_mb_info(s); if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD int next_block=0; diff -Nru ffmpeg-5.1.8/libavcodec/msmpeg4.c ffmpeg-5.1.9/libavcodec/msmpeg4.c --- ffmpeg-5.1.8/libavcodec/msmpeg4.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/msmpeg4.c 2026-05-05 15:50:52.000000000 +0000 @@ -344,4 +344,3 @@ *dc_val_ptr = &dc_val[0]; return pred; } - diff -Nru ffmpeg-5.1.8/libavcodec/notchlc.c ffmpeg-5.1.9/libavcodec/notchlc.c --- ffmpeg-5.1.8/libavcodec/notchlc.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/notchlc.c 2026-05-05 15:50:55.000000000 +0000 @@ -79,7 +79,7 @@ PutByteContext *pb) { unsigned reference_pos, match_length, delta, pos = 0; - uint8_t history[64 * 1024]; + uint8_t history[64 * 1024] = { 0 }; while (bytestream2_get_bytes_left(gb) > 0) { uint8_t token = bytestream2_get_byte(gb); @@ -89,6 +89,8 @@ unsigned char current; do { current = bytestream2_get_byte(gb); + if (current > INT_MAX - num_literals) + return AVERROR_INVALIDDATA; num_literals += current; } while (current == 255); } @@ -121,6 +123,8 @@ do { current = bytestream2_get_byte(gb); + if (current > INT_MAX - match_length) + return AVERROR_INVALIDDATA; match_length += current; } while (current == 255); } diff -Nru ffmpeg-5.1.8/libavcodec/omx.c ffmpeg-5.1.9/libavcodec/omx.c --- ffmpeg-5.1.8/libavcodec/omx.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/omx.c 2026-05-05 
15:50:55.000000000 +0000 @@ -683,6 +683,11 @@ buffer = get_buffer(&s->output_mutex, &s->output_cond, &s->num_done_out_buffers, s->done_out_buffers, 1); if (buffer->nFlags & OMX_BUFFERFLAG_CODECCONFIG) { + if (buffer->nFilledLen > INT32_MAX - AV_INPUT_BUFFER_PADDING_SIZE - avctx->extradata_size) { + ret = AVERROR(ENOMEM); + goto fail; + } + if ((ret = av_reallocp(&avctx->extradata, avctx->extradata_size + buffer->nFilledLen + AV_INPUT_BUFFER_PADDING_SIZE)) < 0) { avctx->extradata_size = 0; goto fail; diff -Nru ffmpeg-5.1.8/libavcodec/qdm2.c ffmpeg-5.1.9/libavcodec/qdm2.c --- ffmpeg-5.1.8/libavcodec/qdm2.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/qdm2.c 2026-05-05 15:50:55.000000000 +0000 @@ -1852,6 +1852,8 @@ if(buf_size < s->checksum_size) return -1; + s->sub_packet = 0; + /* get output buffer */ frame->nb_samples = 16 * s->frame_size; if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) diff -Nru ffmpeg-5.1.8/libavcodec/ralf.c ffmpeg-5.1.9/libavcodec/ralf.c --- ffmpeg-5.1.8/libavcodec/ralf.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/ralf.c 2026-05-05 15:50:55.000000000 +0000 @@ -158,6 +158,7 @@ if (ctx->max_frame_size > (1 << 20) || !ctx->max_frame_size) { av_log(avctx, AV_LOG_ERROR, "invalid frame size %d\n", ctx->max_frame_size); + return AVERROR_INVALIDDATA; } ctx->max_frame_size = FFMAX(ctx->max_frame_size, avctx->sample_rate); diff -Nru ffmpeg-5.1.8/libavcodec/rasc.c ffmpeg-5.1.9/libavcodec/rasc.c --- ffmpeg-5.1.8/libavcodec/rasc.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/rasc.c 2026-05-05 15:50:55.000000000 +0000 @@ -52,6 +52,8 @@ GetByteContext gb; uint8_t *delta; int delta_size; + uint8_t *mv_scratch; + unsigned int mv_scratch_size; uint8_t *cursor; int cursor_size; unsigned cursor_w; @@ -295,10 +297,8 @@ b2 -= s->frame2->linesize[0]; } } else if (type == 0) { - uint8_t *buffer; - - av_fast_padded_malloc(&s->delta, &s->delta_size, w * h * s->bpp); - buffer = s->delta; + 
av_fast_padded_malloc(&s->mv_scratch, &s->mv_scratch_size, w * h * s->bpp); + uint8_t *buffer = s->mv_scratch; if (!buffer) return AVERROR(ENOMEM); @@ -770,6 +770,8 @@ s->cursor_size = 0; av_freep(&s->delta); s->delta_size = 0; + av_freep(&s->mv_scratch); + s->mv_scratch_size = 0; av_frame_free(&s->frame1); av_frame_free(&s->frame2); ff_inflate_end(&s->zstream); diff -Nru ffmpeg-5.1.8/libavcodec/snow_dwt.c ffmpeg-5.1.9/libavcodec/snow_dwt.c --- ffmpeg-5.1.8/libavcodec/snow_dwt.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/snow_dwt.c 2026-05-05 15:50:52.000000000 +0000 @@ -857,5 +857,3 @@ ff_dwt_init_x86(c); #endif } - - diff -Nru ffmpeg-5.1.8/libavcodec/svq1dec.c ffmpeg-5.1.9/libavcodec/svq1dec.c --- ffmpeg-5.1.8/libavcodec/svq1dec.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/svq1dec.c 2026-05-05 15:50:55.000000000 +0000 @@ -680,6 +680,11 @@ avctx->skip_frame >= AVDISCARD_ALL) return buf_size; + // Reject obviously too-small packets early: require at least one remaining bit per aligned luma macroblock. + // FFALIGN(s->width, 16) * FFALIGN(s->height, 16) / 256 represent the number of Macroblocks + if (get_bits_left(&s->gb) < FFALIGN(s->width, 16) * FFALIGN(s->height, 16) / 256) + return AVERROR_INVALIDDATA; + result = ff_get_buffer(avctx, cur, s->nonref ? 
0 : AV_GET_BUFFER_FLAG_REF); if (result < 0) return result; diff -Nru ffmpeg-5.1.8/libavcodec/tdsc.c ffmpeg-5.1.9/libavcodec/tdsc.c --- ffmpeg-5.1.8/libavcodec/tdsc.c 2025-11-26 02:41:31.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/tdsc.c 2026-05-05 15:50:55.000000000 +0000 @@ -242,7 +242,6 @@ bits <<= 1; } } - dst += ctx->cursor_stride - ctx->cursor_w * 4; } dst = ctx->cursor; @@ -274,7 +273,6 @@ bits <<= 1; } } - dst += ctx->cursor_stride - ctx->cursor_w * 4; } break; case CUR_FMT_BGRA: @@ -360,7 +358,8 @@ } ret = avcodec_receive_frame(ctx->jpeg_avctx, ctx->jpgframe); - if (ret < 0 || ctx->jpgframe->format != AV_PIX_FMT_YUVJ420P) { + if (ret < 0 || ctx->jpgframe->format != AV_PIX_FMT_YUVJ420P || + w > ctx->jpgframe->width || h > ctx->jpgframe->height) { av_log(avctx, AV_LOG_ERROR, "JPEG decoding error (%d).\n", ret); @@ -404,7 +403,7 @@ } tile_size = bytestream2_get_le32(&ctx->gbc); - if (bytestream2_get_bytes_left(&ctx->gbc) < tile_size) + if (bytestream2_get_bytes_left(&ctx->gbc) < tile_size + 24LL) return AVERROR_INVALIDDATA; tile_mode = bytestream2_get_le32(&ctx->gbc); @@ -437,6 +436,9 @@ if (ret < 0) return ret; } else if (tile_mode == MKTAG(' ','W','A','R')) { + if (3LL * w * h > tile_size) + return AVERROR_INVALIDDATA; + /* Just copy the buffer to output */ av_image_copy_plane(ctx->refframe->data[0] + x * 3 + ctx->refframe->linesize[0] * y, diff -Nru ffmpeg-5.1.8/libavcodec/vp3.c ffmpeg-5.1.9/libavcodec/vp3.c --- ffmpeg-5.1.8/libavcodec/vp3.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/vp3.c 2026-05-05 15:50:55.000000000 +0000 @@ -2921,6 +2921,8 @@ if (av_image_check_size(visible_width, visible_height, 0, avctx) < 0 || visible_width + offset_x > s->width || visible_height + offset_y > s->height || + visible_width + 512 < s->width || + visible_height + 512 < s->height || visible_width < 18 ) { av_log(avctx, AV_LOG_ERROR, diff -Nru ffmpeg-5.1.8/libavcodec/vp9.c ffmpeg-5.1.9/libavcodec/vp9.c --- ffmpeg-5.1.8/libavcodec/vp9.c 2025-11-26 
02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/vp9.c 2026-05-05 15:50:55.000000000 +0000 @@ -192,10 +192,12 @@ uint8_t *p; int bytesperpixel = s->bytesperpixel, ret, cols, rows; int lflvl_len, i; + int changed = 0; av_assert0(w > 0 && h > 0); if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) { + changed = 1; if ((ret = ff_set_dimensions(avctx, w, h)) < 0) return ret; @@ -239,8 +241,10 @@ *fmtp = AV_PIX_FMT_NONE; ret = ff_thread_get_format(avctx, pix_fmts); - if (ret < 0) + if (ret < 0) { + ff_set_dimensions(avctx, s->w, s->h); return ret; + } avctx->pix_fmt = ret; s->gf_fmt = s->pix_fmt; @@ -252,7 +256,7 @@ rows = (h + 7) >> 3; if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt) - return 0; + return changed; s->last_fmt = s->pix_fmt; s->sb_cols = (w + 63) >> 6; @@ -297,9 +301,10 @@ ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT); ff_videodsp_init(&s->vdsp, s->s.h.bpp); s->last_bpp = s->s.h.bpp; + changed = 1; } - return 0; + return changed; } static int update_block_buffers(AVCodecContext *avctx) @@ -506,6 +511,7 @@ int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp; int last_invisible; const uint8_t *data2; + int changed; /* general header */ if ((ret = init_get_bits8(&s->gb, data, size)) < 0) { @@ -770,10 +776,10 @@ } /* tiling info */ - if ((ret = update_size(avctx, w, h)) < 0) { + if ((changed = update_size(avctx, w, h)) < 0) { av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, s->pix_fmt); - return ret; + return changed; } for (s->s.h.tiling.log2_tile_cols = 0; s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols); @@ -788,7 +794,7 @@ } s->s.h.tiling.log2_tile_rows = decode012(&s->gb); s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows; - if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) { + if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols) || changed) { int n_range_coders; VP56RangeCoder *rc; diff -Nru 
ffmpeg-5.1.8/libavcodec/wmaenc.c ffmpeg-5.1.9/libavcodec/wmaenc.c --- ffmpeg-5.1.8/libavcodec/wmaenc.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/wmaenc.c 2026-05-05 15:50:55.000000000 +0000 @@ -65,14 +65,14 @@ flags1 = 0; flags2 = 1; if (avctx->codec->id == AV_CODEC_ID_WMAV1) { - extradata = av_malloc(4); + extradata = av_mallocz(4 + AV_INPUT_BUFFER_PADDING_SIZE); if (!extradata) return AVERROR(ENOMEM); avctx->extradata_size = 4; AV_WL16(extradata, flags1); AV_WL16(extradata + 2, flags2); } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) { - extradata = av_mallocz(10); + extradata = av_mallocz(10 + AV_INPUT_BUFFER_PADDING_SIZE); if (!extradata) return AVERROR(ENOMEM); avctx->extradata_size = 10; diff -Nru ffmpeg-5.1.8/libavcodec/x86/fmtconvert.asm ffmpeg-5.1.9/libavcodec/x86/fmtconvert.asm --- ffmpeg-5.1.8/libavcodec/x86/fmtconvert.asm 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/x86/fmtconvert.asm 2026-05-05 14:22:01.000000000 +0000 @@ -85,4 +85,3 @@ INIT_XMM sse2 INT32_TO_FLOAT_FMUL_ARRAY8 - diff -Nru ffmpeg-5.1.8/libavcodec/x86/mpegvideoencdsp.asm ffmpeg-5.1.9/libavcodec/x86/mpegvideoencdsp.asm --- ffmpeg-5.1.8/libavcodec/x86/mpegvideoencdsp.asm 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/x86/mpegvideoencdsp.asm 2026-05-05 15:50:52.000000000 +0000 @@ -106,4 +106,3 @@ INIT_XMM sse2 PIX_NORM1 6, 8 - diff -Nru ffmpeg-5.1.8/libavcodec/xxan.c ffmpeg-5.1.9/libavcodec/xxan.c --- ffmpeg-5.1.8/libavcodec/xxan.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavcodec/xxan.c 2026-05-05 15:50:55.000000000 +0000 @@ -68,7 +68,7 @@ } s->buffer_size = avctx->width * avctx->height; - s->y_buffer = av_malloc(s->buffer_size); + s->y_buffer = av_mallocz(s->buffer_size); if (!s->y_buffer) return AVERROR(ENOMEM); s->scratch_buffer = av_malloc(s->buffer_size + 130); diff -Nru ffmpeg-5.1.8/libavcodec/zmbv.c ffmpeg-5.1.9/libavcodec/zmbv.c --- ffmpeg-5.1.8/libavcodec/zmbv.c 2025-11-26 02:41:32.000000000 +0000 +++ 
ffmpeg-5.1.9/libavcodec/zmbv.c 2026-05-05 15:50:55.000000000 +0000 @@ -138,6 +138,8 @@ } if (d) { /* apply XOR'ed difference */ + if (c->decomp_len - (src - c->decomp_buf) < bw2 * bh2) + return AVERROR_INVALIDDATA; out = output + x; for (j = 0; j < bh2; j++) { for (i = 0; i < bw2; i++) @@ -212,6 +214,8 @@ } if (d) { /* apply XOR'ed difference */ + if (c->decomp_len - (src - c->decomp_buf) < bw2 * bh2 * 2) + return AVERROR_INVALIDDATA; out = output + x; for (j = 0; j < bh2; j++){ for (i = 0; i < bw2; i++) { @@ -296,6 +300,8 @@ } if (d) { /* apply XOR'ed difference */ + if (c->decomp_len - (src - c->decomp_buf) < bw2 * bh2 * 3) + return AVERROR_INVALIDDATA; out = output + x * 3; for (j = 0; j < bh2; j++) { for (i = 0; i < bw2; i++) { @@ -374,6 +380,8 @@ } if (d) { /* apply XOR'ed difference */ + if (c->decomp_len - (src - c->decomp_buf) < bw2 * bh2 * 4) + return AVERROR_INVALIDDATA; out = output + x; for (j = 0; j < bh2; j++){ for (i = 0; i < bw2; i++) { @@ -568,8 +576,10 @@ frame->pict_type = AV_PICTURE_TYPE_P; if (c->decomp_len < 2LL * ((c->width + c->bw - 1) / c->bw) * ((c->height + c->bh - 1) / c->bh)) return AVERROR_INVALIDDATA; - if (c->decomp_len) - c->decode_xor(c); + if (c->decomp_len) { + if ((ret = c->decode_xor(c)) < 0) + return ret; + } } /* update frames */ diff -Nru ffmpeg-5.1.8/libavfilter/aarch64/vf_nlmeans_neon.S ffmpeg-5.1.9/libavfilter/aarch64/vf_nlmeans_neon.S --- ffmpeg-5.1.8/libavfilter/aarch64/vf_nlmeans_neon.S 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/aarch64/vf_nlmeans_neon.S 2026-05-03 23:09:50.000000000 +0000 @@ -22,52 +22,52 @@ // acc_sum_store(ABCD) = {X+A, X+A+B, X+A+B+C, X+A+B+C+D} .macro acc_sum_store x, xb - dup v24.4S, v24.S[3] // ...X -> XXXX - ext v25.16B, v26.16B, \xb, #12 // ext(0000,ABCD,12)=0ABC - add v24.4S, v24.4S, \x // XXXX+ABCD={X+A,X+B,X+C,X+D} - add v24.4S, v24.4S, v25.4S // {X+A,X+B+A,X+C+B,X+D+C} (+0ABC) - ext v25.16B, v26.16B, v25.16B, #12 // ext(0000,0ABC,12)=00AB - add v24.4S, v24.4S, v25.4S 
// {X+A,X+B+A,X+C+B+A,X+D+C+B} (+00AB) - ext v25.16B, v26.16B, v25.16B, #12 // ext(0000,00AB,12)=000A - add v24.4S, v24.4S, v25.4S // {X+A,X+B+A,X+C+B+A,X+D+C+B+A} (+000A) - st1 {v24.4S}, [x0], #16 // write 4x32-bit final values + dup v24.4s, v24.s[3] // ...X -> XXXX + ext v25.16b, v26.16b, \xb, #12 // ext(0000,ABCD,12)=0ABC + add v24.4s, v24.4s, \x // XXXX+ABCD={X+A,X+B,X+C,X+D} + add v24.4s, v24.4s, v25.4s // {X+A,X+B+A,X+C+B,X+D+C} (+0ABC) + ext v25.16b, v26.16b, v25.16b, #12 // ext(0000,0ABC,12)=00AB + add v24.4s, v24.4s, v25.4s // {X+A,X+B+A,X+C+B+A,X+D+C+B} (+00AB) + ext v25.16b, v26.16b, v25.16b, #12 // ext(0000,00AB,12)=000A + add v24.4s, v24.4s, v25.4s // {X+A,X+B+A,X+C+B+A,X+D+C+B+A} (+000A) + st1 {v24.4s}, [x0], #16 // write 4x32-bit final values .endm function ff_compute_safe_ssd_integral_image_neon, export=1 - movi v26.4S, #0 // used as zero for the "rotations" in acc_sum_store - sub x3, x3, w6, UXTW // s1 padding (s1_linesize - w) - sub x5, x5, w6, UXTW // s2 padding (s2_linesize - w) - sub x9, x0, w1, UXTW #2 // dst_top - sub x1, x1, w6, UXTW // dst padding (dst_linesize_32 - w) + movi v26.4s, #0 // used as zero for the "rotations" in acc_sum_store + sub x3, x3, w6, uxtw // s1 padding (s1_linesize - w) + sub x5, x5, w6, uxtw // s2 padding (s2_linesize - w) + sub x9, x0, w1, uxtw #2 // dst_top + sub x1, x1, w6, uxtw // dst padding (dst_linesize_32 - w) lsl x1, x1, #2 // dst padding expressed in bytes 1: mov w10, w6 // width copy for each line sub x0, x0, #16 // beginning of the dst line minus 4 sums sub x8, x9, #4 // dst_top-1 - ld1 {v24.4S}, [x0], #16 // load ...X (contextual last sums) -2: ld1 {v0.16B}, [x2], #16 // s1[x + 0..15] - ld1 {v1.16B}, [x4], #16 // s2[x + 0..15] - ld1 {v16.4S,v17.4S}, [x8], #32 // dst_top[x + 0..7 - 1] - usubl v2.8H, v0.8B, v1.8B // d[x + 0..7] = s1[x + 0..7] - s2[x + 0..7] - usubl2 v3.8H, v0.16B, v1.16B // d[x + 8..15] = s1[x + 8..15] - s2[x + 8..15] - ld1 {v18.4S,v19.4S}, [x8], #32 // dst_top[x + 8..15 - 1] - smull 
v4.4S, v2.4H, v2.4H // d[x + 0..3]^2 - smull2 v5.4S, v2.8H, v2.8H // d[x + 4..7]^2 - ld1 {v20.4S,v21.4S}, [x9], #32 // dst_top[x + 0..7] - smull v6.4S, v3.4H, v3.4H // d[x + 8..11]^2 - smull2 v7.4S, v3.8H, v3.8H // d[x + 12..15]^2 - ld1 {v22.4S,v23.4S}, [x9], #32 // dst_top[x + 8..15] - sub v0.4S, v20.4S, v16.4S // dst_top[x + 0..3] - dst_top[x + 0..3 - 1] - sub v1.4S, v21.4S, v17.4S // dst_top[x + 4..7] - dst_top[x + 4..7 - 1] - add v0.4S, v0.4S, v4.4S // + d[x + 0..3]^2 - add v1.4S, v1.4S, v5.4S // + d[x + 4..7]^2 - sub v2.4S, v22.4S, v18.4S // dst_top[x + 8..11] - dst_top[x + 8..11 - 1] - sub v3.4S, v23.4S, v19.4S // dst_top[x + 12..15] - dst_top[x + 12..15 - 1] - add v2.4S, v2.4S, v6.4S // + d[x + 8..11]^2 - add v3.4S, v3.4S, v7.4S // + d[x + 12..15]^2 - acc_sum_store v0.4S, v0.16B // accumulate and store dst[ 0..3] - acc_sum_store v1.4S, v1.16B // accumulate and store dst[ 4..7] - acc_sum_store v2.4S, v2.16B // accumulate and store dst[ 8..11] - acc_sum_store v3.4S, v3.16B // accumulate and store dst[12..15] + ld1 {v24.4s}, [x0], #16 // load ...X (contextual last sums) +2: ld1 {v0.16b}, [x2], #16 // s1[x + 0..15] + ld1 {v1.16b}, [x4], #16 // s2[x + 0..15] + ld1 {v16.4s,v17.4s}, [x8], #32 // dst_top[x + 0..7 - 1] + usubl v2.8h, v0.8b, v1.8b // d[x + 0..7] = s1[x + 0..7] - s2[x + 0..7] + usubl2 v3.8h, v0.16b, v1.16b // d[x + 8..15] = s1[x + 8..15] - s2[x + 8..15] + ld1 {v18.4s,v19.4s}, [x8], #32 // dst_top[x + 8..15 - 1] + smull v4.4s, v2.4h, v2.4h // d[x + 0..3]^2 + smull2 v5.4s, v2.8h, v2.8h // d[x + 4..7]^2 + ld1 {v20.4s,v21.4s}, [x9], #32 // dst_top[x + 0..7] + smull v6.4s, v3.4h, v3.4h // d[x + 8..11]^2 + smull2 v7.4s, v3.8h, v3.8h // d[x + 12..15]^2 + ld1 {v22.4s,v23.4s}, [x9], #32 // dst_top[x + 8..15] + sub v0.4s, v20.4s, v16.4s // dst_top[x + 0..3] - dst_top[x + 0..3 - 1] + sub v1.4s, v21.4s, v17.4s // dst_top[x + 4..7] - dst_top[x + 4..7 - 1] + add v0.4s, v0.4s, v4.4s // + d[x + 0..3]^2 + add v1.4s, v1.4s, v5.4s // + d[x + 4..7]^2 + sub v2.4s, v22.4s, 
v18.4s // dst_top[x + 8..11] - dst_top[x + 8..11 - 1] + sub v3.4s, v23.4s, v19.4s // dst_top[x + 12..15] - dst_top[x + 12..15 - 1] + add v2.4s, v2.4s, v6.4s // + d[x + 8..11]^2 + add v3.4s, v3.4s, v7.4s // + d[x + 12..15]^2 + acc_sum_store v0.4s, v0.16b // accumulate and store dst[ 0..3] + acc_sum_store v1.4s, v1.16b // accumulate and store dst[ 4..7] + acc_sum_store v2.4s, v2.16b // accumulate and store dst[ 8..11] + acc_sum_store v3.4s, v3.16b // accumulate and store dst[12..15] subs w10, w10, #16 // width dec b.ne 2b // loop til next line add x2, x2, x3 // skip to next line (s1) diff -Nru ffmpeg-5.1.8/libavfilter/af_amerge.c ffmpeg-5.1.9/libavfilter/af_amerge.c --- ffmpeg-5.1.8/libavfilter/af_amerge.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/af_amerge.c 2026-05-05 15:50:52.000000000 +0000 @@ -76,7 +76,7 @@ AVChannelLayout *inlayout[SWR_CH_MAX] = { NULL }, outlayout = { 0 }; uint64_t outmask = 0; AVFilterChannelLayouts *layouts; - int i, ret, overlap = 0, nb_ch = 0; + int i, ret, nb_ch = 0; for (i = 0; i < s->nb_inputs; i++) { if (!ctx->inputs[i]->incfg.channel_layouts || @@ -91,15 +91,11 @@ av_channel_layout_describe(inlayout[i], buf, sizeof(buf)); av_log(ctx, AV_LOG_INFO, "Using \"%s\" for input %d\n", buf, i + 1); } - s->in[i].nb_ch = FF_LAYOUT2COUNT(inlayout[i]); - if (s->in[i].nb_ch) { - overlap++; - } else { - s->in[i].nb_ch = inlayout[i]->nb_channels; - if (av_channel_layout_subset(inlayout[i], outmask)) - overlap++; - outmask |= inlayout[i]->order == AV_CHANNEL_ORDER_NATIVE ? 
- inlayout[i]->u.mask : 0; + s->in[i].nb_ch = inlayout[i]->nb_channels; + for (int j = 0; j < s->in[i].nb_ch; j++) { + enum AVChannel id = av_channel_layout_channel_from_index(inlayout[i], j); + if (id >= 0 && id < 64) + outmask |= (1ULL << id); } nb_ch += s->in[i].nb_ch; } @@ -107,7 +103,7 @@ av_log(ctx, AV_LOG_ERROR, "Too many channels (max %d)\n", SWR_CH_MAX); return AVERROR(EINVAL); } - if (overlap) { + if (av_popcount64(outmask) != nb_ch) { av_log(ctx, AV_LOG_WARNING, "Input channel layouts overlap: " "output layout will be determined by the number of distinct input channels\n"); diff -Nru ffmpeg-5.1.8/libavfilter/af_lv2.c ffmpeg-5.1.9/libavfilter/af_lv2.c --- ffmpeg-5.1.8/libavfilter/af_lv2.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/af_lv2.c 2026-05-05 15:50:55.000000000 +0000 @@ -72,6 +72,7 @@ float *controls; LilvInstance *instance; + int instance_activated; LilvNode *atom_AtomPort; LilvNode *atom_Sequence; @@ -389,6 +390,9 @@ inlink->min_samples = inlink->max_samples = 4096; } + lilv_instance_activate(s->instance); + s->instance_activated = 1; + return 0; } @@ -568,6 +572,8 @@ { LV2Context *s = ctx->priv; + if (s->instance_activated) + lilv_instance_deactivate(s->instance); lilv_node_free(s->powerOf2BlockLength); lilv_node_free(s->fixedBlockLength); lilv_node_free(s->boundedBlockLength); diff -Nru ffmpeg-5.1.8/libavfilter/af_pan.c ffmpeg-5.1.9/libavfilter/af_pan.c --- ffmpeg-5.1.8/libavfilter/af_pan.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/af_pan.c 2026-05-05 15:50:55.000000000 +0000 @@ -69,7 +69,7 @@ skip_spaces(arg); /* try to parse a channel name, e.g. "FL" */ - if (sscanf(*arg, "%7[A-Z]%n", buf, &len)) { + if (sscanf(*arg, "%7[A-Z]%n", buf, &len) >= 1) { channel_id = av_channel_from_string(buf); if (channel_id < 0) return channel_id; @@ -80,7 +80,7 @@ return 0; } /* try to parse a channel number, e.g. 
"c2" */ - if (sscanf(*arg, "c%d%n", &channel_id, &len) && + if (sscanf(*arg, "c%d%n", &channel_id, &len) >= 1 && channel_id >= 0 && channel_id < MAX_CHANNELS) { *rchannel = channel_id; *rnamed = 0; diff -Nru ffmpeg-5.1.8/libavfilter/afir_template.c ffmpeg-5.1.9/libavfilter/afir_template.c --- ffmpeg-5.1.8/libavfilter/afir_template.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/afir_template.c 2026-05-05 15:50:52.000000000 +0000 @@ -388,5 +388,3 @@ return 0; } - - diff -Nru ffmpeg-5.1.8/libavfilter/convolution.h ffmpeg-5.1.9/libavfilter/convolution.h --- ffmpeg-5.1.8/libavfilter/convolution.h 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/convolution.h 2026-05-05 15:50:55.000000000 +0000 @@ -21,6 +21,7 @@ #ifndef AVFILTER_CONVOLUTION_H #define AVFILTER_CONVOLUTION_H #include "avfilter.h" +#include "libavutil/internal.h" enum MatrixMode { MATRIX_SQUARE, diff -Nru ffmpeg-5.1.8/libavfilter/qp_table.c ffmpeg-5.1.9/libavfilter/qp_table.c --- ffmpeg-5.1.8/libavfilter/qp_table.c 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/qp_table.c 2026-05-05 14:22:01.000000000 +0000 @@ -66,4 +66,3 @@ return 0; } - diff -Nru ffmpeg-5.1.8/libavfilter/scale_eval.c ffmpeg-5.1.9/libavfilter/scale_eval.c --- ffmpeg-5.1.8/libavfilter/scale_eval.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/scale_eval.c 2026-05-05 15:50:55.000000000 +0000 @@ -83,18 +83,27 @@ av_expr_parse_and_eval(&res, (expr = w_expr), var_names, var_values, NULL, NULL, NULL, NULL, NULL, 0, log_ctx); - eval_w = var_values[VAR_OUT_W] = var_values[VAR_OW] = (int) res == 0 ? inlink->w : (int) res; + var_values[VAR_OUT_W] = var_values[VAR_OW] = res == 0 ? 
inlink->w : trunc(res); if ((ret = av_expr_parse_and_eval(&res, (expr = h_expr), var_names, var_values, NULL, NULL, NULL, NULL, NULL, 0, log_ctx)) < 0) goto fail; + if (!(res >= INT32_MIN && res <= INT32_MAX)) { + ret = AVERROR(EINVAL); + goto fail; + } + eval_h = var_values[VAR_OUT_H] = var_values[VAR_OH] = (int) res == 0 ? inlink->h : (int) res; /* evaluate again the width, as it may depend on the output height */ if ((ret = av_expr_parse_and_eval(&res, (expr = w_expr), var_names, var_values, NULL, NULL, NULL, NULL, NULL, 0, log_ctx)) < 0) goto fail; + if (!(res >= INT32_MIN && res <= INT32_MAX)) { + ret = AVERROR(EINVAL); + goto fail; + } eval_w = (int) res == 0 ? inlink->w : (int) res; *ret_w = eval_w; @@ -115,7 +124,7 @@ int force_original_aspect_ratio, int force_divisible_by) { int64_t w, h; - int factor_w, factor_h; + int64_t factor_w, factor_h; w = *ret_w; h = *ret_h; diff -Nru ffmpeg-5.1.8/libavfilter/scene_sad.c ffmpeg-5.1.9/libavfilter/scene_sad.c --- ffmpeg-5.1.8/libavfilter/scene_sad.c 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/scene_sad.c 2026-05-05 14:22:01.000000000 +0000 @@ -70,4 +70,3 @@ } return sad; } - diff -Nru ffmpeg-5.1.8/libavfilter/vf_codecview.c ffmpeg-5.1.9/libavfilter/vf_codecview.c --- ffmpeg-5.1.8/libavfilter/vf_codecview.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/vf_codecview.c 2026-05-05 15:50:55.000000000 +0000 @@ -264,9 +264,22 @@ if (par->nb_blocks) { for (int block_idx = 0; block_idx < par->nb_blocks; block_idx++) { AVVideoBlockParams *b = av_video_enc_params_block(par, block_idx); - uint8_t *buf = frame->data[0] + b->src_y * stride; - draw_block_rectangle(buf, b->src_x, b->src_y, b->w, b->h, stride, 100); + int64_t x0 = b->src_x; + int64_t y0 = b->src_y; + int64_t x1 = x0 + b->w; + int64_t y1 = y0 + b->h; + + x0 = FFMAX(x0, 0); + y0 = FFMAX(y0, 0); + x1 = FFMIN(x1, frame->width); + y1 = FFMIN(y1, frame->height); + + if (x1 <= x0 || y1 <= y0) + continue; + + uint8_t *buf = 
frame->data[0] + y0 * stride; + draw_block_rectangle(buf, x0, y0, x1-x0, y1-y0, stride, 100); } } } diff -Nru ffmpeg-5.1.8/libavfilter/vf_convolution.c ffmpeg-5.1.9/libavfilter/vf_convolution.c --- ffmpeg-5.1.8/libavfilter/vf_convolution.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/vf_convolution.c 2026-05-05 15:50:55.000000000 +0000 @@ -574,11 +574,8 @@ int i; for (i = 0; i < 25; i++) { - int xoff = FFABS(x + ((i % 5) - 2)); - int yoff = FFABS(y + (i / 5) - 2); - - xoff = xoff >= w ? 2 * w - 1 - xoff : xoff; - yoff = yoff >= h ? 2 * h - 1 - yoff : yoff; + int xoff = avpriv_mirror(x + (i % 5) - 2, w - 1); + int yoff = avpriv_mirror(y + (i / 5) - 2, h - 1); c[i] = src + xoff * bpc + yoff * stride; } @@ -590,11 +587,8 @@ int i; for (i = 0; i < 49; i++) { - int xoff = FFABS(x + ((i % 7) - 3)); - int yoff = FFABS(y + (i / 7) - 3); - - xoff = xoff >= w ? 2 * w - 1 - xoff : xoff; - yoff = yoff >= h ? 2 * h - 1 - yoff : yoff; + int xoff = avpriv_mirror(x + (i % 7) - 3, w - 1); + int yoff = avpriv_mirror(y + (i / 7) - 3, h - 1); c[i] = src + xoff * bpc + yoff * stride; } @@ -606,9 +600,7 @@ int i; for (i = 0; i < radius * 2 + 1; i++) { - int xoff = FFABS(x + i - radius); - - xoff = xoff >= w ? 2 * w - 1 - xoff : xoff; + int xoff = avpriv_mirror(x + i - radius, w - 1); c[i] = src + xoff * bpc + y * stride; } @@ -620,9 +612,7 @@ int i; for (i = 0; i < radius * 2 + 1; i++) { - int xoff = FFABS(x + i - radius); - - xoff = xoff >= h ? 2 * h - 1 - xoff : xoff; + int xoff = avpriv_mirror(x + i - radius, h - 1); c[i] = src + y * bpc + xoff * stride; } @@ -668,10 +658,12 @@ continue; } for (y = slice_start; y < slice_end; y += step) { - const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : radius * bpc; - const int yoff = mode == MATRIX_COLUMN ? radius * dstride : 0; + const int left = FFMIN(radius, sizew); + const int right = FFMAX(left, sizew - radius); + const int xoff = mode == MATRIX_COLUMN ? 
(y - slice_start) * bpc : left * bpc; + const int yoff = mode == MATRIX_COLUMN ? left * dstride : 0; - for (x = 0; x < radius; x++) { + for (x = 0; x < left; x++) { const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : x * bpc; const int yoff = mode == MATRIX_COLUMN ? x * dstride : 0; @@ -680,11 +672,11 @@ bias, matrix, c, s->max, radius, dstride, stride, slice_end - step); } - s->setup[plane](radius, c, src, stride, radius, width, y, height, bpc); - s->filter[plane](dst + yoff + xoff, sizew - 2 * radius, + s->setup[plane](radius, c, src, stride, left, width, y, height, bpc); + s->filter[plane](dst + yoff + xoff, right - left, rdiv, bias, matrix, c, s->max, radius, dstride, stride, slice_end - step); - for (x = sizew - radius; x < sizew; x++) { + for (x = right; x < sizew; x++) { const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : x * bpc; const int yoff = mode == MATRIX_COLUMN ? x * dstride : 0; diff -Nru ffmpeg-5.1.8/libavfilter/vf_find_rect.c ffmpeg-5.1.9/libavfilter/vf_find_rect.c --- ffmpeg-5.1.8/libavfilter/vf_find_rect.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/vf_find_rect.c 2026-05-05 15:50:55.000000000 +0000 @@ -51,8 +51,8 @@ { "mipmaps", "set mipmaps", OFFSET(mipmaps), AV_OPT_TYPE_INT, {.i64 = 3}, 1, MAX_MIPMAPS, FLAGS }, { "xmin", "", OFFSET(xmin), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, { "ymin", "", OFFSET(ymin), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, - { "xmax", "", OFFSET(xmax), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, - { "ymax", "", OFFSET(ymax), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, + { "xmax", "", OFFSET(xmax), AV_OPT_TYPE_INT, {.i64 = INT_MAX}, 0, INT_MAX, FLAGS }, + { "ymax", "", OFFSET(ymax), AV_OPT_TYPE_INT, {.i64 = INT_MAX}, 0, INT_MAX, FLAGS }, { "discard", "", OFFSET(discard), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS }, { NULL } }; @@ -78,8 +78,10 @@ src = in ->data[0]; dst = frame->data[0]; - for(y = 0; y < frame->height; y++) { - for(x = 0; x 
< frame->width; x++) { + int w2 = in->width/2; + int h2 = in->height/2; + for(y = 0; y < h2; y++) { + for(x = 0; x < w2; x++) { dst[x] = ( src[2*x+0] + src[2*x+1] + src[2*x+0 + in->linesize[0]] @@ -89,6 +91,22 @@ src += 2*in->linesize[0]; dst += frame->linesize[0]; } + src = in ->data[0]; + dst = frame->data[0]; + for(y = 0; y < frame->height; y++) { + int yd = y < h2 ? in->linesize[0] : 0; + x = yd ? w2 : 0; + for(; x < frame->width; x++) { + dst[x] = ( src[2*x+0] + + src[FFMIN(2*x+1, w2)] + + src[2*x+0 + yd] + + src[FFMIN(2*x+1, w2) + yd] + + 2) >> 2; + } + src += 2*in->linesize[0]; + dst += frame->linesize[0]; + } + return frame; } @@ -130,19 +148,6 @@ return 1 - fabs(c); } -static int config_input(AVFilterLink *inlink) -{ - AVFilterContext *ctx = inlink->dst; - FOCContext *foc = ctx->priv; - - if (foc->xmax <= 0) - foc->xmax = inlink->w - foc->obj_frame->width; - if (foc->ymax <= 0) - foc->ymax = inlink->h - foc->obj_frame->height; - - return 0; -} - static float search(FOCContext *foc, int pass, int maxpass, int xmin, int xmax, int ymin, int ymax, int *best_x, int *best_y, float best_score) { int x, y; @@ -178,19 +183,24 @@ int i; char buf[32]; + int xmin = FFMAX(foc->xmin, 0); + int ymin = FFMAX(foc->ymin, 0); + int xmax = FFMIN(foc->xmax, inlink->w - foc->obj_frame->width ); + int ymax = FFMIN(foc->ymax, inlink->h - foc->obj_frame->height); + foc->haystack_frame[0] = av_frame_clone(in); for (i=1; i<foc->mipmaps; i++) { foc->haystack_frame[i] = downscale(foc->haystack_frame[i-1]); } best_score = search(foc, 0, 0, - FFMAX(foc->xmin, foc->last_x - 8), - FFMIN(foc->xmax, foc->last_x + 8), - FFMAX(foc->ymin, foc->last_y - 8), - FFMIN(foc->ymax, foc->last_y + 8), + FFMAX(xmin, foc->last_x - 8), + FFMIN(xmax, foc->last_x + 8), + FFMAX(ymin, foc->last_y - 8), + FFMIN(ymax, foc->last_y + 8), &best_x, &best_y, 2.0); - best_score = search(foc, 0, foc->mipmaps - 1, foc->xmin, foc->xmax, foc->ymin, foc->ymax, + best_score = search(foc, 0, foc->mipmaps - 1, xmin, xmax, ymin,
ymax, &best_x, &best_y, best_score); for (i=0; iis_packed_rgb = av_pix_fmt_desc_get(inlink->format)->flags & AV_PIX_FMT_FLAG_RGB; kerndeint->vsub = desc->log2_chroma_h; + if (AV_CEIL_RSHIFT(inlink->h, kerndeint->vsub) < 4) { + av_log(inlink->dst, AV_LOG_ERROR, + "Input height %d is too small; minimum chroma plane height is 4\n", + inlink->h); + return AVERROR(EINVAL); + } ret = av_image_alloc(kerndeint->tmp_data, kerndeint->tmp_linesize, inlink->w, inlink->h, inlink->format, 16); diff -Nru ffmpeg-5.1.8/libavfilter/vf_libopencv.c ffmpeg-5.1.9/libavfilter/vf_libopencv.c --- ffmpeg-5.1.8/libavfilter/vf_libopencv.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/vf_libopencv.c 2026-05-05 15:50:55.000000000 +0000 @@ -209,7 +209,7 @@ int cols = 0, rows = 0, anchor_x = 0, anchor_y = 0, shape = CV_SHAPE_RECT; int *values = NULL, ret = 0; - sscanf(buf, "%dx%d+%dx%d/%32[^=]=%127s", &cols, &rows, &anchor_x, &anchor_y, shape_str, shape_filename); + sscanf(buf, "%dx%d+%dx%d/%31[^=]=%127s", &cols, &rows, &anchor_x, &anchor_y, shape_str, shape_filename); if (!strcmp(shape_str, "rect" )) shape = CV_SHAPE_RECT; else if (!strcmp(shape_str, "cross" )) shape = CV_SHAPE_CROSS; diff -Nru ffmpeg-5.1.8/libavfilter/vf_neighbor_opencl.c ffmpeg-5.1.9/libavfilter/vf_neighbor_opencl.c --- ffmpeg-5.1.8/libavfilter/vf_neighbor_opencl.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/vf_neighbor_opencl.c 2026-05-05 15:50:55.000000000 +0000 @@ -69,6 +69,9 @@ kernel_name = "erosion_global"; } else if (!strcmp(avctx->filter->name, "dilation_opencl")){ kernel_name = "dilation_global"; + } else { + err = AVERROR_BUG; + goto fail; } ctx->kernel = clCreateKernel(ctx->ocf.program, kernel_name, &cle); CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create " diff -Nru ffmpeg-5.1.8/libavfilter/vf_overlay_cuda.cu ffmpeg-5.1.9/libavfilter/vf_overlay_cuda.cu --- ffmpeg-5.1.8/libavfilter/vf_overlay_cuda.cu 2025-11-21 01:15:18.000000000 +0000 +++ 
ffmpeg-5.1.9/libavfilter/vf_overlay_cuda.cu 2026-05-05 14:22:01.000000000 +0000 @@ -51,4 +51,3 @@ } } - diff -Nru ffmpeg-5.1.8/libavfilter/vf_scale.c ffmpeg-5.1.9/libavfilter/vf_scale.c --- ffmpeg-5.1.8/libavfilter/vf_scale.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/vf_scale.c 2026-05-05 15:50:55.000000000 +0000 @@ -503,8 +503,8 @@ if (outlink->w > INT_MAX || outlink->h > INT_MAX || - (outlink->h * inlink->w) > INT_MAX || - (outlink->w * inlink->h) > INT_MAX) + (outlink->h * (uint64_t)inlink->w) > INT_MAX || + (outlink->w * (uint64_t)inlink->h) > INT_MAX) av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n"); /* TODO: make algorithm configurable */ diff -Nru ffmpeg-5.1.8/libavfilter/vf_stack.c ffmpeg-5.1.9/libavfilter/vf_stack.c --- ffmpeg-5.1.8/libavfilter/vf_stack.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/vf_stack.c 2026-05-05 15:50:55.000000000 +0000 @@ -227,6 +227,8 @@ item->y[1] = item->y[2] = AV_CEIL_RSHIFT(height, s->desc->log2_chroma_h); item->y[0] = item->y[3] = height; + if (height > INT_MAX - ctx->inputs[i]->h) + return AVERROR(EINVAL); height += ctx->inputs[i]->h; } } @@ -252,6 +254,8 @@ return ret; } + if (width > INT_MAX - ctx->inputs[i]->w) + return AVERROR(EINVAL); width += ctx->inputs[i]->w; } } @@ -287,8 +291,13 @@ item->y[1] = item->y[2] = AV_CEIL_RSHIFT(inh, s->desc->log2_chroma_h); item->y[0] = item->y[3] = inh; + + if (inw > INT_MAX - ctx->inputs[k]->w) + return AVERROR(EINVAL); inw += ctx->inputs[k]->w; } + if (height > INT_MAX - row_height) + return AVERROR(EINVAL); height += row_height; if (!i) width = inw; @@ -339,26 +348,41 @@ if (size == i || size < 0 || size >= s->nb_inputs) return AVERROR(EINVAL); - if (!j) + if (!j) { + if (inw > INT_MAX - ctx->inputs[size]->w) + return AVERROR(EINVAL); inw += ctx->inputs[size]->w; - else + } else { + if (inh > INT_MAX - ctx->inputs[size]->w) + return AVERROR(EINVAL); inh += ctx->inputs[size]->w; + } } else if (sscanf(arg3, 
"h%d", &size) == 1) { if (size == i || size < 0 || size >= s->nb_inputs) return AVERROR(EINVAL); - if (!j) + if (!j) { + if (inw > INT_MAX - ctx->inputs[size]->h) + return AVERROR(EINVAL); inw += ctx->inputs[size]->h; - else + } else { + if (inh > INT_MAX - ctx->inputs[size]->h) + return AVERROR(EINVAL); inh += ctx->inputs[size]->h; + } } else if (sscanf(arg3, "%d", &size) == 1) { if (size < 0) return AVERROR(EINVAL); - if (!j) + if (!j) { + if (inw > INT_MAX - size) + return AVERROR(EINVAL); inw += size; - else + } else { + if (inh > INT_MAX - size) + return AVERROR(EINVAL); inh += size; + } } else { return AVERROR(EINVAL); } @@ -372,6 +396,8 @@ item->y[1] = item->y[2] = AV_CEIL_RSHIFT(inh, s->desc->log2_chroma_h); item->y[0] = item->y[3] = inh; + if (inlink->w > INT_MAX - inw || inlink->h > INT_MAX - inh) + return AVERROR(EINVAL); width = FFMAX(width, inlink->w + inw); height = FFMAX(height, inlink->h + inh); } diff -Nru ffmpeg-5.1.8/libavfilter/vf_v360.c ffmpeg-5.1.9/libavfilter/vf_v360.c --- ffmpeg-5.1.8/libavfilter/vf_v360.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/vf_v360.c 2026-05-05 15:50:55.000000000 +0000 @@ -286,7 +286,8 @@ const AVFrame *in = td->in; \ AVFrame *out = td->out; \ \ - for (int stereo = 0; stereo < 1 + s->out_stereo > STEREO_2D; stereo++) { \ + \ + for (int stereo = 0; stereo < 1 + (s->out_stereo > STEREO_2D); stereo++) { \ for (int plane = 0; plane < s->nb_planes; plane++) { \ const unsigned map = s->map[plane]; \ const int in_linesize = in->linesize[plane]; \ diff -Nru ffmpeg-5.1.8/libavfilter/vf_zscale.c ffmpeg-5.1.9/libavfilter/vf_zscale.c --- ffmpeg-5.1.8/libavfilter/vf_zscale.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavfilter/vf_zscale.c 2026-05-05 15:50:55.000000000 +0000 @@ -251,7 +251,7 @@ double var_values[VARS_NB], res; char *expr; int ret; - int factor_w, factor_h; + int64_t factor_w, factor_h; var_values[VAR_IN_W] = var_values[VAR_IW] = inlink->w; var_values[VAR_IN_H] = var_values[VAR_IH] 
= inlink->h; @@ -270,17 +270,26 @@ av_expr_parse_and_eval(&res, (expr = s->w_expr), var_names, var_values, NULL, NULL, NULL, NULL, NULL, 0, ctx); - s->w = var_values[VAR_OUT_W] = var_values[VAR_OW] = res; + var_values[VAR_OUT_W] = var_values[VAR_OW] = trunc(res); if ((ret = av_expr_parse_and_eval(&res, (expr = s->h_expr), var_names, var_values, NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) goto fail; + if (!(res >= INT32_MIN && res <= INT32_MAX)) { + ret = AVERROR(EINVAL); + goto fail; + } + s->h = var_values[VAR_OUT_H] = var_values[VAR_OH] = res; /* evaluate again the width, as it may depend on the output height */ if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr), var_names, var_values, NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) goto fail; + if (!(res >= INT32_MIN && res <= INT32_MAX)) { + ret = AVERROR(EINVAL); + goto fail; + } s->w = res; w = s->w; diff -Nru ffmpeg-5.1.8/libavformat/avidec.c ffmpeg-5.1.9/libavformat/avidec.c --- ffmpeg-5.1.8/libavformat/avidec.c 2025-11-26 02:41:35.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/avidec.c 2026-05-05 15:50:55.000000000 +0000 @@ -549,9 +549,11 @@ avi->movi_end = avi->fsize; av_log(s, AV_LOG_TRACE, "movi end=%"PRIx64"\n", avi->movi_end); goto end_of_header; - } else if (tag1 == MKTAG('I', 'N', 'F', 'O')) + } else if (tag1 == MKTAG('I', 'N', 'F', 'O')) { + if (size < 4) + return AVERROR_INVALIDDATA; ff_read_riff_info(s, size - 4); - else if (tag1 == MKTAG('n', 'c', 'd', 't')) + } else if (tag1 == MKTAG('n', 'c', 'd', 't')) avi_read_nikon(s, list_end); break; @@ -1820,6 +1822,10 @@ avi->index_loaded=2; ret = 0; }else if (tag == MKTAG('L', 'I', 'S', 'T')) { + if (size < 4) { + av_log(s, AV_LOG_WARNING, "Invalid size (%u) LIST in index\n", size); + break; + } uint32_t tag1 = avio_rl32(pb); if (tag1 == MKTAG('I', 'N', 'F', 'O')) diff -Nru ffmpeg-5.1.8/libavformat/cafdec.c ffmpeg-5.1.9/libavformat/cafdec.c --- ffmpeg-5.1.8/libavformat/cafdec.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/cafdec.c 
2026-05-05 15:50:55.000000000 +0000 @@ -281,6 +281,10 @@ AVIOContext *pb = s->pb; unsigned int i; unsigned int nb_entries = avio_rb32(pb); + + if (3LL * nb_entries > size) + return; + for (i = 0; i < nb_entries && !avio_feof(pb); i++) { char key[32]; char value[1024]; @@ -494,6 +498,8 @@ frame_cnt = caf->frames_per_packet * packet_cnt; } else if (sti->nb_index_entries) { packet_cnt = av_index_search_timestamp(st, timestamp, flags); + if (packet_cnt < 0) + return -1; frame_cnt = sti->index_entries[packet_cnt].timestamp; pos = sti->index_entries[packet_cnt].pos; } else { diff -Nru ffmpeg-5.1.8/libavformat/concat.c ffmpeg-5.1.9/libavformat/concat.c --- ffmpeg-5.1.8/libavformat/concat.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/concat.c 2026-05-05 15:50:55.000000000 +0000 @@ -111,6 +111,12 @@ break; } + if (total_size > INT64_MAX - size) { + ffurl_close(uc); + err = AVERROR_INVALIDDATA; + break; + } + /* assembling */ nodes[i].uc = uc; nodes[i].size = size; @@ -280,6 +286,12 @@ break; } + if (total_size > INT64_MAX - size) { + ffurl_close(uc); + err = AVERROR_INVALIDDATA; + break; + } + nodes = av_fast_realloc(data->nodes, &nodes_size, sizeof(*nodes) * len); if (!nodes) { ffurl_close(uc); diff -Nru ffmpeg-5.1.8/libavformat/dash.c ffmpeg-5.1.9/libavformat/dash.c --- ffmpeg-5.1.8/libavformat/dash.c 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/dash.c 2026-05-05 14:22:01.000000000 +0000 @@ -152,5 +152,3 @@ t_cur = t_next; } } - - diff -Nru ffmpeg-5.1.8/libavformat/dashdec.c ffmpeg-5.1.9/libavformat/dashdec.c --- ffmpeg-5.1.8/libavformat/dashdec.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/dashdec.c 2026-05-05 15:50:55.000000000 +0000 @@ -828,6 +828,43 @@ } +#define SET_REPRESENTATION_SEQUENCE_BASE_INFO(arg, cnt) { \ + val = get_val_from_nodes_tab((arg), (cnt), "duration"); \ + if (val) { \ + int64_t fragment_duration = (int64_t) strtoll(val, NULL, 10); \ + if (fragment_duration < 0) { \ + av_log(s, 
AV_LOG_WARNING, "duration invalid, autochanged to 0.\n"); \ + fragment_duration = 0; \ + } \ + rep->fragment_duration = fragment_duration; \ + av_log(s, AV_LOG_TRACE, "rep->fragment_duration = [%"PRId64"]\n", rep->fragment_duration); \ + xmlFree(val); \ + } \ + val = get_val_from_nodes_tab((arg), (cnt), "timescale"); \ + if (val) { \ + int64_t fragment_timescale = (int64_t) strtoll(val, NULL, 10); \ + if (fragment_timescale < 0) { \ + av_log(s, AV_LOG_WARNING, "timescale invalid, autochanged to 0.\n"); \ + fragment_timescale = 0; \ + } \ + rep->fragment_timescale = fragment_timescale; \ + av_log(s, AV_LOG_TRACE, "rep->fragment_timescale = [%"PRId64"]\n", rep->fragment_timescale); \ + xmlFree(val); \ + } \ + val = get_val_from_nodes_tab((arg), (cnt), "startNumber"); \ + if (val) { \ + int64_t start_number = (int64_t) strtoll(val, NULL, 10); \ + if (start_number < 0) { \ + av_log(s, AV_LOG_WARNING, "startNumber invalid, autochanged to 0.\n"); \ + start_number = 0; \ + } \ + rep->start_number = rep->first_seq_no = start_number; \ + av_log(s, AV_LOG_TRACE, "rep->first_seq_no = [%"PRId64"]\n", rep->first_seq_no); \ + xmlFree(val); \ + } \ + } + + static int parse_manifest_representation(AVFormatContext *s, const char *url, xmlNodePtr node, xmlNodePtr adaptionset_node, @@ -942,28 +979,17 @@ } val = get_val_from_nodes_tab(fragment_templates_tab, 4, "presentationTimeOffset"); if (val) { - rep->presentation_timeoffset = (int64_t) strtoll(val, NULL, 10); + int64_t presentation_timeoffset = (int64_t) strtoll(val, NULL, 10); + if (presentation_timeoffset < 0) { + av_log(s, AV_LOG_WARNING, "presentationTimeOffset invalid, autochanged to 0.\n"); + presentation_timeoffset = 0; + } + rep->presentation_timeoffset = presentation_timeoffset; av_log(s, AV_LOG_TRACE, "rep->presentation_timeoffset = [%"PRId64"]\n", rep->presentation_timeoffset); xmlFree(val); } - val = get_val_from_nodes_tab(fragment_templates_tab, 4, "duration"); - if (val) { - rep->fragment_duration = (int64_t) 
strtoll(val, NULL, 10); - av_log(s, AV_LOG_TRACE, "rep->fragment_duration = [%"PRId64"]\n", rep->fragment_duration); - xmlFree(val); - } - val = get_val_from_nodes_tab(fragment_templates_tab, 4, "timescale"); - if (val) { - rep->fragment_timescale = (int64_t) strtoll(val, NULL, 10); - av_log(s, AV_LOG_TRACE, "rep->fragment_timescale = [%"PRId64"]\n", rep->fragment_timescale); - xmlFree(val); - } - val = get_val_from_nodes_tab(fragment_templates_tab, 4, "startNumber"); - if (val) { - rep->start_number = rep->first_seq_no = (int64_t) strtoll(val, NULL, 10); - av_log(s, AV_LOG_TRACE, "rep->first_seq_no = [%"PRId64"]\n", rep->first_seq_no); - xmlFree(val); - } + + SET_REPRESENTATION_SEQUENCE_BASE_INFO(fragment_templates_tab, 4); if (adaptionset_supplementalproperty_node) { char *scheme_id_uri = xmlGetProp(adaptionset_supplementalproperty_node, "schemeIdUri"); if (scheme_id_uri) { @@ -1020,25 +1046,7 @@ segmentlists_tab[1] = adaptionset_segmentlist_node; segmentlists_tab[2] = period_segmentlist_node; - val = get_val_from_nodes_tab(segmentlists_tab, 3, "duration"); - if (val) { - rep->fragment_duration = (int64_t) strtoll(val, NULL, 10); - av_log(s, AV_LOG_TRACE, "rep->fragment_duration = [%"PRId64"]\n", rep->fragment_duration); - xmlFree(val); - } - val = get_val_from_nodes_tab(segmentlists_tab, 3, "timescale"); - if (val) { - rep->fragment_timescale = (int64_t) strtoll(val, NULL, 10); - av_log(s, AV_LOG_TRACE, "rep->fragment_timescale = [%"PRId64"]\n", rep->fragment_timescale); - xmlFree(val); - } - val = get_val_from_nodes_tab(segmentlists_tab, 3, "startNumber"); - if (val) { - rep->start_number = rep->first_seq_no = (int64_t) strtoll(val, NULL, 10); - av_log(s, AV_LOG_TRACE, "rep->first_seq_no = [%"PRId64"]\n", rep->first_seq_no); - xmlFree(val); - } - + SET_REPRESENTATION_SEQUENCE_BASE_INFO(segmentlists_tab, 3) fragmenturl_node = xmlFirstElementChild(representation_segmentlist_node); while (fragmenturl_node) { ret = parse_manifest_segmenturlnode(s, rep, 
fragmenturl_node, diff -Nru ffmpeg-5.1.8/libavformat/demux.c ffmpeg-5.1.9/libavformat/demux.c --- ffmpeg-5.1.8/libavformat/demux.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/demux.c 2026-05-05 15:50:55.000000000 +0000 @@ -761,9 +761,14 @@ } else { for (int i = 0; i < delay; i++) { if (pts_buffer[i] != AV_NOPTS_VALUE) { - int64_t diff = FFABS(pts_buffer[i] - dts) - + (uint64_t)sti->pts_reorder_error[i]; - diff = FFMAX(diff, sti->pts_reorder_error[i]); +#define ABSDIFF(a,b) (((a) < (b)) ? (b) - (uint64_t)(a) : ((a) - (uint64_t)(b))) + uint64_t diff = ABSDIFF(pts_buffer[i], dts); + + if (diff > INT64_MAX - sti->pts_reorder_error[i]) { + diff = INT64_MAX; + } else + diff += sti->pts_reorder_error[i]; + sti->pts_reorder_error[i] = diff; sti->pts_reorder_error_count[i]++; if (sti->pts_reorder_error_count[i] > 250) { diff -Nru ffmpeg-5.1.8/libavformat/dhav.c ffmpeg-5.1.9/libavformat/dhav.c --- ffmpeg-5.1.8/libavformat/dhav.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/dhav.c 2026-05-05 15:50:55.000000000 +0000 @@ -288,7 +288,9 @@ if (seek_back < 9) break; dhav->last_good_pos = avio_tell(s->pb); - avio_seek(s->pb, -seek_back, SEEK_CUR); + int64_t ret64 = avio_seek(s->pb, -seek_back, SEEK_CUR); + if (ret64 < 0) + return ret64; } avio_seek(s->pb, dhav->last_good_pos, SEEK_SET); } diff -Nru ffmpeg-5.1.8/libavformat/dss.c ffmpeg-5.1.9/libavformat/dss.c --- ffmpeg-5.1.8/libavformat/dss.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/dss.c 2026-05-05 15:50:55.000000000 +0000 @@ -24,6 +24,7 @@ #include "avformat.h" #include "internal.h" +#include "avio_internal.h" #define DSS_HEAD_OFFSET_AUTHOR 0xc #define DSS_AUTHOR_SIZE 16 @@ -336,7 +337,9 @@ if (ret < 0) return ret; - avio_read(s->pb, header, DSS_AUDIO_BLOCK_HEADER_SIZE); + ret = ffio_read_size(s->pb, header, DSS_AUDIO_BLOCK_HEADER_SIZE); + if (ret < 0) + return ret; ctx->swap = !!(header[0] & 0x80); offset = 2*header[1] + 2*ctx->swap; if (offset < 
DSS_AUDIO_BLOCK_HEADER_SIZE) diff -Nru ffmpeg-5.1.8/libavformat/dtshddec.c ffmpeg-5.1.9/libavformat/dtshddec.c --- ffmpeg-5.1.8/libavformat/dtshddec.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/dtshddec.c 2026-05-05 15:50:55.000000000 +0000 @@ -24,6 +24,7 @@ #include "libavcodec/dca.h" #include "avformat.h" #include "internal.h" +#include "avio_internal.h" #define AUPR_HDR 0x415550522D484452 #define AUPRINFO 0x41555052494E464F @@ -114,7 +115,11 @@ value = av_malloc(chunk_size); if (!value) goto skip; - avio_read(pb, value, chunk_size); + ret = ffio_read_size(pb, value, chunk_size); + if (ret < 0) { + av_free(value); + goto skip; + } value[chunk_size - 1] = 0; av_dict_set(&s->metadata, "fileinfo", value, AV_DICT_DONT_STRDUP_VAL); diff -Nru ffmpeg-5.1.8/libavformat/fifo_test.c ffmpeg-5.1.9/libavformat/fifo_test.c --- ffmpeg-5.1.8/libavformat/fifo_test.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/fifo_test.c 2026-05-05 15:50:52.000000000 +0000 @@ -148,4 +148,3 @@ .priv_class = &failing_muxer_class, .flags = AVFMT_NOFILE | AVFMT_ALLOW_FLUSH, }; - diff -Nru ffmpeg-5.1.8/libavformat/flac_picture.c ffmpeg-5.1.9/libavformat/flac_picture.c --- ffmpeg-5.1.8/libavformat/flac_picture.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/flac_picture.c 2026-05-05 15:50:55.000000000 +0000 @@ -23,6 +23,7 @@ #include "libavcodec/bytestream.h" #include "libavcodec/png.h" #include "avformat.h" +#include "avio_internal.h" #include "demux.h" #include "flac_picture.h" #include "id3v2.h" @@ -160,8 +161,9 @@ // If truncation was detected copy all data from block and // read missing bytes not included in the block size. 
bytestream2_get_bufferu(&g, data->data, left); - if (avio_read(s->pb, data->data + len - trunclen, trunclen) < trunclen) - RETURN_ERROR(AVERROR_INVALIDDATA); + ret = ffio_read_size(s->pb, data->data + len - trunclen, trunclen); + if (ret < 0) + goto fail; } } memset(data->data + len, 0, AV_INPUT_BUFFER_PADDING_SIZE); diff -Nru ffmpeg-5.1.8/libavformat/g726.c ffmpeg-5.1.9/libavformat/g726.c --- ffmpeg-5.1.8/libavformat/g726.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/g726.c 2026-05-05 14:22:01.000000000 +0000 @@ -97,4 +97,3 @@ .raw_codec_id = AV_CODEC_ID_ADPCM_G726LE, }; #endif - diff -Nru ffmpeg-5.1.8/libavformat/hls.c ffmpeg-5.1.9/libavformat/hls.c --- ffmpeg-5.1.8/libavformat/hls.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/hls.c 2026-05-05 15:50:55.000000000 +0000 @@ -958,13 +958,22 @@ if (pls) pls->finished = 1; } else if (av_strstart(line, "#EXTINF:", &ptr)) { + double d = atof(ptr) * AV_TIME_BASE; + if (d < 0 || d > INT64_MAX || isnan(d)) { + av_log(c->ctx, AV_LOG_WARNING, "EXTINF %f unsupported\n", d / AV_TIME_BASE); + d = 0; + } + duration = d; is_segment = 1; - duration = atof(ptr) * AV_TIME_BASE; } else if (av_strstart(line, "#EXT-X-BYTERANGE:", &ptr)) { seg_size = strtoll(ptr, NULL, 10); ptr = strchr(ptr, '@'); if (ptr) seg_offset = strtoll(ptr+1, NULL, 10); + if (seg_size < 0 || seg_offset > INT64_MAX - seg_size) { + ret = AVERROR_INVALIDDATA; + goto fail; + } } else if (av_strstart(line, "#", NULL)) { av_log(c->ctx, AV_LOG_INFO, "Skip ('%s')\n", line); continue; diff -Nru ffmpeg-5.1.8/libavformat/hls_sample_encryption.c ffmpeg-5.1.9/libavformat/hls_sample_encryption.c --- ffmpeg-5.1.8/libavformat/hls_sample_encryption.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/hls_sample_encryption.c 2026-05-05 15:50:55.000000000 +0000 @@ -86,6 +86,7 @@ return; memcpy(info->setup_data, buf, info->setup_data_length); + memset(info->setup_data + info->setup_data_length, 0, AV_INPUT_BUFFER_PADDING_SIZE); 
} int ff_hls_senc_parse_audio_setup_info(AVStream *st, HLSAudioSetupInfo *info) diff -Nru ffmpeg-5.1.8/libavformat/hls_sample_encryption.h ffmpeg-5.1.9/libavformat/hls_sample_encryption.h --- ffmpeg-5.1.8/libavformat/hls_sample_encryption.h 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/hls_sample_encryption.h 2026-05-05 15:50:55.000000000 +0000 @@ -52,7 +52,7 @@ uint16_t priming; uint8_t version; uint8_t setup_data_length; - uint8_t setup_data[HLS_MAX_AUDIO_SETUP_DATA_LEN]; + uint8_t setup_data[HLS_MAX_AUDIO_SETUP_DATA_LEN + AV_INPUT_BUFFER_PADDING_SIZE]; } HLSAudioSetupInfo; @@ -63,4 +63,3 @@ int ff_hls_senc_decrypt_frame(enum AVCodecID codec_id, HLSCryptoContext *crypto_ctx, AVPacket *pkt); #endif /* AVFORMAT_HLS_SAMPLE_ENCRYPTION_H */ - diff -Nru ffmpeg-5.1.8/libavformat/hlsplaylist.c ffmpeg-5.1.9/libavformat/hlsplaylist.c --- ffmpeg-5.1.8/libavformat/hlsplaylist.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/hlsplaylist.c 2026-05-05 15:50:52.000000000 +0000 @@ -192,4 +192,3 @@ return; avio_printf(out, "#EXT-X-ENDLIST\n"); } - diff -Nru ffmpeg-5.1.8/libavformat/http.c ffmpeg-5.1.9/libavformat/http.c --- ffmpeg-5.1.8/libavformat/http.c 2025-11-26 02:41:35.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/http.c 2026-05-05 15:50:55.000000000 +0000 @@ -136,6 +136,7 @@ char *new_location; AVDictionary *redirect_cache; uint64_t filesize_from_content_range; + int max_redirects; } HTTPContext; #define OFFSET(x) offsetof(HTTPContext, x) @@ -178,6 +179,7 @@ { "resource", "The resource requested by a client", OFFSET(resource), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E }, { "reply_code", "The http status code to return to a client", OFFSET(reply_code), AV_OPT_TYPE_INT, { .i64 = 200}, INT_MIN, 599, E}, { "short_seek_size", "Threshold to favor readahead over seek.", OFFSET(short_seek_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, D }, + { "max_redirects", "Maximum number of redirects", OFFSET(max_redirects), AV_OPT_TYPE_INT, { .i64 = 
MAX_REDIRECTS }, 0, INT_MAX, D }, { NULL } }; @@ -233,7 +235,11 @@ if (err < 0) goto end; } + } else if (strcmp(proto, "http")) { + err = AVERROR(EINVAL); + goto end; } + if (port < 0) port = 80; @@ -362,6 +368,9 @@ cached = redirect_cache_get(s); if (cached) { + if (redirects++ >= s->max_redirects) + return AVERROR(EIO); + av_free(s->location); s->location = av_strdup(cached); if (!s->location) { @@ -418,7 +427,7 @@ s->new_location) { /* url moved, get next */ ffurl_closep(&s->hd); - if (redirects++ >= MAX_REDIRECTS) + if (redirects++ >= s->max_redirects) return AVERROR(EIO); if (!s->expires) { diff -Nru ffmpeg-5.1.8/libavformat/icodec.c ffmpeg-5.1.9/libavformat/icodec.c --- ffmpeg-5.1.8/libavformat/icodec.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/icodec.c 2026-05-05 15:50:55.000000000 +0000 @@ -111,7 +111,7 @@ avio_skip(pb, 5); ico->images[i].size = avio_rl32(pb); - if (ico->images[i].size <= 0) { + if (ico->images[i].size <= 0 || ico->images[i].size > INT_MAX - 14) { av_log(s, AV_LOG_ERROR, "Invalid image size %d\n", ico->images[i].size); return AVERROR_INVALIDDATA; } diff -Nru ffmpeg-5.1.8/libavformat/iff.c ffmpeg-5.1.9/libavformat/iff.c --- ffmpeg-5.1.8/libavformat/iff.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/iff.c 2026-05-05 15:50:55.000000000 +0000 @@ -338,8 +338,10 @@ if (config != 0xFFFF) { if (config < FF_ARRAY_ELEMS(dsd_loudspeaker_config)) st->codecpar->ch_layout = dsd_loudspeaker_config[config]; - if (!st->codecpar->ch_layout.nb_channels) + if (!st->codecpar->ch_layout.nb_channels) { avpriv_request_sample(s, "loudspeaker configuration %d", config); + return AVERROR_PATCHWELCOME; + } } break; } diff -Nru ffmpeg-5.1.8/libavformat/img2dec.c ffmpeg-5.1.9/libavformat/img2dec.c --- ffmpeg-5.1.8/libavformat/img2dec.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/img2dec.c 2026-05-05 15:50:55.000000000 +0000 @@ -413,8 +413,9 @@ char filename_bytes[1024]; char *filename = filename_bytes; int 
i, res; - int size[3] = { 0 }, ret[3] = { 0 }; - AVIOContext *f[3] = { NULL }; + int ret[3] = { 0 }; + int64_t size[3] = { 0 }; + AVIOContext *f[3] = { NULL }; AVCodecParameters *par = s1->streams[0]->codecpar; if (!s->is_pipe) { @@ -494,7 +495,15 @@ } } - res = av_new_packet(pkt, size[0] + size[1] + size[2]); + int total_size = 0; + for (int i = 0; i < 3; i++) { + if ((uint64_t)size[i] > INT_MAX - total_size) + return AVERROR_INVALIDDATA; + + total_size += size[i]; + } + + res = av_new_packet(pkt, total_size); if (res < 0) { goto fail; } diff -Nru ffmpeg-5.1.8/libavformat/img2enc.c ffmpeg-5.1.9/libavformat/img2enc.c --- ffmpeg-5.1.8/libavformat/img2enc.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/img2enc.c 2026-05-05 15:50:55.000000000 +0000 @@ -204,6 +204,11 @@ ysize *= 2; usize *= 2; } + if (ysize + 2*usize + (desc->nb_components > 3) * ysize > pkt->size) { + ret = AVERROR(EINVAL); + goto fail; + } + if ((ret = write_and_close(s, &pb[0], pkt->data , ysize)) < 0 || (ret = write_and_close(s, &pb[1], pkt->data + ysize , usize)) < 0 || (ret = write_and_close(s, &pb[2], pkt->data + ysize + usize, usize)) < 0) diff -Nru ffmpeg-5.1.8/libavformat/lrcdec.c ffmpeg-5.1.9/libavformat/lrcdec.c --- ffmpeg-5.1.8/libavformat/lrcdec.c 2025-11-26 02:41:35.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/lrcdec.c 2026-05-05 15:50:55.000000000 +0000 @@ -88,7 +88,7 @@ return 0; } int ret = sscanf(p, "%2[[-]%"SCNu32":%lf]", prefix, &mm, &ss); - if (ret != 3 || prefix[0] != '[' || ss < 0 || ss > 60) { + if (ret != 3 || prefix[0] != '[' || ss < 0 || ss > 60 || !isfinite(ss)) { return 0; } *start = llrint((mm * 60 + ss) * AV_TIME_BASE); diff -Nru ffmpeg-5.1.8/libavformat/matroskadec.c ffmpeg-5.1.9/libavformat/matroskadec.c --- ffmpeg-5.1.8/libavformat/matroskadec.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/matroskadec.c 2026-05-05 15:50:55.000000000 +0000 @@ -4043,6 +4043,10 @@ // Clusters. 
cue_desc.end_offset = cues_start - matroska->segment_start; } + + if (cue_desc.end_time_ns < cue_desc.start_time_ns) + return (CueDesc) {-1, -1, -1, -1}; + return cue_desc; } diff -Nru ffmpeg-5.1.8/libavformat/mlvdec.c ffmpeg-5.1.9/libavformat/mlvdec.c --- ffmpeg-5.1.8/libavformat/mlvdec.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/mlvdec.c 2026-05-05 15:50:55.000000000 +0000 @@ -30,6 +30,7 @@ #include "avformat.h" #include "demux.h" #include "internal.h" +#include "avio_internal.h" #include "riff.h" #define MLV_VERSION "v2.0" @@ -64,12 +65,15 @@ { unsigned int size; uint8_t version[8]; + int ret; avio_skip(pb, 4); size = avio_rl32(pb); if (size < 52) return AVERROR_INVALIDDATA; - avio_read(pb, version, 8); + ret = ffio_read_size(pb, version, 8); + if (ret < 0) + return ret; if (memcmp(version, MLV_VERSION, 5) || avio_rl64(pb) != guid) return AVERROR_INVALIDDATA; avio_skip(pb, size - 24); @@ -87,7 +91,7 @@ } ret = avio_read(pb, value, size); - if (ret != size || !value[0]) { + if (ret != size || !size || !value[0]) { av_free(value); return; } diff -Nru ffmpeg-5.1.8/libavformat/mov.c ffmpeg-5.1.9/libavformat/mov.c --- ffmpeg-5.1.8/libavformat/mov.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/mov.c 2026-05-05 15:50:55.000000000 +0000 @@ -2705,6 +2705,7 @@ av_freep(&sc->extradata[j]); } + sc->stsd_count = 0; av_freep(&sc->extradata); av_freep(&sc->extradata_size); return ret; @@ -3103,6 +3104,9 @@ av_freep(&sc->sdtp_data); sc->sdtp_count = 0; + if (entries < 0 || entries > SIZE_MAX) + return AVERROR(ERANGE); + sc->sdtp_data = av_malloc(entries); if (!sc->sdtp_data) return AVERROR(ENOMEM); @@ -3722,7 +3726,12 @@ st->index, edit_list_index, edit_list_media_time, edit_list_duration); edit_list_index++; edit_list_dts_counter = edit_list_dts_entry_end; - edit_list_dts_entry_end += edit_list_duration; + edit_list_dts_entry_end = av_sat_add64(edit_list_dts_entry_end, edit_list_duration); + if (edit_list_dts_entry_end == INT64_MAX) { 
+ av_log(mov->fc, AV_LOG_ERROR, "Cannot calculate dts entry length with duration %"PRId64"\n", + edit_list_duration); + break; + } num_discarded_begin = 0; if (!found_non_empty_edit && edit_list_media_time == -1) { empty_edits_sum_duration += edit_list_duration; @@ -6897,7 +6906,7 @@ } for (i = 0; i < sample->subsample_count; i++) { - if (sample->subsamples[i].bytes_of_clear_data + sample->subsamples[i].bytes_of_protected_data > size) { + if (sample->subsamples[i].bytes_of_clear_data + (int64_t)sample->subsamples[i].bytes_of_protected_data > size) { av_log(c->fc, AV_LOG_ERROR, "subsample size exceeds the packet size left\n"); return AVERROR_INVALIDDATA; } @@ -6952,7 +6961,7 @@ } for (i = 0; i < sample->subsample_count; i++) { - if (sample->subsamples[i].bytes_of_clear_data + sample->subsamples[i].bytes_of_protected_data > size) { + if (sample->subsamples[i].bytes_of_clear_data + (int64_t)sample->subsamples[i].bytes_of_protected_data > size) { av_log(c->fc, AV_LOG_ERROR, "subsample size exceeds the packet size left\n"); return AVERROR_INVALIDDATA; } @@ -7014,7 +7023,7 @@ } for (i = 0; i < sample->subsample_count; i++) { - if (sample->subsamples[i].bytes_of_clear_data + sample->subsamples[i].bytes_of_protected_data > size) { + if (sample->subsamples[i].bytes_of_clear_data + (int64_t)sample->subsamples[i].bytes_of_protected_data > size) { av_log(c->fc, AV_LOG_ERROR, "subsample size exceeds the packet size left\n"); return AVERROR_INVALIDDATA; } @@ -7079,7 +7088,7 @@ } for (i = 0; i < sample->subsample_count; i++) { - if (sample->subsamples[i].bytes_of_clear_data + sample->subsamples[i].bytes_of_protected_data > size) { + if (sample->subsamples[i].bytes_of_clear_data + (int64_t)sample->subsamples[i].bytes_of_protected_data > size) { av_log(c->fc, AV_LOG_ERROR, "subsample size exceeds the packet size left\n"); return AVERROR_INVALIDDATA; } @@ -7208,7 +7217,7 @@ return 0; st = c->fc->streams[c->fc->nb_streams-1]; - if ((uint64_t)atom.size > (1<<30) || atom.size < 11) + 
if ((uint64_t)atom.size > (1<<30) || atom.size < 11 || st->codecpar->extradata) return AVERROR_INVALIDDATA; /* Check OpusSpecificBox version. */ @@ -7226,7 +7235,11 @@ AV_WL32(st->codecpar->extradata, MKTAG('O','p','u','s')); AV_WL32(st->codecpar->extradata + 4, MKTAG('H','e','a','d')); AV_WB8(st->codecpar->extradata + 8, 1); /* OpusHead version */ - avio_read(pb, st->codecpar->extradata + 9, size - 9); + if ((ret = ffio_read_size(pb, st->codecpar->extradata + 9, size - 9)) < 0) { + av_freep(&st->codecpar->extradata); + st->codecpar->extradata_size = 0; + return ret; + } /* OpusSpecificBox is stored in big-endian, but OpusHead is little-endian; aside from the preceeding magic and version they're diff -Nru ffmpeg-5.1.8/libavformat/mpegts.c ffmpeg-5.1.9/libavformat/mpegts.c --- ffmpeg-5.1.8/libavformat/mpegts.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/mpegts.c 2026-05-05 15:50:55.000000000 +0000 @@ -1675,7 +1675,7 @@ ret = parse_mp4_descr(&d, avio_tell(&d.pb.pub), size, MP4IODescrTag); - *descr_count = d.descr_count; + *descr_count += d.descr_count; return ret; } @@ -2373,7 +2373,8 @@ av_log(ts->stream, AV_LOG_TRACE, "pcr_pid=0x%x\n", pcr_pid); program_info_length = get16(&p, p_end); - if (program_info_length < 0) + + if (program_info_length < 0 || (program_info_length & 0xFFF) > p_end - p) return; program_info_length &= 0xfff; while (program_info_length >= 2) { @@ -2388,12 +2389,12 @@ // something else is broken, exit the program_descriptors_loop break; program_info_length -= len; - if (tag == IOD_DESCRIPTOR) { + if (tag == IOD_DESCRIPTOR && len >= 2) { get8(&p, p_end); // scope get8(&p, p_end); // label len -= 2; mp4_read_iods(ts->stream, p, len, mp4_descr + mp4_descr_count, - &mp4_descr_count, MAX_MP4_DESCR_COUNT); + &mp4_descr_count, MAX_MP4_DESCR_COUNT - mp4_descr_count); } else if (tag == REGISTRATION_DESCRIPTOR && len >= 4) { prog_reg_desc = bytestream_get_le32(&p); len -= 4; diff -Nru ffmpeg-5.1.8/libavformat/mpegtsenc.c 
ffmpeg-5.1.9/libavformat/mpegtsenc.c --- ffmpeg-5.1.8/libavformat/mpegtsenc.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/mpegtsenc.c 2026-05-05 15:50:55.000000000 +0000 @@ -51,6 +51,7 @@ int discontinuity; void (*write_packet)(struct MpegTSSection *s, const uint8_t *packet); void *opaque; + int remaining; } MpegTSSection; typedef struct MpegTSService { @@ -1001,6 +1002,10 @@ av_log(s, AV_LOG_ERROR, "Too long service or provider name\n"); goto fail; } + ts->sdt.remaining -= 10 + service->provider_name[0] + service->name[0]; + if (ts->sdt.remaining < 0) + goto fail; + if (av_dynarray_add_nofree(&ts->services, &ts->nb_services, service) < 0) goto fail; @@ -1111,6 +1116,8 @@ // round up to a whole number of TS packets ts->pes_payload_size = (ts->pes_payload_size + 14 + 183) / 184 * 184 - 14; + ts->sdt.remaining = SECTION_LENGTH - 3; + if (!s->nb_programs) { /* allocate a single DVB service */ if (!mpegts_add_service(s, ts->service_id, s->metadata, NULL)) diff -Nru ffmpeg-5.1.8/libavformat/mpjpegdec.c ffmpeg-5.1.9/libavformat/mpjpegdec.c --- ffmpeg-5.1.8/libavformat/mpjpegdec.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/mpjpegdec.c 2026-05-05 15:50:52.000000000 +0000 @@ -393,5 +393,3 @@ .priv_class = &mpjpeg_demuxer_class, .flags = AVFMT_NOTIMESTAMPS, }; - - diff -Nru ffmpeg-5.1.8/libavformat/os_support.h ffmpeg-5.1.9/libavformat/os_support.h --- ffmpeg-5.1.8/libavformat/os_support.h 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/os_support.h 2026-05-05 14:21:58.000000000 +0000 @@ -42,6 +42,7 @@ #ifdef _WIN32 # include +# include # ifdef lseek # undef lseek # endif diff -Nru ffmpeg-5.1.8/libavformat/pcm.c ffmpeg-5.1.9/libavformat/pcm.c --- ffmpeg-5.1.8/libavformat/pcm.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/pcm.c 2026-05-05 15:50:55.000000000 +0000 @@ -57,7 +57,8 @@ int stream_index, int64_t timestamp, int flags) { AVStream *st; - int block_align, byte_rate; + int block_align; + 
int64_t byte_rate; int64_t pos, ret; st = s->streams[0]; @@ -65,9 +66,9 @@ block_align = st->codecpar->block_align ? st->codecpar->block_align : (av_get_bits_per_sample(st->codecpar->codec_id) * st->codecpar->ch_layout.nb_channels) >> 3; byte_rate = st->codecpar->bit_rate ? st->codecpar->bit_rate >> 3 : - block_align * st->codecpar->sample_rate; + block_align * (int64_t)st->codecpar->sample_rate; - if (block_align <= 0 || byte_rate <= 0) + if (block_align <= 0 || byte_rate <= 0 || FFMAX(timestamp, st->time_base.num) > INT64_MAX / byte_rate) return -1; if (timestamp < 0) timestamp = 0; @@ -76,6 +77,9 @@ st->time_base.num, st->time_base.den * (int64_t)block_align, (flags & AVSEEK_FLAG_BACKWARD) ? AV_ROUND_DOWN : AV_ROUND_UP); + + if (pos > (INT64_MAX - FFMAX(ffformatcontext(s)->data_offset, 0)) / block_align) + return -1; pos *= block_align; /* recompute exact position */ diff -Nru ffmpeg-5.1.8/libavformat/rdt.c ffmpeg-5.1.9/libavformat/rdt.c --- ffmpeg-5.1.8/libavformat/rdt.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/rdt.c 2026-05-05 15:50:52.000000000 +0000 @@ -572,4 +572,3 @@ RDT_HANDLER(live_audio, "x-pn-multirate-realaudio-live", AVMEDIA_TYPE_AUDIO); RDT_HANDLER(video, "x-pn-realvideo", AVMEDIA_TYPE_VIDEO); RDT_HANDLER(audio, "x-pn-realaudio", AVMEDIA_TYPE_AUDIO); - diff -Nru ffmpeg-5.1.8/libavformat/rsd.c ffmpeg-5.1.9/libavformat/rsd.c --- ffmpeg-5.1.8/libavformat/rsd.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/rsd.c 2026-05-05 15:50:55.000000000 +0000 @@ -22,6 +22,7 @@ #include "libavutil/intreadwrite.h" #include "avformat.h" #include "avio.h" +#include "avio_internal.h" #include "demux.h" #include "internal.h" @@ -131,9 +132,9 @@ return ret; for (i = 0; i < par->ch_layout.nb_channels; i++) { - if (avio_feof(pb)) - return AVERROR_EOF; - avio_read(s->pb, st->codecpar->extradata + 32 * i, 32); + ret = ffio_read_size(s->pb, st->codecpar->extradata + 32 * i, 32); + if (ret < 0) + return ret; avio_skip(s->pb, 8); } 
break; diff -Nru ffmpeg-5.1.8/libavformat/rtmpproto.c ffmpeg-5.1.9/libavformat/rtmpproto.c --- ffmpeg-5.1.8/libavformat/rtmpproto.c 2025-11-26 02:41:35.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/rtmpproto.c 2026-05-05 15:50:55.000000000 +0000 @@ -1164,6 +1164,10 @@ if (!memcmp(in_data, "CWS", 3)) { #if CONFIG_ZLIB int64_t out_size; + if (in_size < 8) { + ret = AVERROR_INVALIDDATA; + goto fail; + } /* Decompress the SWF player file using Zlib. */ if (!(out_data = av_malloc(8))) { ret = AVERROR(ENOMEM); @@ -2207,9 +2211,14 @@ { int old_flv_size; + if (size < 0) + return AVERROR(EINVAL); + // generate packet header and put data into buffer for FLV demuxer if (rt->flv_off < rt->flv_size) { // There is old unread data in the buffer, thus append at the end + if (rt->flv_size > INT_MAX - size) + return AVERROR(ERANGE); old_flv_size = rt->flv_size; rt->flv_size += size; } else { @@ -2236,7 +2245,11 @@ rt->has_video = 1; } + if (size > INT_MAX - 15) + return AVERROR(ERANGE); old_flv_size = update_offset(rt, size + 15); + if (old_flv_size < 0) + return old_flv_size; if ((ret = av_reallocp(&rt->flv_data, rt->flv_size)) < 0) { rt->flv_size = rt->flv_off = 0; @@ -2366,48 +2379,50 @@ static int handle_metadata(RTMPContext *rt, RTMPPacket *pkt) { int ret, old_flv_size, type; - const uint8_t *next; - uint8_t *p; + PutByteContext pbc; + GetByteContext gbc; uint32_t size; uint32_t ts, cts, pts = 0; old_flv_size = update_offset(rt, pkt->size); + if (old_flv_size < 0) + return old_flv_size; if ((ret = av_reallocp(&rt->flv_data, rt->flv_size)) < 0) { rt->flv_size = rt->flv_off = 0; return ret; } - next = pkt->data; - p = rt->flv_data + old_flv_size; + bytestream2_init(&gbc, pkt->data, pkt->size); + bytestream2_init_writer(&pbc, rt->flv_data, rt->flv_size); + bytestream2_skip_p(&pbc, old_flv_size); /* copy data while rewriting timestamps */ ts = pkt->timestamp; - while (next - pkt->data < pkt->size - RTMP_HEADER) { - type = bytestream_get_byte(&next); - size = 
bytestream_get_be24(&next); - cts = bytestream_get_be24(&next); - cts |= bytestream_get_byte(&next) << 24; + while (bytestream2_get_bytes_left(&gbc) > RTMP_HEADER) { + type = bytestream2_get_byte(&gbc); + size = bytestream2_get_be24(&gbc); + cts = bytestream2_get_be24(&gbc); + cts |= bytestream2_get_byte(&gbc) << 24; if (!pts) pts = cts; ts += cts - pts; pts = cts; - if (size + 3 + 4 > pkt->data + pkt->size - next) + if (size + 3 + 4 > bytestream2_get_bytes_left(&gbc)) break; - bytestream_put_byte(&p, type); - bytestream_put_be24(&p, size); - bytestream_put_be24(&p, ts); - bytestream_put_byte(&p, ts >> 24); - memcpy(p, next, size + 3 + 4); - p += size + 3; - bytestream_put_be32(&p, size + RTMP_HEADER); - next += size + 3 + 4; + bytestream2_put_byte(&pbc, type); + bytestream2_put_be24(&pbc, size); + bytestream2_put_be24(&pbc, ts); + bytestream2_put_byte(&pbc, ts >> 24); + bytestream2_copy_buffer(&pbc, &gbc, size + 3); + bytestream2_skip(&gbc, 4); + bytestream2_put_be32(&pbc, size + RTMP_HEADER); } - if (p != rt->flv_data + rt->flv_size) { + if (bytestream2_tell_p(&pbc) != rt->flv_size) { av_log(rt, AV_LOG_WARNING, "Incomplete flv packets in " "RTMP_PT_METADATA packet\n"); - rt->flv_size = p - rt->flv_data; + rt->flv_size = bytestream2_tell_p(&pbc); } return 0; @@ -2674,7 +2689,8 @@ if (rt->listen) ff_url_join(buf, sizeof(buf), "tcp", NULL, hostname, port, "?listen&listen_timeout=%d&tcp_nodelay=%d", - rt->listen_timeout * 1000, rt->tcp_nodelay); + rt->listen_timeout < 0 ? 
-1 : rt->listen_timeout * 1000, + rt->tcp_nodelay); else ff_url_join(buf, sizeof(buf), "tcp", NULL, hostname, port, "?tcp_nodelay=%d", rt->tcp_nodelay); } diff -Nru ffmpeg-5.1.8/libavformat/rtpdec_jpeg.c ffmpeg-5.1.9/libavformat/rtpdec_jpeg.c --- ffmpeg-5.1.8/libavformat/rtpdec_jpeg.c 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/rtpdec_jpeg.c 2026-05-05 15:50:55.000000000 +0000 @@ -274,6 +274,12 @@ av_log(ctx, AV_LOG_WARNING, "Only 8-bit precision is supported.\n"); if (qtable_len > 0) { + if (qtable_len != 128) { + av_log(ctx, AV_LOG_ERROR, "Invalid RTP/JPEG packet. Invalid qtable length %d.\n", qtable_len); + if (qtable_len%64 || qtable_len > 4*64) + return AVERROR_INVALIDDATA; + } + if (len < qtable_len) { av_log(ctx, AV_LOG_ERROR, "Too short RTP/JPEG packet.\n"); return AVERROR_INVALIDDATA; diff -Nru ffmpeg-5.1.8/libavformat/rtpdec_latm.c ffmpeg-5.1.9/libavformat/rtpdec_latm.c --- ffmpeg-5.1.8/libavformat/rtpdec_latm.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/rtpdec_latm.c 2026-05-05 15:50:55.000000000 +0000 @@ -72,11 +72,15 @@ cur_len = 0; while (data->pos < data->len) { uint8_t val = data->buf[data->pos++]; + if (val > data->len - cur_len) { + av_log(ctx, AV_LOG_ERROR, "Malformed LATM packet\n"); + return AVERROR_INVALIDDATA; + } cur_len += val; if (val != 0xff) break; } - if (data->pos + cur_len > data->len) { + if (cur_len > data->len - data->pos) { av_log(ctx, AV_LOG_ERROR, "Malformed LATM packet\n"); return AVERROR(EIO); } diff -Nru ffmpeg-5.1.8/libavformat/rtpdec_mpeg4.c ffmpeg-5.1.9/libavformat/rtpdec_mpeg4.c --- ffmpeg-5.1.8/libavformat/rtpdec_mpeg4.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/rtpdec_mpeg4.c 2026-05-05 15:50:55.000000000 +0000 @@ -133,7 +133,7 @@ length in bits */ au_headers_length = AV_RB16(buf); - if (au_headers_length > RTP_MAX_PACKET_LENGTH) + if (au_headers_length == 0 || au_headers_length > RTP_MAX_PACKET_LENGTH) return -1; data->au_headers_length_bytes = 
(au_headers_length + 7) / 8; diff -Nru ffmpeg-5.1.8/libavformat/rtpdec_qdm2.c ffmpeg-5.1.9/libavformat/rtpdec_qdm2.c --- ffmpeg-5.1.8/libavformat/rtpdec_qdm2.c 2025-08-05 00:22:34.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/rtpdec_qdm2.c 2026-05-05 15:50:55.000000000 +0000 @@ -186,8 +186,9 @@ */ static int qdm2_restore_block(PayloadContext *qdm, AVStream *st, AVPacket *pkt) { - int to_copy, n, res, include_csum; + int to_copy, n, res; uint8_t *p, *csum_pos = NULL; + int include_csum = qdm->block_type == 2 || qdm->block_type == 4; /* create packet to hold subpkts into a superblock */ av_assert0(qdm->cache > 0); @@ -196,6 +197,11 @@ break; av_assert0(n < 0x80); + int min_size = 2 + (qdm->len[n] > 0xff) + 2*include_csum; + + if (qdm->block_size < min_size) + return AVERROR_INVALIDDATA; + if ((res = av_new_packet(pkt, qdm->block_size)) < 0) return res; memset(pkt->data, 0, pkt->size); @@ -211,7 +217,7 @@ *p++ = qdm->block_type; *p++ = qdm->len[n]; } - if ((include_csum = (qdm->block_type == 2 || qdm->block_type == 4))) { + if (include_csum) { csum_pos = p; p += 2; } diff -Nru ffmpeg-5.1.8/libavformat/rtsp.c ffmpeg-5.1.9/libavformat/rtsp.c --- ffmpeg-5.1.8/libavformat/rtsp.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/rtsp.c 2026-05-05 15:50:55.000000000 +0000 @@ -582,9 +582,10 @@ NULL, NULL, 0, p); if (proto[0] == '\0') { /* relative control URL */ - if (rtsp_st->control_url[strlen(rtsp_st->control_url)-1]!='/') - av_strlcat(rtsp_st->control_url, "/", - sizeof(rtsp_st->control_url)); + size_t len = strlen(rtsp_st->control_url); + if (len == 0 || rtsp_st->control_url[len - 1] != '/') + av_strlcat(rtsp_st->control_url, "/", + sizeof(rtsp_st->control_url)); av_strlcat(rtsp_st->control_url, p, sizeof(rtsp_st->control_url)); } else @@ -1772,7 +1773,8 @@ } else if (!strcmp(proto, "satip")) { av_strlcpy(proto, "rtsp", sizeof(proto)); rt->server_type = RTSP_SERVER_SATIP; - } + } else if (strcmp(proto, "rtsp")) + return AVERROR_INVALIDDATA; if (*auth) { 
av_strlcpy(rt->auth, auth, sizeof(rt->auth)); @@ -1838,6 +1840,15 @@ err = AVERROR(ENOMEM); goto fail; } + } + + if (!rt->rtsp_hd->protocol_blacklist && s->protocol_blacklist) { + rt->rtsp_hd->protocol_blacklist = av_strdup(s->protocol_blacklist); + if (!rt->rtsp_hd->protocol_blacklist) { + av_dict_free(&options); + err = AVERROR(ENOMEM); + goto fail; + } } /* complete the connection */ diff -Nru ffmpeg-5.1.8/libavformat/rtspdec.c ffmpeg-5.1.9/libavformat/rtspdec.c --- ffmpeg-5.1.8/libavformat/rtspdec.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/rtspdec.c 2026-05-05 15:50:55.000000000 +0000 @@ -188,7 +188,7 @@ rtsp_send_reply(s, RTSP_STATUS_SERVICE, NULL, request.seq); return AVERROR_OPTION_NOT_FOUND; } - if (request.content_length) { + if (request.content_length > 0) { sdp = av_malloc(request.content_length + 1); if (!sdp) return AVERROR(ENOMEM); @@ -212,10 +212,10 @@ return 0; } av_log(s, AV_LOG_ERROR, - "Content-Length header value exceeds sdp allocated buffer (4KB)\n"); + "Invalid ANNOUNCE Content-Length %d\n", request.content_length); rtsp_send_reply(s, RTSP_STATUS_INTERNAL, - "Content-Length exceeds buffer size", request.seq); - return AVERROR(EIO); + "Invalid Content-Length", request.seq); + return AVERROR_INVALIDDATA; } static int rtsp_read_options(AVFormatContext *s) diff -Nru ffmpeg-5.1.8/libavformat/scd.c ffmpeg-5.1.9/libavformat/scd.c --- ffmpeg-5.1.8/libavformat/scd.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/scd.c 2026-05-05 15:50:55.000000000 +0000 @@ -28,6 +28,7 @@ #include "libavutil/avassert.h" #include "libavformat/internal.h" #include "avformat.h" +#include "avio_internal.h" #define SCD_MAGIC ((uint64_t)MKBETAG('S', 'E', 'D', 'B') << 32 | \ MKBETAG('S', 'S', 'C', 'F')) @@ -118,7 +119,7 @@ SCDDemuxContext *ctx = s->priv_data; uint8_t buf[SCD_OFFSET_HEADER_SIZE]; - if ((ret = avio_read(s->pb, buf, SCD_OFFSET_HEADER_SIZE)) < 0) + if ((ret = ffio_read_size(s->pb, buf, SCD_OFFSET_HEADER_SIZE)) < 0) return 
ret; ctx->hdr.table0.count = AV_RB16(buf + 0); diff -Nru ffmpeg-5.1.8/libavformat/segafilm.c ffmpeg-5.1.9/libavformat/segafilm.c --- ffmpeg-5.1.8/libavformat/segafilm.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/segafilm.c 2026-05-05 15:50:55.000000000 +0000 @@ -160,7 +160,7 @@ st->codecpar->height = AV_RB32(&scratch[12]); if (film->video_type == AV_CODEC_ID_RAWVIDEO) { - if (scratch[20] == 24) { + if (film->version == 0 || scratch[20] == 24) { st->codecpar->format = AV_PIX_FMT_RGB24; } else { av_log(s, AV_LOG_ERROR, "raw video is using unhandled %dbpp\n", scratch[20]); diff -Nru ffmpeg-5.1.8/libavformat/vividas.c ffmpeg-5.1.9/libavformat/vividas.c --- ffmpeg-5.1.8/libavformat/vividas.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/vividas.c 2026-05-05 15:50:55.000000000 +0000 @@ -584,7 +584,9 @@ block_type = avio_r8(pb); if (block_type == 22) { - avio_read(pb, keybuffer, 187); + ret = ffio_read_size(pb, keybuffer, 187); + if (ret < 0) + return ret; b22_key = decode_key(keybuffer); b22_size = avio_rl32(pb); } @@ -718,8 +720,10 @@ } last_start = viv->audio_subpackets[viv->n_audio_subpackets].start = (int)(off - avio_tell(pb)); - if (last_start < last) + if (last_start < last) { + viv->n_audio_subpackets = 0; return AVERROR_INVALIDDATA; + } viv->current_audio_subpacket = 0; } else { diff -Nru ffmpeg-5.1.8/libavformat/wavdec.c ffmpeg-5.1.9/libavformat/wavdec.c --- ffmpeg-5.1.8/libavformat/wavdec.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/wavdec.c 2026-05-05 15:50:55.000000000 +0000 @@ -683,7 +683,8 @@ int64_t size; while (!avio_feof(pb)) { - avio_read(pb, guid, 16); + if (avio_read(pb, guid, 16) != 16) + break; size = avio_rl64(pb); if (size <= 24 || size > INT64_MAX - 8) return AVERROR_INVALIDDATA; diff -Nru ffmpeg-5.1.8/libavformat/wtvdec.c ffmpeg-5.1.9/libavformat/wtvdec.c --- ffmpeg-5.1.8/libavformat/wtvdec.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/wtvdec.c 2026-05-05 
15:50:55.000000000 +0000 @@ -885,7 +885,8 @@ AVStream *st = s->streams[stream_index]; uint8_t language[4]; avio_skip(pb, 12); - avio_read(pb, language, 3); + if (avio_read(pb, language, 3) != 3) + return AVERROR_INVALIDDATA; if (language[0]) { language[3] = 0; av_dict_set(&st->metadata, "language", language, 0); diff -Nru ffmpeg-5.1.8/libavformat/xwma.c ffmpeg-5.1.9/libavformat/xwma.c --- ffmpeg-5.1.8/libavformat/xwma.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/xwma.c 2026-05-05 15:50:55.000000000 +0000 @@ -267,7 +267,7 @@ * an offset / timestamp pair. */ av_add_index_entry(st, - cur_pos + (i+1) * st->codecpar->block_align, /* pos */ + cur_pos + (i+1LL) * st->codecpar->block_align, /* pos */ dpds_table[i] / bytes_per_sample, /* timestamp */ st->codecpar->block_align, /* size */ 0, /* duration */ diff -Nru ffmpeg-5.1.8/libavformat/yuv4mpegenc.c ffmpeg-5.1.9/libavformat/yuv4mpegenc.c --- ffmpeg-5.1.8/libavformat/yuv4mpegenc.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavformat/yuv4mpegenc.c 2026-05-05 15:50:55.000000000 +0000 @@ -190,6 +190,9 @@ width = st->codecpar->width; height = st->codecpar->height; + if (frame->width != width || frame->height != height) + return AVERROR(EINVAL); + desc = av_pix_fmt_desc_get(st->codecpar->format); /* The following code presumes all planes to be non-interleaved. 
*/ diff -Nru ffmpeg-5.1.8/libavutil/aarch64/float_dsp_neon.S ffmpeg-5.1.9/libavutil/aarch64/float_dsp_neon.S --- ffmpeg-5.1.8/libavutil/aarch64/float_dsp_neon.S 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavutil/aarch64/float_dsp_neon.S 2026-03-16 18:10:00.000000000 +0000 @@ -25,16 +25,16 @@ function ff_vector_fmul_neon, export=1 1: subs w3, w3, #16 - ld1 {v0.4S, v1.4S}, [x1], #32 - ld1 {v2.4S, v3.4S}, [x1], #32 - ld1 {v4.4S, v5.4S}, [x2], #32 - ld1 {v6.4S, v7.4S}, [x2], #32 - fmul v16.4S, v0.4S, v4.4S - fmul v17.4S, v1.4S, v5.4S - fmul v18.4S, v2.4S, v6.4S - fmul v19.4S, v3.4S, v7.4S - st1 {v16.4S, v17.4S}, [x0], #32 - st1 {v18.4S, v19.4S}, [x0], #32 + ld1 {v0.4s, v1.4s}, [x1], #32 + ld1 {v2.4s, v3.4s}, [x1], #32 + ld1 {v4.4s, v5.4s}, [x2], #32 + ld1 {v6.4s, v7.4s}, [x2], #32 + fmul v16.4s, v0.4s, v4.4s + fmul v17.4s, v1.4s, v5.4s + fmul v18.4s, v2.4s, v6.4s + fmul v19.4s, v3.4s, v7.4s + st1 {v16.4s, v17.4s}, [x0], #32 + st1 {v18.4s, v19.4s}, [x0], #32 b.ne 1b ret endfunc @@ -42,16 +42,16 @@ function ff_vector_fmac_scalar_neon, export=1 mov x3, #-32 1: subs w2, w2, #16 - ld1 {v16.4S, v17.4S}, [x0], #32 - ld1 {v18.4S, v19.4S}, [x0], x3 - ld1 {v4.4S, v5.4S}, [x1], #32 - ld1 {v6.4S, v7.4S}, [x1], #32 - fmla v16.4S, v4.4S, v0.S[0] - fmla v17.4S, v5.4S, v0.S[0] - fmla v18.4S, v6.4S, v0.S[0] - fmla v19.4S, v7.4S, v0.S[0] - st1 {v16.4S, v17.4S}, [x0], #32 - st1 {v18.4S, v19.4S}, [x0], #32 + ld1 {v16.4s, v17.4s}, [x0], #32 + ld1 {v18.4s, v19.4s}, [x0], x3 + ld1 {v4.4s, v5.4s}, [x1], #32 + ld1 {v6.4s, v7.4s}, [x1], #32 + fmla v16.4s, v4.4s, v0.s[0] + fmla v17.4s, v5.4s, v0.s[0] + fmla v18.4s, v6.4s, v0.s[0] + fmla v19.4s, v7.4s, v0.s[0] + st1 {v16.4s, v17.4s}, [x0], #32 + st1 {v18.4s, v19.4s}, [x0], #32 b.ne 1b ret endfunc @@ -59,43 +59,43 @@ function ff_vector_fmul_scalar_neon, export=1 mov w4, #15 bics w3, w2, w4 - dup v16.4S, v0.S[0] + dup v16.4s, v0.s[0] b.eq 3f - ld1 {v0.4S, v1.4S}, [x1], #32 + ld1 {v0.4s, v1.4s}, [x1], #32 1: subs w3, w3, #16 - fmul 
v0.4S, v0.4S, v16.4S - ld1 {v2.4S, v3.4S}, [x1], #32 - fmul v1.4S, v1.4S, v16.4S - fmul v2.4S, v2.4S, v16.4S - st1 {v0.4S, v1.4S}, [x0], #32 - fmul v3.4S, v3.4S, v16.4S + fmul v0.4s, v0.4s, v16.4s + ld1 {v2.4s, v3.4s}, [x1], #32 + fmul v1.4s, v1.4s, v16.4s + fmul v2.4s, v2.4s, v16.4s + st1 {v0.4s, v1.4s}, [x0], #32 + fmul v3.4s, v3.4s, v16.4s b.eq 2f - ld1 {v0.4S, v1.4S}, [x1], #32 - st1 {v2.4S, v3.4S}, [x0], #32 + ld1 {v0.4s, v1.4s}, [x1], #32 + st1 {v2.4s, v3.4s}, [x0], #32 b 1b 2: ands w2, w2, #15 - st1 {v2.4S, v3.4S}, [x0], #32 + st1 {v2.4s, v3.4s}, [x0], #32 b.eq 4f -3: ld1 {v0.4S}, [x1], #16 - fmul v0.4S, v0.4S, v16.4S - st1 {v0.4S}, [x0], #16 +3: ld1 {v0.4s}, [x1], #16 + fmul v0.4s, v0.4s, v16.4s + st1 {v0.4s}, [x0], #16 subs w2, w2, #4 b.gt 3b 4: ret endfunc function ff_vector_dmul_scalar_neon, export=1 - dup v16.2D, v0.D[0] - ld1 {v0.2D, v1.2D}, [x1], #32 + dup v16.2d, v0.d[0] + ld1 {v0.2d, v1.2d}, [x1], #32 1: subs w2, w2, #8 - fmul v0.2D, v0.2D, v16.2D - ld1 {v2.2D, v3.2D}, [x1], #32 - fmul v1.2D, v1.2D, v16.2D - fmul v2.2D, v2.2D, v16.2D - st1 {v0.2D, v1.2D}, [x0], #32 - fmul v3.2D, v3.2D, v16.2D - ld1 {v0.2D, v1.2D}, [x1], #32 - st1 {v2.2D, v3.2D}, [x0], #32 + fmul v0.2d, v0.2d, v16.2d + ld1 {v2.2d, v3.2d}, [x1], #32 + fmul v1.2d, v1.2d, v16.2d + fmul v2.2d, v2.2d, v16.2d + st1 {v0.2d, v1.2d}, [x0], #32 + fmul v3.2d, v3.2d, v16.2d + ld1 {v0.2d, v1.2d}, [x1], #32 + st1 {v2.2d, v3.2d}, [x0], #32 b.gt 1b ret endfunc @@ -108,49 +108,49 @@ add x6, x3, x5, lsl #3 // win + 8 * (len - 2) add x5, x0, x5, lsl #3 // dst + 8 * (len - 2) mov x7, #-16 - ld1 {v0.4S}, [x1], #16 // s0 - ld1 {v2.4S}, [x3], #16 // wi - ld1 {v1.4S}, [x2], x7 // s1 -1: ld1 {v3.4S}, [x6], x7 // wj + ld1 {v0.4s}, [x1], #16 // s0 + ld1 {v2.4s}, [x3], #16 // wi + ld1 {v1.4s}, [x2], x7 // s1 +1: ld1 {v3.4s}, [x6], x7 // wj subs x4, x4, #4 - fmul v17.4S, v0.4S, v2.4S // s0 * wi - rev64 v4.4S, v1.4S - rev64 v5.4S, v3.4S - rev64 v17.4S, v17.4S - ext v4.16B, v4.16B, v4.16B, #8 // s1_r - ext v5.16B, 
v5.16B, v5.16B, #8 // wj_r - ext v17.16B, v17.16B, v17.16B, #8 // (s0 * wi)_rev - fmul v16.4S, v0.4S, v5.4S // s0 * wj_r - fmla v17.4S, v1.4S, v3.4S // (s0 * wi)_rev + s1 * wj + fmul v17.4s, v0.4s, v2.4s // s0 * wi + rev64 v4.4s, v1.4s + rev64 v5.4s, v3.4s + rev64 v17.4s, v17.4s + ext v4.16b, v4.16b, v4.16b, #8 // s1_r + ext v5.16b, v5.16b, v5.16b, #8 // wj_r + ext v17.16b, v17.16b, v17.16b, #8 // (s0 * wi)_rev + fmul v16.4s, v0.4s, v5.4s // s0 * wj_r + fmla v17.4s, v1.4s, v3.4s // (s0 * wi)_rev + s1 * wj b.eq 2f - ld1 {v0.4S}, [x1], #16 - fmls v16.4S, v4.4S, v2.4S // s0 * wj_r - s1_r * wi - st1 {v17.4S}, [x5], x7 - ld1 {v2.4S}, [x3], #16 - ld1 {v1.4S}, [x2], x7 - st1 {v16.4S}, [x0], #16 + ld1 {v0.4s}, [x1], #16 + fmls v16.4s, v4.4s, v2.4s // s0 * wj_r - s1_r * wi + st1 {v17.4s}, [x5], x7 + ld1 {v2.4s}, [x3], #16 + ld1 {v1.4s}, [x2], x7 + st1 {v16.4s}, [x0], #16 b 1b 2: - fmls v16.4S, v4.4S, v2.4S // s0 * wj_r - s1_r * wi - st1 {v17.4S}, [x5], x7 - st1 {v16.4S}, [x0], #16 + fmls v16.4s, v4.4s, v2.4s // s0 * wj_r - s1_r * wi + st1 {v17.4s}, [x5], x7 + st1 {v16.4s}, [x0], #16 ret endfunc function ff_vector_fmul_add_neon, export=1 - ld1 {v0.4S, v1.4S}, [x1], #32 - ld1 {v2.4S, v3.4S}, [x2], #32 - ld1 {v4.4S, v5.4S}, [x3], #32 + ld1 {v0.4s, v1.4s}, [x1], #32 + ld1 {v2.4s, v3.4s}, [x2], #32 + ld1 {v4.4s, v5.4s}, [x3], #32 1: subs w4, w4, #8 - fmla v4.4S, v0.4S, v2.4S - fmla v5.4S, v1.4S, v3.4S + fmla v4.4s, v0.4s, v2.4s + fmla v5.4s, v1.4s, v3.4s b.eq 2f - ld1 {v0.4S, v1.4S}, [x1], #32 - ld1 {v2.4S, v3.4S}, [x2], #32 - st1 {v4.4S, v5.4S}, [x0], #32 - ld1 {v4.4S, v5.4S}, [x3], #32 + ld1 {v0.4s, v1.4s}, [x1], #32 + ld1 {v2.4s, v3.4s}, [x2], #32 + st1 {v4.4s, v5.4s}, [x0], #32 + ld1 {v4.4s, v5.4s}, [x3], #32 b 1b -2: st1 {v4.4S, v5.4S}, [x0], #32 +2: st1 {v4.4s, v5.4s}, [x0], #32 ret endfunc @@ -159,44 +159,44 @@ add x2, x2, x3, lsl #2 sub x2, x2, #32 mov x4, #-32 - ld1 {v2.4S, v3.4S}, [x2], x4 - ld1 {v0.4S, v1.4S}, [x1], #32 + ld1 {v2.4s, v3.4s}, [x2], x4 + ld1 {v0.4s, 
v1.4s}, [x1], #32 1: subs x3, x3, #8 - rev64 v3.4S, v3.4S - rev64 v2.4S, v2.4S - ext v3.16B, v3.16B, v3.16B, #8 - ext v2.16B, v2.16B, v2.16B, #8 - fmul v16.4S, v0.4S, v3.4S - fmul v17.4S, v1.4S, v2.4S + rev64 v3.4s, v3.4s + rev64 v2.4s, v2.4s + ext v3.16b, v3.16b, v3.16b, #8 + ext v2.16b, v2.16b, v2.16b, #8 + fmul v16.4s, v0.4s, v3.4s + fmul v17.4s, v1.4s, v2.4s b.eq 2f - ld1 {v2.4S, v3.4S}, [x2], x4 - ld1 {v0.4S, v1.4S}, [x1], #32 - st1 {v16.4S, v17.4S}, [x0], #32 + ld1 {v2.4s, v3.4s}, [x2], x4 + ld1 {v0.4s, v1.4s}, [x1], #32 + st1 {v16.4s, v17.4s}, [x0], #32 b 1b -2: st1 {v16.4S, v17.4S}, [x0], #32 +2: st1 {v16.4s, v17.4s}, [x0], #32 ret endfunc function ff_butterflies_float_neon, export=1 -1: ld1 {v0.4S}, [x0] - ld1 {v1.4S}, [x1] +1: ld1 {v0.4s}, [x0] + ld1 {v1.4s}, [x1] subs w2, w2, #4 - fsub v2.4S, v0.4S, v1.4S - fadd v3.4S, v0.4S, v1.4S - st1 {v2.4S}, [x1], #16 - st1 {v3.4S}, [x0], #16 + fsub v2.4s, v0.4s, v1.4s + fadd v3.4s, v0.4s, v1.4s + st1 {v2.4s}, [x1], #16 + st1 {v3.4s}, [x0], #16 b.gt 1b ret endfunc function ff_scalarproduct_float_neon, export=1 - movi v2.4S, #0 -1: ld1 {v0.4S}, [x0], #16 - ld1 {v1.4S}, [x1], #16 + movi v2.4s, #0 +1: ld1 {v0.4s}, [x0], #16 + ld1 {v1.4s}, [x1], #16 subs w2, w2, #4 - fmla v2.4S, v0.4S, v1.4S + fmla v2.4s, v0.4s, v1.4s b.gt 1b - faddp v0.4S, v2.4S, v2.4S - faddp s0, v0.2S + faddp v0.4s, v2.4s, v2.4s + faddp s0, v0.2s ret endfunc diff -Nru ffmpeg-5.1.8/libavutil/aes.c ffmpeg-5.1.9/libavutil/aes.c --- ffmpeg-5.1.8/libavutil/aes.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavutil/aes.c 2026-05-05 15:50:52.000000000 +0000 @@ -269,4 +269,3 @@ return 0; } - diff -Nru ffmpeg-5.1.8/libavutil/bswap.h ffmpeg-5.1.9/libavutil/bswap.h --- ffmpeg-5.1.8/libavutil/bswap.h 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavutil/bswap.h 2026-05-05 15:50:55.000000000 +0000 @@ -72,7 +72,7 @@ #ifndef av_bswap64 static inline uint64_t av_const av_bswap64(uint64_t x) { - return (uint64_t)av_bswap32(x) << 32 | av_bswap32(x 
>> 32); + return (uint64_t)av_bswap32((uint32_t)x) << 32 | av_bswap32((uint32_t)(x >> 32)); } #endif diff -Nru ffmpeg-5.1.8/libavutil/eval.c ffmpeg-5.1.9/libavutil/eval.c --- ffmpeg-5.1.8/libavutil/eval.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libavutil/eval.c 2026-05-05 15:50:55.000000000 +0000 @@ -40,6 +40,8 @@ #include "timer.h" #include "reverse.h" +#define MAX_DEPTH 100 + typedef struct Parser { const AVClass *class; int stack_index; @@ -174,6 +176,7 @@ } a; struct AVExpr *param[3]; double *var; + int depth; }; static double etime(double v) @@ -422,6 +425,14 @@ } p->s++; // ")" + for (int i = 0; i<3; i++) + if (d->param[i]) + d->depth = FFMAX(d->depth, d->param[i]->depth+1); + if (d->depth > MAX_DEPTH) { + av_expr_free(d); + return AVERROR(EINVAL); + } + d->type = e_func0; if (strmatch(next, "sinh" )) d->a.func0 = sinh; else if (strmatch(next, "cosh" )) d->a.func0 = cosh; @@ -505,6 +516,9 @@ static AVExpr *make_eval_expr(int type, int value, AVExpr *p0, AVExpr *p1) { + int depth = FFMAX(p0->depth, p1->depth) + 1; + if (depth > MAX_DEPTH) + return NULL; AVExpr *e = av_mallocz(sizeof(AVExpr)); if (!e) return NULL; @@ -512,6 +526,7 @@ e->value =value ; e->param[0] =p0 ; e->param[1] =p1 ; + e->depth = depth; return e; } diff -Nru ffmpeg-5.1.8/libavutil/hwcontext_cuda_internal.h ffmpeg-5.1.9/libavutil/hwcontext_cuda_internal.h --- ffmpeg-5.1.8/libavutil/hwcontext_cuda_internal.h 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavutil/hwcontext_cuda_internal.h 2026-05-05 14:22:01.000000000 +0000 @@ -36,4 +36,3 @@ }; #endif /* AVUTIL_HWCONTEXT_CUDA_INTERNAL_H */ - diff -Nru ffmpeg-5.1.8/libavutil/hwcontext_qsv.h ffmpeg-5.1.9/libavutil/hwcontext_qsv.h --- ffmpeg-5.1.8/libavutil/hwcontext_qsv.h 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavutil/hwcontext_qsv.h 2026-05-05 15:50:52.000000000 +0000 @@ -50,4 +50,3 @@ } AVQSVFramesContext; #endif /* AVUTIL_HWCONTEXT_QSV_H */ - diff -Nru ffmpeg-5.1.8/libavutil/samplefmt.h 
ffmpeg-5.1.9/libavutil/samplefmt.h --- ffmpeg-5.1.8/libavutil/samplefmt.h 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libavutil/samplefmt.h 2026-05-05 15:50:55.000000000 +0000 @@ -122,8 +122,7 @@ * @param sample_fmt the number of the sample format to print the * corresponding info string, or a negative value to print the * corresponding header. - * @return the pointer to the filled buffer or NULL if sample_fmt is - * unknown or in case of other errors + * @return the pointer to the filled buffer or NULL in case of other errors */ char *av_get_sample_fmt_string(char *buf, int buf_size, enum AVSampleFormat sample_fmt); diff -Nru ffmpeg-5.1.8/libavutil/tests/blowfish.c ffmpeg-5.1.9/libavutil/tests/blowfish.c --- ffmpeg-5.1.8/libavutil/tests/blowfish.c 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavutil/tests/blowfish.c 2026-05-05 14:22:01.000000000 +0000 @@ -191,4 +191,3 @@ return 0; } - diff -Nru ffmpeg-5.1.8/libavutil/timecode.c ffmpeg-5.1.9/libavutil/timecode.c --- ffmpeg-5.1.8/libavutil/timecode.c 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libavutil/timecode.c 2026-05-05 15:50:55.000000000 +0000 @@ -232,6 +232,7 @@ int av_timecode_init_from_components(AVTimecode *tc, AVRational rate, int flags, int hh, int mm, int ss, int ff, void *log_ctx) { int ret; + int64_t s; memset(tc, 0, sizeof(*tc)); tc->flags = flags; @@ -242,7 +243,15 @@ if (ret < 0) return ret; - tc->start = (hh*3600 + mm*60 + ss) * tc->fps + ff; + s = hh*3600LL + mm*60LL + ss; + if (s != (int32_t)s) + return AVERROR(EINVAL); + + s = s * tc->fps + ff; + if (s != (int32_t)s) + return AVERROR(EINVAL); + tc->start = s; + if (tc->flags & AV_TIMECODE_FLAG_DROPFRAME) { /* adjust frame number */ int tmins = 60*hh + mm; tc->start -= (tc->fps / 30 * 2) * (tmins - tmins/10); diff -Nru ffmpeg-5.1.8/libswresample/aarch64/resample.S ffmpeg-5.1.9/libswresample/aarch64/resample.S --- ffmpeg-5.1.8/libswresample/aarch64/resample.S 2025-11-26 02:41:32.000000000 +0000 +++ 
ffmpeg-5.1.9/libswresample/aarch64/resample.S 2026-05-05 14:22:01.000000000 +0000 @@ -21,57 +21,57 @@ #include "libavutil/aarch64/asm.S" function ff_resample_common_apply_filter_x4_float_neon, export=1 - movi v0.4S, #0 // accumulator -1: ld1 {v1.4S}, [x1], #16 // src[0..3] - ld1 {v2.4S}, [x2], #16 // filter[0..3] - fmla v0.4S, v1.4S, v2.4S // accumulator += src[0..3] * filter[0..3] - subs w3, w3, #4 // filter_length -= 4 - b.gt 1b // loop until filter_length - faddp v0.4S, v0.4S, v0.4S // pair adding of the 4x32-bit accumulated values - faddp v0.4S, v0.4S, v0.4S // pair adding of the 4x32-bit accumulated values - st1 {v0.S}[0], [x0], #4 // write accumulator - ret + movi v0.4s, #0 // accumulator +1: ld1 {v1.4s}, [x1], #16 // src[0..3] + ld1 {v2.4s}, [x2], #16 // filter[0..3] + fmla v0.4s, v1.4s, v2.4s // accumulator += src[0..3] * filter[0..3] + subs w3, w3, #4 // filter_length -= 4 + b.gt 1b // loop until filter_length + faddp v0.4s, v0.4s, v0.4s // pair adding of the 4x32-bit accumulated values + faddp v0.4s, v0.4s, v0.4s // pair adding of the 4x32-bit accumulated values + st1 {v0.s}[0], [x0], #4 // write accumulator + ret endfunc function ff_resample_common_apply_filter_x8_float_neon, export=1 - movi v0.4S, #0 // accumulator -1: ld1 {v1.4S}, [x1], #16 // src[0..3] - ld1 {v2.4S}, [x2], #16 // filter[0..3] - ld1 {v3.4S}, [x1], #16 // src[4..7] - ld1 {v4.4S}, [x2], #16 // filter[4..7] - fmla v0.4S, v1.4S, v2.4S // accumulator += src[0..3] * filter[0..3] - fmla v0.4S, v3.4S, v4.4S // accumulator += src[4..7] * filter[4..7] - subs w3, w3, #8 // filter_length -= 8 - b.gt 1b // loop until filter_length - faddp v0.4S, v0.4S, v0.4S // pair adding of the 4x32-bit accumulated values - faddp v0.4S, v0.4S, v0.4S // pair adding of the 4x32-bit accumulated values - st1 {v0.S}[0], [x0], #4 // write accumulator - ret + movi v0.4s, #0 // accumulator +1: ld1 {v1.4s}, [x1], #16 // src[0..3] + ld1 {v2.4s}, [x2], #16 // filter[0..3] + ld1 {v3.4s}, [x1], #16 // src[4..7] + ld1 {v4.4s}, 
[x2], #16 // filter[4..7] + fmla v0.4s, v1.4s, v2.4s // accumulator += src[0..3] * filter[0..3] + fmla v0.4s, v3.4s, v4.4s // accumulator += src[4..7] * filter[4..7] + subs w3, w3, #8 // filter_length -= 8 + b.gt 1b // loop until filter_length + faddp v0.4s, v0.4s, v0.4s // pair adding of the 4x32-bit accumulated values + faddp v0.4s, v0.4s, v0.4s // pair adding of the 4x32-bit accumulated values + st1 {v0.s}[0], [x0], #4 // write accumulator + ret endfunc function ff_resample_common_apply_filter_x4_s16_neon, export=1 - movi v0.4S, #0 // accumulator -1: ld1 {v1.4H}, [x1], #8 // src[0..3] - ld1 {v2.4H}, [x2], #8 // filter[0..3] - smlal v0.4S, v1.4H, v2.4H // accumulator += src[0..3] * filter[0..3] - subs w3, w3, #4 // filter_length -= 4 - b.gt 1b // loop until filter_length - addp v0.4S, v0.4S, v0.4S // pair adding of the 4x32-bit accumulated values - addp v0.4S, v0.4S, v0.4S // pair adding of the 4x32-bit accumulated values - st1 {v0.S}[0], [x0], #4 // write accumulator - ret + movi v0.4s, #0 // accumulator +1: ld1 {v1.4h}, [x1], #8 // src[0..3] + ld1 {v2.4h}, [x2], #8 // filter[0..3] + smlal v0.4s, v1.4h, v2.4h // accumulator += src[0..3] * filter[0..3] + subs w3, w3, #4 // filter_length -= 4 + b.gt 1b // loop until filter_length + addp v0.4s, v0.4s, v0.4s // pair adding of the 4x32-bit accumulated values + addp v0.4s, v0.4s, v0.4s // pair adding of the 4x32-bit accumulated values + st1 {v0.s}[0], [x0], #4 // write accumulator + ret endfunc function ff_resample_common_apply_filter_x8_s16_neon, export=1 - movi v0.4S, #0 // accumulator -1: ld1 {v1.8H}, [x1], #16 // src[0..7] - ld1 {v2.8H}, [x2], #16 // filter[0..7] - smlal v0.4S, v1.4H, v2.4H // accumulator += src[0..3] * filter[0..3] - smlal2 v0.4S, v1.8H, v2.8H // accumulator += src[4..7] * filter[4..7] - subs w3, w3, #8 // filter_length -= 8 - b.gt 1b // loop until filter_length - addp v0.4S, v0.4S, v0.4S // pair adding of the 4x32-bit accumulated values - addp v0.4S, v0.4S, v0.4S // pair adding of the 4x32-bit 
accumulated values - st1 {v0.S}[0], [x0], #4 // write accumulator - ret + movi v0.4s, #0 // accumulator +1: ld1 {v1.8h}, [x1], #16 // src[0..7] + ld1 {v2.8h}, [x2], #16 // filter[0..7] + smlal v0.4s, v1.4h, v2.4h // accumulator += src[0..3] * filter[0..3] + smlal2 v0.4s, v1.8h, v2.8h // accumulator += src[4..7] * filter[4..7] + subs w3, w3, #8 // filter_length -= 8 + b.gt 1b // loop until filter_length + addp v0.4s, v0.4s, v0.4s // pair adding of the 4x32-bit accumulated values + addp v0.4s, v0.4s, v0.4s // pair adding of the 4x32-bit accumulated values + st1 {v0.s}[0], [x0], #4 // write accumulator + ret endfunc diff -Nru ffmpeg-5.1.8/libswresample/rematrix.c ffmpeg-5.1.9/libswresample/rematrix.c --- ffmpeg-5.1.8/libswresample/rematrix.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libswresample/rematrix.c 2026-05-05 15:50:55.000000000 +0000 @@ -66,7 +66,10 @@ int nb_in, nb_out, in, out; int user_in_chlayout_nb_channels, user_out_chlayout_nb_channels; - if (!s || s->in_convert) // s needs to be allocated but not initialized + if (!s || s->in_convert || // s needs to be allocated but not initialized + swri_check_chlayout(s, &s->user_in_chlayout , "input") || + swri_check_chlayout(s, &s->user_out_chlayout, "output") + ) return AVERROR(EINVAL); memset(s->matrix, 0, sizeof(s->matrix)); memset(s->matrix_flt, 0, sizeof(s->matrix_flt)); diff -Nru ffmpeg-5.1.8/libswresample/resample_template.c ffmpeg-5.1.9/libswresample/resample_template.c --- ffmpeg-5.1.8/libswresample/resample_template.c 2023-11-09 23:38:51.000000000 +0000 +++ ffmpeg-5.1.9/libswresample/resample_template.c 2026-05-05 15:50:55.000000000 +0000 @@ -25,6 +25,8 @@ * @author Michael Niedermayer */ +// FELEM2U, a variant of FELEM2 which does not produce undefined overflow + #if defined(TEMPLATE_RESAMPLE_DBL) # define RENAME(N) N ## _double @@ -32,6 +34,7 @@ # define DELEM double # define FELEM double # define FELEM2 double +# define FELEM2U double # define FOFFSET 0 # define OUT(d, v) d = v @@ -42,6 
+45,7 @@ # define DELEM float # define FELEM float # define FELEM2 float +# define FELEM2U float # define FOFFSET 0 # define OUT(d, v) d = v @@ -52,6 +56,7 @@ # define DELEM int32_t # define FELEM int32_t # define FELEM2 int64_t +# define FELEM2U uint64_t # define FELEM_MAX INT32_MAX # define FELEM_MIN INT32_MIN # define FOFFSET (1<<(FILTER_SHIFT-1)) @@ -64,6 +69,7 @@ # define DELEM int16_t # define FELEM int16_t # define FELEM2 int32_t +# define FELEM2U uint32_t # define FELEML int64_t # define FELEM_MAX INT16_MAX # define FELEM_MIN INT16_MIN @@ -161,7 +167,7 @@ for (dst_index = 0; dst_index < n; dst_index++) { FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index; - FELEM2 val = FOFFSET, v2 = FOFFSET; + FELEM2U val = FOFFSET, v2 = FOFFSET; int i; for (i = 0; i < c->filter_length; i++) { @@ -169,15 +175,15 @@ v2 += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc]; } #ifdef FELEML - val += (v2 - val) * (FELEML) frac / c->src_incr; + val += (FELEM2)(v2 - val) * (FELEML) frac / c->src_incr; #else # if FILTER_SHIFT == 0 - val += (v2 - val) * inv_src_incr * frac; + val += (FELEM2)(v2 - val) * inv_src_incr * frac; # else - val += (v2 - val) / c->src_incr * frac; + val += (FELEM2)(v2 - val) / c->src_incr * frac; # endif #endif - OUT(dst[dst_index], val); + OUT(dst[dst_index], (FELEM2)val); frac += c->dst_incr_mod; index += c->dst_incr_div; @@ -205,6 +211,7 @@ #undef DELEM #undef FELEM #undef FELEM2 +#undef FELEM2U #undef FELEML #undef FELEM_MAX #undef FELEM_MIN diff -Nru ffmpeg-5.1.8/libswresample/soxr_resample.c ffmpeg-5.1.9/libswresample/soxr_resample.c --- ffmpeg-5.1.8/libswresample/soxr_resample.c 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/libswresample/soxr_resample.c 2026-05-05 14:22:01.000000000 +0000 @@ -127,4 +127,3 @@ create, destroy, process, flush, NULL /* set_compensation */, get_delay, invert_initial_buffer, get_out_samples }; - diff -Nru ffmpeg-5.1.8/libswresample/swresample.c ffmpeg-5.1.9/libswresample/swresample.c --- 
ffmpeg-5.1.8/libswresample/swresample.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libswresample/swresample.c 2026-05-05 15:50:55.000000000 +0000 @@ -29,6 +29,20 @@ #define ALIGN 32 +int swri_check_chlayout(struct SwrContext *s, const AVChannelLayout *chl, const char *name) { + char l1[1024]; + int ret; + + if (!(ret = av_channel_layout_check(chl)) || chl->nb_channels > SWR_CH_MAX) { + if (ret) + av_channel_layout_describe(chl, l1, sizeof(l1)); + av_log(s, AV_LOG_WARNING, "%s channel layout \"%s\" is invalid or unsupported.\n", name, ret ? l1 : ""); + return AVERROR(EINVAL); + } + + return 0; +} + int swr_set_channel_mapping(struct SwrContext *s, const int *channel_map){ if(!s || s->in_convert) // s needs to be allocated but not initialized return AVERROR(EINVAL); @@ -99,6 +113,8 @@ if ((ret = av_opt_set_chlayout(s, "ochl", out_ch_layout, 0)) < 0) goto fail; + if ((ret = swri_check_chlayout(s, out_ch_layout, "ochl")) < 0) + goto fail; if ((ret = av_opt_set_int(s, "osf", out_sample_fmt, 0)) < 0) goto fail; @@ -108,6 +124,8 @@ if ((ret = av_opt_set_chlayout(s, "ichl", in_ch_layout, 0)) < 0) goto fail; + if ((ret = swri_check_chlayout(s, in_ch_layout, "ichl")) < 0) + goto fail; if ((ret = av_opt_set_int(s, "isf", in_sample_fmt, 0)) < 0) goto fail; @@ -265,19 +283,9 @@ s->out.ch_count = s-> user_out_chlayout.nb_channels; s-> in.ch_count = s-> user_in_chlayout.nb_channels; - if (!(ret = av_channel_layout_check(&s->user_in_chlayout)) || s->user_in_chlayout.nb_channels > SWR_CH_MAX) { - if (ret) - av_channel_layout_describe(&s->user_in_chlayout, l1, sizeof(l1)); - av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or unsupported.\n", ret ? 
l1 : ""); + if (swri_check_chlayout(s, &s->user_in_chlayout , "input") || + swri_check_chlayout(s, &s->user_out_chlayout, "output")) return AVERROR(EINVAL); - } - - if (!(ret = av_channel_layout_check(&s->user_out_chlayout)) || s->user_out_chlayout.nb_channels > SWR_CH_MAX) { - if (ret) - av_channel_layout_describe(&s->user_out_chlayout, l2, sizeof(l2)); - av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid or unsupported.\n", ret ? l2 : ""); - return AVERROR(EINVAL); - } ret = av_channel_layout_copy(&s->in_ch_layout, &s->user_in_chlayout); ret |= av_channel_layout_copy(&s->out_ch_layout, &s->user_out_chlayout); diff -Nru ffmpeg-5.1.8/libswresample/swresample_frame.c ffmpeg-5.1.9/libswresample/swresample_frame.c --- ffmpeg-5.1.8/libswresample/swresample_frame.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libswresample/swresample_frame.c 2026-05-05 14:22:01.000000000 +0000 @@ -217,4 +217,3 @@ return convert_frame(s, out, in); } - diff -Nru ffmpeg-5.1.8/libswresample/swresample_internal.h ffmpeg-5.1.9/libswresample/swresample_internal.h --- ffmpeg-5.1.8/libswresample/swresample_internal.h 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libswresample/swresample_internal.h 2026-05-05 15:50:55.000000000 +0000 @@ -193,6 +193,7 @@ av_warn_unused_result int swri_realloc_audio(AudioData *a, int count); +int swri_check_chlayout(struct SwrContext *s, const AVChannelLayout *chl, const char *name); void swri_noise_shaping_int16 (SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count); void swri_noise_shaping_int32 (SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count); diff -Nru ffmpeg-5.1.8/libswresample/version.c ffmpeg-5.1.9/libswresample/version.c --- ffmpeg-5.1.8/libswresample/version.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libswresample/version.c 2026-05-05 14:22:01.000000000 +0000 @@ -42,4 +42,3 @@ #define LICENSE_PREFIX "libswresample license: " return 
&LICENSE_PREFIX FFMPEG_LICENSE[sizeof(LICENSE_PREFIX) - 1]; } - diff -Nru ffmpeg-5.1.8/libswscale/aarch64/hscale.S ffmpeg-5.1.9/libswscale/aarch64/hscale.S --- ffmpeg-5.1.8/libswscale/aarch64/hscale.S 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libswscale/aarch64/hscale.S 2026-05-05 15:50:52.000000000 +0000 @@ -41,53 +41,53 @@ ;----------------------------------------------------------------------------- */ function ff_hscale8to15_X8_neon, export=1 - sbfiz x7, x6, #1, #32 // filterSize*2 (*2 because int16) -1: ldr w8, [x5], #4 // filterPos[idx] - ldr w0, [x5], #4 // filterPos[idx + 1] - ldr w11, [x5], #4 // filterPos[idx + 2] - ldr w9, [x5], #4 // filterPos[idx + 3] - mov x16, x4 // filter0 = filter - add x12, x16, x7 // filter1 = filter0 + filterSize*2 - add x13, x12, x7 // filter2 = filter1 + filterSize*2 - add x4, x13, x7 // filter3 = filter2 + filterSize*2 - movi v0.2D, #0 // val sum part 1 (for dst[0]) - movi v1.2D, #0 // val sum part 2 (for dst[1]) - movi v2.2D, #0 // val sum part 3 (for dst[2]) - movi v3.2D, #0 // val sum part 4 (for dst[3]) - add x17, x3, w8, UXTW // srcp + filterPos[0] - add x8, x3, w0, UXTW // srcp + filterPos[1] - add x0, x3, w11, UXTW // srcp + filterPos[2] - add x11, x3, w9, UXTW // srcp + filterPos[3] - mov w15, w6 // filterSize counter -2: ld1 {v4.8B}, [x17], #8 // srcp[filterPos[0] + {0..7}] - ld1 {v5.8H}, [x16], #16 // load 8x16-bit filter values, part 1 - ld1 {v6.8B}, [x8], #8 // srcp[filterPos[1] + {0..7}] - ld1 {v7.8H}, [x12], #16 // load 8x16-bit at filter+filterSize - uxtl v4.8H, v4.8B // unpack part 1 to 16-bit - smlal v0.4S, v4.4H, v5.4H // v0 accumulates srcp[filterPos[0] + {0..3}] * filter[{0..3}] - smlal2 v0.4S, v4.8H, v5.8H // v0 accumulates srcp[filterPos[0] + {4..7}] * filter[{4..7}] - ld1 {v16.8B}, [x0], #8 // srcp[filterPos[2] + {0..7}] - ld1 {v17.8H}, [x13], #16 // load 8x16-bit at filter+2*filterSize - uxtl v6.8H, v6.8B // unpack part 2 to 16-bit - smlal v1.4S, v6.4H, v7.4H // v1 accumulates 
srcp[filterPos[1] + {0..3}] * filter[{0..3}] - uxtl v16.8H, v16.8B // unpack part 3 to 16-bit - smlal v2.4S, v16.4H, v17.4H // v2 accumulates srcp[filterPos[2] + {0..3}] * filter[{0..3}] - smlal2 v2.4S, v16.8H, v17.8H // v2 accumulates srcp[filterPos[2] + {4..7}] * filter[{4..7}] - ld1 {v18.8B}, [x11], #8 // srcp[filterPos[3] + {0..7}] - smlal2 v1.4S, v6.8H, v7.8H // v1 accumulates srcp[filterPos[1] + {4..7}] * filter[{4..7}] - ld1 {v19.8H}, [x4], #16 // load 8x16-bit at filter+3*filterSize - subs w15, w15, #8 // j -= 8: processed 8/filterSize - uxtl v18.8H, v18.8B // unpack part 4 to 16-bit - smlal v3.4S, v18.4H, v19.4H // v3 accumulates srcp[filterPos[3] + {0..3}] * filter[{0..3}] - smlal2 v3.4S, v18.8H, v19.8H // v3 accumulates srcp[filterPos[3] + {4..7}] * filter[{4..7}] - b.gt 2b // inner loop if filterSize not consumed completely - addp v0.4S, v0.4S, v1.4S // part01 horizontal pair adding - addp v2.4S, v2.4S, v3.4S // part23 horizontal pair adding - addp v0.4S, v0.4S, v2.4S // part0123 horizontal pair adding - subs w2, w2, #4 // dstW -= 4 - sqshrn v0.4H, v0.4S, #7 // shift and clip the 2x16-bit final values - st1 {v0.4H}, [x1], #8 // write to destination part0123 - b.gt 1b // loop until end of line + sbfiz x7, x6, #1, #32 // filterSize*2 (*2 because int16) +1: ldr w8, [x5], #4 // filterPos[idx] + ldr w0, [x5], #4 // filterPos[idx + 1] + ldr w11, [x5], #4 // filterPos[idx + 2] + ldr w9, [x5], #4 // filterPos[idx + 3] + mov x16, x4 // filter0 = filter + add x12, x16, x7 // filter1 = filter0 + filterSize*2 + add x13, x12, x7 // filter2 = filter1 + filterSize*2 + add x4, x13, x7 // filter3 = filter2 + filterSize*2 + movi v0.2d, #0 // val sum part 1 (for dst[0]) + movi v1.2d, #0 // val sum part 2 (for dst[1]) + movi v2.2d, #0 // val sum part 3 (for dst[2]) + movi v3.2d, #0 // val sum part 4 (for dst[3]) + add x17, x3, w8, uxtw // srcp + filterPos[0] + add x8, x3, w0, uxtw // srcp + filterPos[1] + add x0, x3, w11, uxtw // srcp + filterPos[2] + add x11, x3, w9, uxtw 
// srcp + filterPos[3] + mov w15, w6 // filterSize counter +2: ld1 {v4.8b}, [x17], #8 // srcp[filterPos[0] + {0..7}] + ld1 {v5.8h}, [x16], #16 // load 8x16-bit filter values, part 1 + ld1 {v6.8b}, [x8], #8 // srcp[filterPos[1] + {0..7}] + ld1 {v7.8h}, [x12], #16 // load 8x16-bit at filter+filterSize + uxtl v4.8h, v4.8b // unpack part 1 to 16-bit + smlal v0.4s, v4.4h, v5.4h // v0 accumulates srcp[filterPos[0] + {0..3}] * filter[{0..3}] + smlal2 v0.4s, v4.8h, v5.8h // v0 accumulates srcp[filterPos[0] + {4..7}] * filter[{4..7}] + ld1 {v16.8b}, [x0], #8 // srcp[filterPos[2] + {0..7}] + ld1 {v17.8h}, [x13], #16 // load 8x16-bit at filter+2*filterSize + uxtl v6.8h, v6.8b // unpack part 2 to 16-bit + smlal v1.4s, v6.4h, v7.4h // v1 accumulates srcp[filterPos[1] + {0..3}] * filter[{0..3}] + uxtl v16.8h, v16.8b // unpack part 3 to 16-bit + smlal v2.4s, v16.4h, v17.4h // v2 accumulates srcp[filterPos[2] + {0..3}] * filter[{0..3}] + smlal2 v2.4s, v16.8h, v17.8h // v2 accumulates srcp[filterPos[2] + {4..7}] * filter[{4..7}] + ld1 {v18.8b}, [x11], #8 // srcp[filterPos[3] + {0..7}] + smlal2 v1.4s, v6.8h, v7.8h // v1 accumulates srcp[filterPos[1] + {4..7}] * filter[{4..7}] + ld1 {v19.8h}, [x4], #16 // load 8x16-bit at filter+3*filterSize + subs w15, w15, #8 // j -= 8: processed 8/filterSize + uxtl v18.8h, v18.8b // unpack part 4 to 16-bit + smlal v3.4s, v18.4h, v19.4h // v3 accumulates srcp[filterPos[3] + {0..3}] * filter[{0..3}] + smlal2 v3.4s, v18.8h, v19.8h // v3 accumulates srcp[filterPos[3] + {4..7}] * filter[{4..7}] + b.gt 2b // inner loop if filterSize not consumed completely + addp v0.4s, v0.4s, v1.4s // part01 horizontal pair adding + addp v2.4s, v2.4s, v3.4s // part23 horizontal pair adding + addp v0.4s, v0.4s, v2.4s // part0123 horizontal pair adding + subs w2, w2, #4 // dstW -= 4 + sqshrn v0.4h, v0.4s, #7 // shift and clip the 2x16-bit final values + st1 {v0.4h}, [x1], #8 // write to destination part0123 + b.gt 1b // loop until end of line ret endfunc @@ -112,131 
+112,131 @@ // 3. Complete madd // 4. Complete remaining iterations when dstW % 8 != 0 - sub sp, sp, #32 // allocate 32 bytes on the stack - cmp w2, #16 // if dstW <16, skip to the last block used for wrapping up - b.lt 2f + sub sp, sp, #32 // allocate 32 bytes on the stack + cmp w2, #16 // if dstW <16, skip to the last block used for wrapping up + b.lt 2f // load 8 values from filterPos to be used as offsets into src - ldp w8, w9, [x5] // filterPos[idx + 0], [idx + 1] - ldp w10, w11, [x5, #8] // filterPos[idx + 2], [idx + 3] - ldp w12, w13, [x5, #16] // filterPos[idx + 4], [idx + 5] - ldp w14, w15, [x5, #24] // filterPos[idx + 6], [idx + 7] - add x5, x5, #32 // advance filterPos + ldp w8, w9, [x5] // filterPos[idx + 0], [idx + 1] + ldp w10, w11, [x5, #8] // filterPos[idx + 2], [idx + 3] + ldp w12, w13, [x5, #16] // filterPos[idx + 4], [idx + 5] + ldp w14, w15, [x5, #24] // filterPos[idx + 6], [idx + 7] + add x5, x5, #32 // advance filterPos // gather random access data from src into contiguous memory - ldr w8, [x3, w8, UXTW] // src[filterPos[idx + 0]][0..3] - ldr w9, [x3, w9, UXTW] // src[filterPos[idx + 1]][0..3] - ldr w10, [x3, w10, UXTW] // src[filterPos[idx + 2]][0..3] - ldr w11, [x3, w11, UXTW] // src[filterPos[idx + 3]][0..3] - ldr w12, [x3, w12, UXTW] // src[filterPos[idx + 4]][0..3] - ldr w13, [x3, w13, UXTW] // src[filterPos[idx + 5]][0..3] - ldr w14, [x3, w14, UXTW] // src[filterPos[idx + 6]][0..3] - ldr w15, [x3, w15, UXTW] // src[filterPos[idx + 7]][0..3] - stp w8, w9, [sp] // *scratch_mem = { src[filterPos[idx + 0]][0..3], src[filterPos[idx + 1]][0..3] } - stp w10, w11, [sp, #8] // *scratch_mem = { src[filterPos[idx + 2]][0..3], src[filterPos[idx + 3]][0..3] } - stp w12, w13, [sp, #16] // *scratch_mem = { src[filterPos[idx + 4]][0..3], src[filterPos[idx + 5]][0..3] } - stp w14, w15, [sp, #24] // *scratch_mem = { src[filterPos[idx + 6]][0..3], src[filterPos[idx + 7]][0..3] } + ldr w8, [x3, w8, uxtw] // src[filterPos[idx + 0]][0..3] + ldr w9, [x3, w9, 
uxtw] // src[filterPos[idx + 1]][0..3] + ldr w10, [x3, w10, uxtw] // src[filterPos[idx + 2]][0..3] + ldr w11, [x3, w11, uxtw] // src[filterPos[idx + 3]][0..3] + ldr w12, [x3, w12, uxtw] // src[filterPos[idx + 4]][0..3] + ldr w13, [x3, w13, uxtw] // src[filterPos[idx + 5]][0..3] + ldr w14, [x3, w14, uxtw] // src[filterPos[idx + 6]][0..3] + ldr w15, [x3, w15, uxtw] // src[filterPos[idx + 7]][0..3] + stp w8, w9, [sp] // *scratch_mem = { src[filterPos[idx + 0]][0..3], src[filterPos[idx + 1]][0..3] } + stp w10, w11, [sp, #8] // *scratch_mem = { src[filterPos[idx + 2]][0..3], src[filterPos[idx + 3]][0..3] } + stp w12, w13, [sp, #16] // *scratch_mem = { src[filterPos[idx + 4]][0..3], src[filterPos[idx + 5]][0..3] } + stp w14, w15, [sp, #24] // *scratch_mem = { src[filterPos[idx + 6]][0..3], src[filterPos[idx + 7]][0..3] } 1: - ld4 {v16.8B, v17.8B, v18.8B, v19.8B}, [sp] // transpose 8 bytes each from src into 4 registers + ld4 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp] // transpose 8 bytes each from src into 4 registers // load 8 values from filterPos to be used as offsets into src - ldp w8, w9, [x5] // filterPos[idx + 0][0..3], [idx + 1][0..3], next iteration - ldp w10, w11, [x5, #8] // filterPos[idx + 2][0..3], [idx + 3][0..3], next iteration - ldp w12, w13, [x5, #16] // filterPos[idx + 4][0..3], [idx + 5][0..3], next iteration - ldp w14, w15, [x5, #24] // filterPos[idx + 6][0..3], [idx + 7][0..3], next iteration + ldp w8, w9, [x5] // filterPos[idx + 0][0..3], [idx + 1][0..3], next iteration + ldp w10, w11, [x5, #8] // filterPos[idx + 2][0..3], [idx + 3][0..3], next iteration + ldp w12, w13, [x5, #16] // filterPos[idx + 4][0..3], [idx + 5][0..3], next iteration + ldp w14, w15, [x5, #24] // filterPos[idx + 6][0..3], [idx + 7][0..3], next iteration - movi v0.2D, #0 // Clear madd accumulator for idx 0..3 - movi v5.2D, #0 // Clear madd accumulator for idx 4..7 + movi v0.2d, #0 // Clear madd accumulator for idx 0..3 + movi v5.2d, #0 // Clear madd accumulator for idx 4..7 - ld4 
{v1.8H, v2.8H, v3.8H, v4.8H}, [x4], #64 // load filter idx + 0..7 + ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x4], #64 // load filter idx + 0..7 - add x5, x5, #32 // advance filterPos + add x5, x5, #32 // advance filterPos // interleaved SIMD and prefetching intended to keep ld/st and vector pipelines busy - uxtl v16.8H, v16.8B // unsigned extend long, covert src data to 16-bit - uxtl v17.8H, v17.8B // unsigned extend long, covert src data to 16-bit - ldr w8, [x3, w8, UXTW] // src[filterPos[idx + 0]], next iteration - ldr w9, [x3, w9, UXTW] // src[filterPos[idx + 1]], next iteration - uxtl v18.8H, v18.8B // unsigned extend long, covert src data to 16-bit - uxtl v19.8H, v19.8B // unsigned extend long, covert src data to 16-bit - ldr w10, [x3, w10, UXTW] // src[filterPos[idx + 2]], next iteration - ldr w11, [x3, w11, UXTW] // src[filterPos[idx + 3]], next iteration - - smlal v0.4S, v1.4H, v16.4H // multiply accumulate inner loop j = 0, idx = 0..3 - smlal v0.4S, v2.4H, v17.4H // multiply accumulate inner loop j = 1, idx = 0..3 - ldr w12, [x3, w12, UXTW] // src[filterPos[idx + 4]], next iteration - ldr w13, [x3, w13, UXTW] // src[filterPos[idx + 5]], next iteration - smlal v0.4S, v3.4H, v18.4H // multiply accumulate inner loop j = 2, idx = 0..3 - smlal v0.4S, v4.4H, v19.4H // multiply accumulate inner loop j = 3, idx = 0..3 - ldr w14, [x3, w14, UXTW] // src[filterPos[idx + 6]], next iteration - ldr w15, [x3, w15, UXTW] // src[filterPos[idx + 7]], next iteration - - smlal2 v5.4S, v1.8H, v16.8H // multiply accumulate inner loop j = 0, idx = 4..7 - smlal2 v5.4S, v2.8H, v17.8H // multiply accumulate inner loop j = 1, idx = 4..7 - stp w8, w9, [sp] // *scratch_mem = { src[filterPos[idx + 0]][0..3], src[filterPos[idx + 1]][0..3] } - stp w10, w11, [sp, #8] // *scratch_mem = { src[filterPos[idx + 2]][0..3], src[filterPos[idx + 3]][0..3] } - smlal2 v5.4S, v3.8H, v18.8H // multiply accumulate inner loop j = 2, idx = 4..7 - smlal2 v5.4S, v4.8H, v19.8H // multiply accumulate inner loop j 
= 3, idx = 4..7 - stp w12, w13, [sp, #16] // *scratch_mem = { src[filterPos[idx + 4]][0..3], src[filterPos[idx + 5]][0..3] } - stp w14, w15, [sp, #24] // *scratch_mem = { src[filterPos[idx + 6]][0..3], src[filterPos[idx + 7]][0..3] } - - sub w2, w2, #8 // dstW -= 8 - sqshrn v0.4H, v0.4S, #7 // shift and clip the 2x16-bit final values - sqshrn v1.4H, v5.4S, #7 // shift and clip the 2x16-bit final values - st1 {v0.4H, v1.4H}, [x1], #16 // write to dst[idx + 0..7] - cmp w2, #16 // continue on main loop if there are at least 16 iterations left - b.ge 1b + uxtl v16.8h, v16.8b // unsigned extend long, covert src data to 16-bit + uxtl v17.8h, v17.8b // unsigned extend long, covert src data to 16-bit + ldr w8, [x3, w8, uxtw] // src[filterPos[idx + 0]], next iteration + ldr w9, [x3, w9, uxtw] // src[filterPos[idx + 1]], next iteration + uxtl v18.8h, v18.8b // unsigned extend long, covert src data to 16-bit + uxtl v19.8h, v19.8b // unsigned extend long, covert src data to 16-bit + ldr w10, [x3, w10, uxtw] // src[filterPos[idx + 2]], next iteration + ldr w11, [x3, w11, uxtw] // src[filterPos[idx + 3]], next iteration + + smlal v0.4s, v1.4h, v16.4h // multiply accumulate inner loop j = 0, idx = 0..3 + smlal v0.4s, v2.4h, v17.4h // multiply accumulate inner loop j = 1, idx = 0..3 + ldr w12, [x3, w12, uxtw] // src[filterPos[idx + 4]], next iteration + ldr w13, [x3, w13, uxtw] // src[filterPos[idx + 5]], next iteration + smlal v0.4s, v3.4h, v18.4h // multiply accumulate inner loop j = 2, idx = 0..3 + smlal v0.4s, v4.4h, v19.4h // multiply accumulate inner loop j = 3, idx = 0..3 + ldr w14, [x3, w14, uxtw] // src[filterPos[idx + 6]], next iteration + ldr w15, [x3, w15, uxtw] // src[filterPos[idx + 7]], next iteration + + smlal2 v5.4s, v1.8h, v16.8h // multiply accumulate inner loop j = 0, idx = 4..7 + smlal2 v5.4s, v2.8h, v17.8h // multiply accumulate inner loop j = 1, idx = 4..7 + stp w8, w9, [sp] // *scratch_mem = { src[filterPos[idx + 0]][0..3], src[filterPos[idx + 1]][0..3] } + 
stp w10, w11, [sp, #8] // *scratch_mem = { src[filterPos[idx + 2]][0..3], src[filterPos[idx + 3]][0..3] } + smlal2 v5.4s, v3.8h, v18.8h // multiply accumulate inner loop j = 2, idx = 4..7 + smlal2 v5.4s, v4.8h, v19.8h // multiply accumulate inner loop j = 3, idx = 4..7 + stp w12, w13, [sp, #16] // *scratch_mem = { src[filterPos[idx + 4]][0..3], src[filterPos[idx + 5]][0..3] } + stp w14, w15, [sp, #24] // *scratch_mem = { src[filterPos[idx + 6]][0..3], src[filterPos[idx + 7]][0..3] } + + sub w2, w2, #8 // dstW -= 8 + sqshrn v0.4h, v0.4s, #7 // shift and clip the 2x16-bit final values + sqshrn v1.4h, v5.4s, #7 // shift and clip the 2x16-bit final values + st1 {v0.4h, v1.4h}, [x1], #16 // write to dst[idx + 0..7] + cmp w2, #16 // continue on main loop if there are at least 16 iterations left + b.ge 1b // last full iteration - ld4 {v16.8B, v17.8B, v18.8B, v19.8B}, [sp] - ld4 {v1.8H, v2.8H, v3.8H, v4.8H}, [x4], #64 // load filter idx + 0..7 + ld4 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp] + ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x4], #64 // load filter idx + 0..7 - movi v0.2D, #0 // Clear madd accumulator for idx 0..3 - movi v5.2D, #0 // Clear madd accumulator for idx 4..7 + movi v0.2d, #0 // Clear madd accumulator for idx 0..3 + movi v5.2d, #0 // Clear madd accumulator for idx 4..7 - uxtl v16.8H, v16.8B // unsigned extend long, covert src data to 16-bit - uxtl v17.8H, v17.8B // unsigned extend long, covert src data to 16-bit - uxtl v18.8H, v18.8B // unsigned extend long, covert src data to 16-bit - uxtl v19.8H, v19.8B // unsigned extend long, covert src data to 16-bit - - smlal v0.4S, v1.4H, v16.4H // multiply accumulate inner loop j = 0, idx = 0..3 - smlal v0.4S, v2.4H, v17.4H // multiply accumulate inner loop j = 1, idx = 0..3 - smlal v0.4S, v3.4H, v18.4H // multiply accumulate inner loop j = 2, idx = 0..3 - smlal v0.4S, v4.4H, v19.4H // multiply accumulate inner loop j = 3, idx = 0..3 - - smlal2 v5.4S, v1.8H, v16.8H // multiply accumulate inner loop j = 0, idx = 4..7 - 
smlal2 v5.4S, v2.8H, v17.8H // multiply accumulate inner loop j = 1, idx = 4..7 - smlal2 v5.4S, v3.8H, v18.8H // multiply accumulate inner loop j = 2, idx = 4..7 - smlal2 v5.4S, v4.8H, v19.8H // multiply accumulate inner loop j = 3, idx = 4..7 - - subs w2, w2, #8 // dstW -= 8 - sqshrn v0.4H, v0.4S, #7 // shift and clip the 2x16-bit final values - sqshrn v1.4H, v5.4S, #7 // shift and clip the 2x16-bit final values - st1 {v0.4H, v1.4H}, [x1], #16 // write to dst[idx + 0..7] + uxtl v16.8h, v16.8b // unsigned extend long, covert src data to 16-bit + uxtl v17.8h, v17.8b // unsigned extend long, covert src data to 16-bit + uxtl v18.8h, v18.8b // unsigned extend long, covert src data to 16-bit + uxtl v19.8h, v19.8b // unsigned extend long, covert src data to 16-bit + + smlal v0.4s, v1.4h, v16.4h // multiply accumulate inner loop j = 0, idx = 0..3 + smlal v0.4s, v2.4h, v17.4h // multiply accumulate inner loop j = 1, idx = 0..3 + smlal v0.4s, v3.4h, v18.4h // multiply accumulate inner loop j = 2, idx = 0..3 + smlal v0.4s, v4.4h, v19.4h // multiply accumulate inner loop j = 3, idx = 0..3 + + smlal2 v5.4s, v1.8h, v16.8h // multiply accumulate inner loop j = 0, idx = 4..7 + smlal2 v5.4s, v2.8h, v17.8h // multiply accumulate inner loop j = 1, idx = 4..7 + smlal2 v5.4s, v3.8h, v18.8h // multiply accumulate inner loop j = 2, idx = 4..7 + smlal2 v5.4s, v4.8h, v19.8h // multiply accumulate inner loop j = 3, idx = 4..7 + + subs w2, w2, #8 // dstW -= 8 + sqshrn v0.4h, v0.4s, #7 // shift and clip the 2x16-bit final values + sqshrn v1.4h, v5.4s, #7 // shift and clip the 2x16-bit final values + st1 {v0.4h, v1.4h}, [x1], #16 // write to dst[idx + 0..7] - cbnz w2, 2f // if >0 iterations remain, jump to the wrap up section + cbnz w2, 2f // if >0 iterations remain, jump to the wrap up section - add sp, sp, #32 // clean up stack + add sp, sp, #32 // clean up stack ret // finish up when dstW % 8 != 0 or dstW < 16 2: // load src - ldr w8, [x5], #4 // filterPos[i] - add x9, x3, w8, UXTW // 
calculate the address for src load - ld1 {v5.S}[0], [x9] // src[filterPos[i] + 0..3] + ldr w8, [x5], #4 // filterPos[i] + add x9, x3, w8, uxtw // calculate the address for src load + ld1 {v5.s}[0], [x9] // src[filterPos[i] + 0..3] // load filter - ld1 {v6.4H}, [x4], #8 // filter[filterSize * i + 0..3] + ld1 {v6.4h}, [x4], #8 // filter[filterSize * i + 0..3] - uxtl v5.8H, v5.8B // unsigned exten long, convert src data to 16-bit - smull v0.4S, v5.4H, v6.4H // 4 iterations of src[...] * filter[...] - addv s0, v0.4S // add up products of src and filter values - sqshrn h0, s0, #7 // shift and clip the 2x16-bit final value - st1 {v0.H}[0], [x1], #2 // dst[i] = ... - sub w2, w2, #1 // dstW-- - cbnz w2, 2b + uxtl v5.8h, v5.8b // unsigned exten long, convert src data to 16-bit + smull v0.4s, v5.4h, v6.4h // 4 iterations of src[...] * filter[...] + addv s0, v0.4s // add up products of src and filter values + sqshrn h0, s0, #7 // shift and clip the 2x16-bit final value + st1 {v0.h}[0], [x1], #2 // dst[i] = ... 
+ sub w2, w2, #1 // dstW-- + cbnz w2, 2b - add sp, sp, #32 // clean up stack + add sp, sp, #32 // clean up stack ret endfunc diff -Nru ffmpeg-5.1.8/libswscale/aarch64/output.S ffmpeg-5.1.9/libswscale/aarch64/output.S --- ffmpeg-5.1.8/libswscale/aarch64/output.S 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libswscale/aarch64/output.S 2026-05-05 15:50:52.000000000 +0000 @@ -21,38 +21,38 @@ #include "libavutil/aarch64/asm.S" function ff_yuv2planeX_8_neon, export=1 - ld1 {v0.8B}, [x5] // load 8x8-bit dither - cbz w6, 1f // check if offsetting present - ext v0.8B, v0.8B, v0.8B, #3 // honor offsetting which can be 0 or 3 only -1: uxtl v0.8H, v0.8B // extend dither to 16-bit - ushll v1.4S, v0.4H, #12 // extend dither to 32-bit with left shift by 12 (part 1) - ushll2 v2.4S, v0.8H, #12 // extend dither to 32-bit with left shift by 12 (part 2) - mov x7, #0 // i = 0 -2: mov v3.16B, v1.16B // initialize accumulator part 1 with dithering value - mov v4.16B, v2.16B // initialize accumulator part 2 with dithering value - mov w8, w1 // tmpfilterSize = filterSize - mov x9, x2 // srcp = src - mov x10, x0 // filterp = filter -3: ldp x11, x12, [x9], #16 // get 2 pointers: src[j] and src[j+1] - add x11, x11, x7, lsl #1 // &src[j ][i] - add x12, x12, x7, lsl #1 // &src[j+1][i] - ld1 {v5.8H}, [x11] // read 8x16-bit @ src[j ][i + {0..7}]: A,B,C,D,E,F,G,H - ld1 {v6.8H}, [x12] // read 8x16-bit @ src[j+1][i + {0..7}]: I,J,K,L,M,N,O,P - ld1r {v7.8H}, [x10], #2 // read 1x16-bit coeff X at filter[j ] and duplicate across lanes - ld1r {v16.8H}, [x10], #2 // read 1x16-bit coeff Y at filter[j+1] and duplicate across lanes - smlal v3.4S, v5.4H, v7.4H // val0 += {A,B,C,D} * X - smlal2 v4.4S, v5.8H, v7.8H // val1 += {E,F,G,H} * X - smlal v3.4S, v6.4H, v16.4H // val0 += {I,J,K,L} * Y - smlal2 v4.4S, v6.8H, v16.8H // val1 += {M,N,O,P} * Y - subs w8, w8, #2 // tmpfilterSize -= 2 - b.gt 3b // loop until filterSize consumed + ld1 {v0.8b}, [x5] // load 8x8-bit dither + cbz w6, 1f // check if 
offsetting present + ext v0.8b, v0.8b, v0.8b, #3 // honor offsetting which can be 0 or 3 only +1: uxtl v0.8h, v0.8b // extend dither to 16-bit + ushll v1.4s, v0.4h, #12 // extend dither to 32-bit with left shift by 12 (part 1) + ushll2 v2.4s, v0.8h, #12 // extend dither to 32-bit with left shift by 12 (part 2) + mov x7, #0 // i = 0 +2: mov v3.16b, v1.16b // initialize accumulator part 1 with dithering value + mov v4.16b, v2.16b // initialize accumulator part 2 with dithering value + mov w8, w1 // tmpfilterSize = filterSize + mov x9, x2 // srcp = src + mov x10, x0 // filterp = filter +3: ldp x11, x12, [x9], #16 // get 2 pointers: src[j] and src[j+1] + add x11, x11, x7, lsl #1 // &src[j ][i] + add x12, x12, x7, lsl #1 // &src[j+1][i] + ld1 {v5.8h}, [x11] // read 8x16-bit @ src[j ][i + {0..7}]: A,B,C,D,E,F,G,H + ld1 {v6.8h}, [x12] // read 8x16-bit @ src[j+1][i + {0..7}]: I,J,K,L,M,N,O,P + ld1r {v7.8h}, [x10], #2 // read 1x16-bit coeff X at filter[j ] and duplicate across lanes + ld1r {v16.8h}, [x10], #2 // read 1x16-bit coeff Y at filter[j+1] and duplicate across lanes + smlal v3.4s, v5.4h, v7.4h // val0 += {A,B,C,D} * X + smlal2 v4.4s, v5.8h, v7.8h // val1 += {E,F,G,H} * X + smlal v3.4s, v6.4h, v16.4h // val0 += {I,J,K,L} * Y + smlal2 v4.4s, v6.8h, v16.8h // val1 += {M,N,O,P} * Y + subs w8, w8, #2 // tmpfilterSize -= 2 + b.gt 3b // loop until filterSize consumed - sqshrun v3.4h, v3.4s, #16 // clip16(val0>>16) - sqshrun2 v3.8h, v4.4s, #16 // clip16(val1>>16) - uqshrn v3.8b, v3.8h, #3 // clip8(val>>19) - st1 {v3.8b}, [x3], #8 // write to destination - subs w4, w4, #8 // dstW -= 8 - add x7, x7, #8 // i += 8 - b.gt 2b // loop until width consumed + sqshrun v3.4h, v3.4s, #16 // clip16(val0>>16) + sqshrun2 v3.8h, v4.4s, #16 // clip16(val1>>16) + uqshrn v3.8b, v3.8h, #3 // clip8(val>>19) + st1 {v3.8b}, [x3], #8 // write to destination + subs w4, w4, #8 // dstW -= 8 + add x7, x7, #8 // i += 8 + b.gt 2b // loop until width consumed ret endfunc diff -Nru 
ffmpeg-5.1.8/libswscale/aarch64/yuv2rgb_neon.S ffmpeg-5.1.9/libswscale/aarch64/yuv2rgb_neon.S --- ffmpeg-5.1.8/libswscale/aarch64/yuv2rgb_neon.S 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libswscale/aarch64/yuv2rgb_neon.S 2026-05-05 14:22:01.000000000 +0000 @@ -23,187 +23,187 @@ .macro load_yoff_ycoeff yoff ycoeff #if defined(__APPLE__) - ldp w9, w10, [sp, #\yoff] + ldp w9, w10, [sp, #\yoff] #else - ldr w9, [sp, #\yoff] - ldr w10, [sp, #\ycoeff] + ldr w9, [sp, #\yoff] + ldr w10, [sp, #\ycoeff] #endif .endm .macro load_args_nv12 - ldr x8, [sp] // table - load_yoff_ycoeff 8, 16 // y_offset, y_coeff - ld1 {v1.1D}, [x8] - dup v0.8H, w10 - dup v3.8H, w9 - sub w3, w3, w0, lsl #2 // w3 = linesize - width * 4 (padding) - sub w5, w5, w0 // w5 = linesizeY - width (paddingY) - sub w7, w7, w0 // w7 = linesizeC - width (paddingC) - neg w11, w0 + ldr x8, [sp] // table + load_yoff_ycoeff 8, 16 // y_offset, y_coeff + ld1 {v1.1d}, [x8] + dup v0.8h, w10 + dup v3.8h, w9 + sub w3, w3, w0, lsl #2 // w3 = linesize - width * 4 (padding) + sub w5, w5, w0 // w5 = linesizeY - width (paddingY) + sub w7, w7, w0 // w7 = linesizeC - width (paddingC) + neg w11, w0 .endm .macro load_args_nv21 - load_args_nv12 + load_args_nv12 .endm .macro load_args_yuv420p - ldr x13, [sp] // srcV - ldr w14, [sp, #8] // linesizeV - ldr x8, [sp, #16] // table - load_yoff_ycoeff 24, 32 // y_offset, y_coeff - ld1 {v1.1D}, [x8] - dup v0.8H, w10 - dup v3.8H, w9 - sub w3, w3, w0, lsl #2 // w3 = linesize - width * 4 (padding) - sub w5, w5, w0 // w5 = linesizeY - width (paddingY) - sub w7, w7, w0, lsr #1 // w7 = linesizeU - width / 2 (paddingU) - sub w14, w14, w0, lsr #1 // w14 = linesizeV - width / 2 (paddingV) - lsr w11, w0, #1 - neg w11, w11 + ldr x13, [sp] // srcV + ldr w14, [sp, #8] // linesizeV + ldr x8, [sp, #16] // table + load_yoff_ycoeff 24, 32 // y_offset, y_coeff + ld1 {v1.1d}, [x8] + dup v0.8h, w10 + dup v3.8h, w9 + sub w3, w3, w0, lsl #2 // w3 = linesize - width * 4 (padding) + sub w5, w5, w0 // w5 
= linesizeY - width (paddingY) + sub w7, w7, w0, lsr #1 // w7 = linesizeU - width / 2 (paddingU) + sub w14, w14, w0, lsr #1 // w14 = linesizeV - width / 2 (paddingV) + lsr w11, w0, #1 + neg w11, w11 .endm .macro load_args_yuv422p - ldr x13, [sp] // srcV - ldr w14, [sp, #8] // linesizeV - ldr x8, [sp, #16] // table - load_yoff_ycoeff 24, 32 // y_offset, y_coeff - ld1 {v1.1D}, [x8] - dup v0.8H, w10 - dup v3.8H, w9 - sub w3, w3, w0, lsl #2 // w3 = linesize - width * 4 (padding) - sub w5, w5, w0 // w5 = linesizeY - width (paddingY) - sub w7, w7, w0, lsr #1 // w7 = linesizeU - width / 2 (paddingU) - sub w14, w14, w0, lsr #1 // w14 = linesizeV - width / 2 (paddingV) + ldr x13, [sp] // srcV + ldr w14, [sp, #8] // linesizeV + ldr x8, [sp, #16] // table + load_yoff_ycoeff 24, 32 // y_offset, y_coeff + ld1 {v1.1d}, [x8] + dup v0.8h, w10 + dup v3.8h, w9 + sub w3, w3, w0, lsl #2 // w3 = linesize - width * 4 (padding) + sub w5, w5, w0 // w5 = linesizeY - width (paddingY) + sub w7, w7, w0, lsr #1 // w7 = linesizeU - width / 2 (paddingU) + sub w14, w14, w0, lsr #1 // w14 = linesizeV - width / 2 (paddingV) .endm .macro load_chroma_nv12 - ld2 {v16.8B, v17.8B}, [x6], #16 - ushll v18.8H, v16.8B, #3 - ushll v19.8H, v17.8B, #3 + ld2 {v16.8b, v17.8b}, [x6], #16 + ushll v18.8h, v16.8b, #3 + ushll v19.8h, v17.8b, #3 .endm .macro load_chroma_nv21 - ld2 {v16.8B, v17.8B}, [x6], #16 - ushll v19.8H, v16.8B, #3 - ushll v18.8H, v17.8B, #3 + ld2 {v16.8b, v17.8b}, [x6], #16 + ushll v19.8h, v16.8b, #3 + ushll v18.8h, v17.8b, #3 .endm .macro load_chroma_yuv420p - ld1 {v16.8B}, [ x6], #8 - ld1 {v17.8B}, [x13], #8 - ushll v18.8H, v16.8B, #3 - ushll v19.8H, v17.8B, #3 + ld1 {v16.8b}, [ x6], #8 + ld1 {v17.8b}, [x13], #8 + ushll v18.8h, v16.8b, #3 + ushll v19.8h, v17.8b, #3 .endm .macro load_chroma_yuv422p - load_chroma_yuv420p + load_chroma_yuv420p .endm .macro increment_nv12 - ands w15, w1, #1 - csel w16, w7, w11, ne // incC = (h & 1) ? 
paddincC : -width - add x6, x6, w16, SXTW // srcC += incC + ands w15, w1, #1 + csel w16, w7, w11, ne // incC = (h & 1) ? paddincC : -width + add x6, x6, w16, sxtw // srcC += incC .endm .macro increment_nv21 - increment_nv12 + increment_nv12 .endm .macro increment_yuv420p - ands w15, w1, #1 - csel w16, w7, w11, ne // incU = (h & 1) ? paddincU : -width/2 - csel w17, w14, w11, ne // incV = (h & 1) ? paddincV : -width/2 - add x6, x6, w16, SXTW // srcU += incU - add x13, x13, w17, SXTW // srcV += incV + ands w15, w1, #1 + csel w16, w7, w11, ne // incU = (h & 1) ? paddincU : -width/2 + csel w17, w14, w11, ne // incV = (h & 1) ? paddincV : -width/2 + add x6, x6, w16, sxtw // srcU += incU + add x13, x13, w17, sxtw // srcV += incV .endm .macro increment_yuv422p - add x6, x6, w7, SXTW // srcU += incU - add x13, x13, w14, SXTW // srcV += incV + add x6, x6, w7, sxtw // srcU += incU + add x13, x13, w14, sxtw // srcV += incV .endm .macro compute_rgba r1 g1 b1 a1 r2 g2 b2 a2 - add v20.8H, v26.8H, v20.8H // Y1 + R1 - add v21.8H, v27.8H, v21.8H // Y2 + R2 - add v22.8H, v26.8H, v22.8H // Y1 + G1 - add v23.8H, v27.8H, v23.8H // Y2 + G2 - add v24.8H, v26.8H, v24.8H // Y1 + B1 - add v25.8H, v27.8H, v25.8H // Y2 + B2 - sqrshrun \r1, v20.8H, #1 // clip_u8((Y1 + R1) >> 1) - sqrshrun \r2, v21.8H, #1 // clip_u8((Y2 + R1) >> 1) - sqrshrun \g1, v22.8H, #1 // clip_u8((Y1 + G1) >> 1) - sqrshrun \g2, v23.8H, #1 // clip_u8((Y2 + G1) >> 1) - sqrshrun \b1, v24.8H, #1 // clip_u8((Y1 + B1) >> 1) - sqrshrun \b2, v25.8H, #1 // clip_u8((Y2 + B1) >> 1) - movi \a1, #255 - movi \a2, #255 + add v20.8h, v26.8h, v20.8h // Y1 + R1 + add v21.8h, v27.8h, v21.8h // Y2 + R2 + add v22.8h, v26.8h, v22.8h // Y1 + G1 + add v23.8h, v27.8h, v23.8h // Y2 + G2 + add v24.8h, v26.8h, v24.8h // Y1 + B1 + add v25.8h, v27.8h, v25.8h // Y2 + B2 + sqrshrun \r1, v20.8h, #1 // clip_u8((Y1 + R1) >> 1) + sqrshrun \r2, v21.8h, #1 // clip_u8((Y2 + R1) >> 1) + sqrshrun \g1, v22.8h, #1 // clip_u8((Y1 + G1) >> 1) + sqrshrun \g2, v23.8h, 
#1 // clip_u8((Y2 + G1) >> 1) + sqrshrun \b1, v24.8h, #1 // clip_u8((Y1 + B1) >> 1) + sqrshrun \b2, v25.8h, #1 // clip_u8((Y2 + B1) >> 1) + movi \a1, #255 + movi \a2, #255 .endm .macro declare_func ifmt ofmt function ff_\ifmt\()_to_\ofmt\()_neon, export=1 - load_args_\ifmt - mov w9, w1 + load_args_\ifmt + mov w9, w1 1: - mov w8, w0 // w8 = width + mov w8, w0 // w8 = width 2: - movi v5.8H, #4, lsl #8 // 128 * (1<<3) - load_chroma_\ifmt - sub v18.8H, v18.8H, v5.8H // U*(1<<3) - 128*(1<<3) - sub v19.8H, v19.8H, v5.8H // V*(1<<3) - 128*(1<<3) - sqdmulh v20.8H, v19.8H, v1.H[0] // V * v2r (R) - sqdmulh v22.8H, v18.8H, v1.H[1] // U * u2g - sqdmulh v19.8H, v19.8H, v1.H[2] // V * v2g - add v22.8H, v22.8H, v19.8H // U * u2g + V * v2g (G) - sqdmulh v24.8H, v18.8H, v1.H[3] // U * u2b (B) - zip2 v21.8H, v20.8H, v20.8H // R2 - zip1 v20.8H, v20.8H, v20.8H // R1 - zip2 v23.8H, v22.8H, v22.8H // G2 - zip1 v22.8H, v22.8H, v22.8H // G1 - zip2 v25.8H, v24.8H, v24.8H // B2 - zip1 v24.8H, v24.8H, v24.8H // B1 - ld1 {v2.16B}, [x4], #16 // load luma - ushll v26.8H, v2.8B, #3 // Y1*(1<<3) - ushll2 v27.8H, v2.16B, #3 // Y2*(1<<3) - sub v26.8H, v26.8H, v3.8H // Y1*(1<<3) - y_offset - sub v27.8H, v27.8H, v3.8H // Y2*(1<<3) - y_offset - sqdmulh v26.8H, v26.8H, v0.8H // ((Y1*(1<<3) - y_offset) * y_coeff) >> 15 - sqdmulh v27.8H, v27.8H, v0.8H // ((Y2*(1<<3) - y_offset) * y_coeff) >> 15 + movi v5.8h, #4, lsl #8 // 128 * (1<<3) + load_chroma_\ifmt + sub v18.8h, v18.8h, v5.8h // U*(1<<3) - 128*(1<<3) + sub v19.8h, v19.8h, v5.8h // V*(1<<3) - 128*(1<<3) + sqdmulh v20.8h, v19.8h, v1.h[0] // V * v2r (R) + sqdmulh v22.8h, v18.8h, v1.h[1] // U * u2g + sqdmulh v19.8h, v19.8h, v1.h[2] // V * v2g + add v22.8h, v22.8h, v19.8h // U * u2g + V * v2g (G) + sqdmulh v24.8h, v18.8h, v1.h[3] // U * u2b (B) + zip2 v21.8h, v20.8h, v20.8h // R2 + zip1 v20.8h, v20.8h, v20.8h // R1 + zip2 v23.8h, v22.8h, v22.8h // G2 + zip1 v22.8h, v22.8h, v22.8h // G1 + zip2 v25.8h, v24.8h, v24.8h // B2 + zip1 v24.8h, v24.8h, v24.8h // 
B1 + ld1 {v2.16b}, [x4], #16 // load luma + ushll v26.8h, v2.8b, #3 // Y1*(1<<3) + ushll2 v27.8h, v2.16b, #3 // Y2*(1<<3) + sub v26.8h, v26.8h, v3.8h // Y1*(1<<3) - y_offset + sub v27.8h, v27.8h, v3.8h // Y2*(1<<3) - y_offset + sqdmulh v26.8h, v26.8h, v0.8h // ((Y1*(1<<3) - y_offset) * y_coeff) >> 15 + sqdmulh v27.8h, v27.8h, v0.8h // ((Y2*(1<<3) - y_offset) * y_coeff) >> 15 .ifc \ofmt,argb // 1 2 3 0 - compute_rgba v5.8B,v6.8B,v7.8B,v4.8B, v17.8B,v18.8B,v19.8B,v16.8B + compute_rgba v5.8b,v6.8b,v7.8b,v4.8b, v17.8b,v18.8b,v19.8b,v16.8b .endif .ifc \ofmt,rgba // 0 1 2 3 - compute_rgba v4.8B,v5.8B,v6.8B,v7.8B, v16.8B,v17.8B,v18.8B,v19.8B + compute_rgba v4.8b,v5.8b,v6.8b,v7.8b, v16.8b,v17.8b,v18.8b,v19.8b .endif .ifc \ofmt,abgr // 3 2 1 0 - compute_rgba v7.8B,v6.8B,v5.8B,v4.8B, v19.8B,v18.8B,v17.8B,v16.8B + compute_rgba v7.8b,v6.8b,v5.8b,v4.8b, v19.8b,v18.8b,v17.8b,v16.8b .endif .ifc \ofmt,bgra // 2 1 0 3 - compute_rgba v6.8B,v5.8B,v4.8B,v7.8B, v18.8B,v17.8B,v16.8B,v19.8B + compute_rgba v6.8b,v5.8b,v4.8b,v7.8b, v18.8b,v17.8b,v16.8b,v19.8b .endif - st4 { v4.8B, v5.8B, v6.8B, v7.8B}, [x2], #32 - st4 {v16.8B,v17.8B,v18.8B,v19.8B}, [x2], #32 - subs w8, w8, #16 // width -= 16 - b.gt 2b - add x2, x2, w3, SXTW // dst += padding - add x4, x4, w5, SXTW // srcY += paddingY - increment_\ifmt - subs w1, w1, #1 // height -= 1 - b.gt 1b - mov w0, w9 - ret + st4 { v4.8b, v5.8b, v6.8b, v7.8b}, [x2], #32 + st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [x2], #32 + subs w8, w8, #16 // width -= 16 + b.gt 2b + add x2, x2, w3, sxtw // dst += padding + add x4, x4, w5, sxtw // srcY += paddingY + increment_\ifmt + subs w1, w1, #1 // height -= 1 + b.gt 1b + mov w0, w9 + ret endfunc .endm .macro declare_rgb_funcs ifmt - declare_func \ifmt, argb - declare_func \ifmt, rgba - declare_func \ifmt, abgr - declare_func \ifmt, bgra + declare_func \ifmt, argb + declare_func \ifmt, rgba + declare_func \ifmt, abgr + declare_func \ifmt, bgra .endm declare_rgb_funcs nv12 diff -Nru ffmpeg-5.1.8/libswscale/gamma.c 
ffmpeg-5.1.9/libswscale/gamma.c --- ffmpeg-5.1.8/libswscale/gamma.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libswscale/gamma.c 2026-05-05 14:22:01.000000000 +0000 @@ -69,4 +69,3 @@ return 0; } - diff -Nru ffmpeg-5.1.8/libswscale/output.c ffmpeg-5.1.9/libswscale/output.c --- ffmpeg-5.1.8/libswscale/output.c 2025-11-26 02:41:35.000000000 +0000 +++ ffmpeg-5.1.9/libswscale/output.c 2026-05-05 15:50:55.000000000 +0000 @@ -405,8 +405,10 @@ for (i=0; i>19); } @@ -1037,8 +1039,8 @@ int j; unsigned Y1 = -0x40000000; unsigned Y2 = -0x40000000; - int U = -(128 << 23); // 19 - int V = -(128 << 23); + unsigned U = -(128 << 23); // 19 + unsigned V = -(128 << 23); int R, G, B; for (j = 0; j < lumFilterSize; j++) { @@ -1068,8 +1070,8 @@ Y1 += 0x10000; Y2 = (int)Y2 >> 14; Y2 += 0x10000; - U >>= 14; - V >>= 14; + U = (int)U >> 14; + V = (int)V >> 14; // 8 bits: 27 -> 17 bits, 16 bits: 31 - 14 = 17 bits Y1 -= c->yuv2rgb_y_offset; @@ -1177,7 +1179,7 @@ { const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; int i; - int A1 = 0xffff<<14, A2= 0xffff<<14; + SUINT A1 = 0xffff<<14, A2= 0xffff<<14; if (uvalpha < 2048) { for (i = 0; i < ((dstW + 1) >> 1); i++) { @@ -1195,8 +1197,8 @@ Y2 += (1 << 13) - (1 << 29); if (hasAlpha) { - A1 = abuf0[i * 2 ] * (1 << 11); - A2 = abuf0[i * 2 + 1] * (1 << 11); + A1 = abuf0[i * 2 ] * (SUINT)(1 << 11); + A2 = abuf0[i * 2 + 1] * (SUINT)(1 << 11); A1 += 1 << 13; A2 += 1 << 13; diff -Nru ffmpeg-5.1.8/libswscale/rgb2rgb_template.c ffmpeg-5.1.9/libswscale/rgb2rgb_template.c --- ffmpeg-5.1.8/libswscale/rgb2rgb_template.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libswscale/rgb2rgb_template.c 2026-05-05 15:50:55.000000000 +0000 @@ -466,11 +466,11 @@ for (i = 0; i < chromWidth; i++) { #if HAVE_BIGENDIAN - *idst++ = (uc[0] << 24) + (yc[0] << 16) + + *idst++ = ((unsigned)uc[0] << 24) + (yc[0] << 16) + (vc[0] << 8) + (yc[1] << 0); #else *idst++ = uc[0] + (yc[0] << 8) + - (vc[0] << 16) + (yc[1] << 24); + (vc[0] << 16) + ((unsigned)yc[1] << 24); 
#endif yc += 2; uc++; diff -Nru ffmpeg-5.1.8/libswscale/swscale_unscaled.c ffmpeg-5.1.9/libswscale/swscale_unscaled.c --- ffmpeg-5.1.8/libswscale/swscale_unscaled.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libswscale/swscale_unscaled.c 2026-05-05 15:50:55.000000000 +0000 @@ -126,9 +126,13 @@ int srcSliceY, int srcSliceH, int width, uint8_t *dst, int dstStride) { + if (!srcSliceH) + return; + av_assert0(srcSliceH > 0); + dst += dstStride * srcSliceY; if (dstStride == srcStride && srcStride > 0) { - memcpy(dst, src, srcSliceH * dstStride); + memcpy(dst, src, (srcSliceH - 1) * dstStride + width); } else { int i; for (i = 0; i < srcSliceH; i++) { diff -Nru ffmpeg-5.1.8/libswscale/utils.c ffmpeg-5.1.9/libswscale/utils.c --- ffmpeg-5.1.8/libswscale/utils.c 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/libswscale/utils.c 2026-05-05 15:50:55.000000000 +0000 @@ -273,7 +273,8 @@ if ((c->srcBpc == 8) && (c->dstBpc <= 14)) { int16_t *filterCopy = NULL; if (filterSize > 4) { - if (!FF_ALLOC_TYPED_ARRAY(filterCopy, dstW * filterSize)) + filterCopy = av_malloc_array(dstW, filterSize * sizeof(*filterCopy)); + if (!filterCopy) return AVERROR(ENOMEM); memcpy(filterCopy, filter, dstW * filterSize * sizeof(int16_t)); } @@ -448,6 +449,11 @@ sizeFactor = param[0] != SWS_PARAM_DEFAULT ? 
ceil(2 * param[0]) : 6; av_assert0(sizeFactor > 0); + if (sizeFactor > 50) { + ret = AVERROR(EINVAL); + goto fail; + } + if (xInc <= 1 << 16) filterSize = 1 + sizeFactor; // upscale else @@ -456,7 +462,8 @@ filterSize = FFMIN(filterSize, srcW - 2); filterSize = FFMAX(filterSize, 1); - if (!FF_ALLOC_TYPED_ARRAY(filter, dstW * filterSize)) + filter = av_malloc_array(dstW, filterSize * sizeof(*filter)); + if (!filter) goto nomem; xDstInSrc = ((dstPos*(int64_t)xInc)>>7) - ((srcPos*0x10000LL)>>7); for (i = 0; i < dstW; i++) { @@ -555,7 +562,8 @@ if (dstFilter) filter2Size += dstFilter->length - 1; av_assert0(filter2Size > 0); - if (!FF_ALLOCZ_TYPED_ARRAY(filter2, dstW * filter2Size)) + filter2 = av_calloc(dstW, filter2Size * sizeof(*filter2)); + if (!filter2) goto nomem; for (i = 0; i < dstW; i++) { int j, k; @@ -714,7 +722,8 @@ // Note the +1 is for the MMX scaler which reads over the end /* align at 16 for AltiVec (needed by hScale_altivec_real) */ - if (!FF_ALLOCZ_TYPED_ARRAY(*outFilter, *outFilterSize * (dstW + 3))) + *outFilter = av_calloc(dstW + 3, *outFilterSize * sizeof(**outFilter)); + if (!*outFilter) goto nomem; /* normalize & store in outFilter */ @@ -1408,8 +1417,8 @@ if (!srcFilter) srcFilter = &dummyFilter; - c->lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW; - c->lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH; + int64_t lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW; + int64_t lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH; c->dstFormatBpp = av_get_bits_per_pixel(desc_dst); c->srcFormatBpp = av_get_bits_per_pixel(desc_src); c->vRounder = 4 * 0x0001000100010001ULL; @@ -1584,8 +1593,8 @@ } else c->canMMXEXTBeUsed = 0; - c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW; - c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH; + int64_t chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW; + int64_t chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) 
/ c->chrDstH; /* Match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src * to pixel n-2 of dst, but only for the FAST_BILINEAR mode otherwise do @@ -1596,15 +1605,26 @@ * some special code for the first and last pixel */ if (flags & SWS_FAST_BILINEAR) { if (c->canMMXEXTBeUsed) { - c->lumXInc += 20; - c->chrXInc += 20; + lumXInc += 20; + chrXInc += 20; } // we don't use the x86 asm scaler if MMX is available else if (INLINE_MMX(cpu_flags) && c->dstBpc <= 14) { - c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20; - c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20; + lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20; + chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20; } } + if (chrXInc < 10 || chrXInc > INT_MAX || + chrYInc < 10 || chrYInc > INT_MAX || + lumXInc < 10 || lumXInc > INT_MAX || + lumYInc < 10 || lumYInc > INT_MAX) + return AVERROR_PATCHWELCOME; + + c->lumXInc = lumXInc; + c->lumYInc = lumYInc; + c->chrXInc = chrXInc; + c->chrYInc = chrYInc; + // hardcoded for now c->gamma_value = 2.2; @@ -1854,13 +1874,15 @@ PPC_ALTIVEC(cpu_flags) ? 8 : have_neon(cpu_flags) ? 2 : 1; - if ((ret = initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, + ret = initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc, srcH, dstH, filterAlign, (1 << 12), (flags & SWS_BICUBLIN) ? 
(flags | SWS_BICUBIC) : flags, cpu_flags, srcFilter->lumV, dstFilter->lumV, c->param, get_local_pos(c, 0, 0, 1), - get_local_pos(c, 0, 0, 1))) < 0) + get_local_pos(c, 0, 0, 1)); + int usecascade = (ret == RETCODE_USE_CASCADE); + if (ret < 0 && !usecascade) goto fail; if ((ret = initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, c->chrSrcH, c->chrDstH, @@ -1872,10 +1894,15 @@ get_local_pos(c, c->chrDstVSubSample, c->dst_v_chr_pos, 1))) < 0) goto fail; + if (usecascade) { + ret = RETCODE_USE_CASCADE; + goto fail; + } #if HAVE_ALTIVEC - if (!FF_ALLOC_TYPED_ARRAY(c->vYCoeffsBank, c->vLumFilterSize * c->dstH) || - !FF_ALLOC_TYPED_ARRAY(c->vCCoeffsBank, c->vChrFilterSize * c->chrDstH)) + c->vYCoeffsBank = av_malloc_array(c->dstH, c->vLumFilterSize * sizeof(*c->vYCoeffsBank)); + c->vCCoeffsBank = av_malloc_array(c->chrDstH, c->vChrFilterSize * sizeof(*c->vCCoeffsBank)); + if (c->vYCoeffsBank == NULL || c->vCCoeffsBank == NULL) goto nomem; for (i = 0; i < c->vLumFilterSize * c->dstH; i++) { diff -Nru ffmpeg-5.1.8/libswscale/vscale.c ffmpeg-5.1.9/libswscale/vscale.c --- ffmpeg-5.1.8/libswscale/vscale.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libswscale/vscale.c 2026-05-05 14:22:01.000000000 +0000 @@ -318,5 +318,3 @@ lumCtx->pfn.yuv2anyX = yuv2anyX; } } - - diff -Nru ffmpeg-5.1.8/libswscale/x86/yuv2rgb_template.c ffmpeg-5.1.9/libswscale/x86/yuv2rgb_template.c --- ffmpeg-5.1.8/libswscale/x86/yuv2rgb_template.c 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/libswscale/x86/yuv2rgb_template.c 2026-05-05 15:50:52.000000000 +0000 @@ -26,6 +26,23 @@ #include "libavutil/x86/asm.h" #include "libswscale/swscale_internal.h" +#undef EMMS_IF_MMX + +#if defined(COMPILE_TEMPLATE_MMX) || defined(COMPILE_TEMPLATE_MMXEXT) +// Don't use emms_c() directly as it may entail an av_get_cpu_flags() call. 
+#if HAVE_MMX_INLINE +# define EMMS_IF_MMX __asm__ volatile ("emms" ::: "memory"); +#elif HAVE_MM_EMPTY +# include +# define EMMS_IF_MMX _mm_empty(); +#else +# include "libavutil/x86/emms.h" +# define EMMS_IF_MMX emms_c(); +#endif +#else +#define EMMS_IF_MMX +#endif + #define YUV2RGB_LOOP(depth) \ h_size = (c->dstW + 7) & ~7; \ if (h_size * depth > FFABS(dstStride[0])) \ @@ -84,6 +101,7 @@ RENAME(ff_yuv_420_rgb15)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); } + EMMS_IF_MMX return srcSliceH; } @@ -104,6 +122,7 @@ RENAME(ff_yuv_420_rgb16)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); } + EMMS_IF_MMX return srcSliceH; } @@ -118,6 +137,7 @@ RENAME(ff_yuv_420_rgb32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); } + EMMS_IF_MMX return srcSliceH; } @@ -132,6 +152,7 @@ RENAME(ff_yuv_420_bgr32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); } + EMMS_IF_MMX return srcSliceH; } @@ -146,6 +167,7 @@ const uint8_t *pa = src[3] + y * srcStride[3]; RENAME(ff_yuva_420_rgb32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index); } + EMMS_IF_MMX return srcSliceH; } @@ -161,6 +183,7 @@ const uint8_t *pa = src[3] + y * srcStride[3]; RENAME(ff_yuva_420_bgr32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index); } + EMMS_IF_MMX return srcSliceH; } #endif @@ -177,6 +200,7 @@ RENAME(ff_yuv_420_rgb24)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); } + EMMS_IF_MMX return srcSliceH; } @@ -191,6 +215,7 @@ RENAME(ff_yuv_420_bgr24)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); } + EMMS_IF_MMX return srcSliceH; } #endif diff -Nru ffmpeg-5.1.8/tests/extended.ffconcat ffmpeg-5.1.9/tests/extended.ffconcat --- ffmpeg-5.1.8/tests/extended.ffconcat 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/tests/extended.ffconcat 2026-05-05 14:22:01.000000000 +0000 @@ -111,4 +111,3 @@ 
file %SRCFILE% inpoint 00:00.40 - diff -Nru ffmpeg-5.1.8/tests/fate/ffprobe.mak ffmpeg-5.1.9/tests/fate/ffprobe.mak --- ffmpeg-5.1.8/tests/fate/ffprobe.mak 2025-11-23 02:57:58.000000000 +0000 +++ ffmpeg-5.1.9/tests/fate/ffprobe.mak 2026-05-05 14:22:01.000000000 +0000 @@ -38,4 +38,3 @@ FATE_FFPROBE += $(FATE_FFPROBE-yes) fate-ffprobe: $(FATE_FFPROBE) - diff -Nru ffmpeg-5.1.8/tests/fate/lossless-audio.mak ffmpeg-5.1.9/tests/fate/lossless-audio.mak --- ffmpeg-5.1.8/tests/fate/lossless-audio.mak 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/tests/fate/lossless-audio.mak 2026-05-05 15:50:52.000000000 +0000 @@ -30,4 +30,3 @@ FATE_SAMPLES_FFMPEG += $(FATE_SAMPLES_LOSSLESS_AUDIO) fate-lossless-audio: $(FATE_SAMPLES_LOSSLESS_AUDIO) - diff -Nru ffmpeg-5.1.8/tests/ref/fate/zmbv-8bit ffmpeg-5.1.9/tests/ref/fate/zmbv-8bit --- ffmpeg-5.1.8/tests/ref/fate/zmbv-8bit 2025-11-26 02:41:32.000000000 +0000 +++ ffmpeg-5.1.9/tests/ref/fate/zmbv-8bit 2026-05-05 15:50:55.000000000 +0000 @@ -278,4 +278,3 @@ 0, 272, 272, 1, 192000, 0xd08e49d1 0, 273, 273, 1, 192000, 0xd08e49d1 0, 274, 274, 1, 192000, 0xd08e49d1 -0, 275, 275, 1, 192000, 0x1f34135f diff -Nru ffmpeg-5.1.8/tests/simple1.ffconcat ffmpeg-5.1.9/tests/simple1.ffconcat --- ffmpeg-5.1.8/tests/simple1.ffconcat 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/tests/simple1.ffconcat 2026-05-05 14:22:01.000000000 +0000 @@ -9,4 +9,3 @@ inpoint 00:00.20 outpoint 00:00.40 file_packet_meta dummy 1 - diff -Nru ffmpeg-5.1.8/tests/simple2.ffconcat ffmpeg-5.1.9/tests/simple2.ffconcat --- ffmpeg-5.1.8/tests/simple2.ffconcat 2025-11-21 01:15:18.000000000 +0000 +++ ffmpeg-5.1.9/tests/simple2.ffconcat 2026-05-05 14:22:01.000000000 +0000 @@ -18,4 +18,3 @@ file %SRCFILE% inpoint 00:01.80 outpoint 00:02.00 - diff -Nru ffmpeg-5.1.8/tools/check_arm_indent.sh ffmpeg-5.1.9/tools/check_arm_indent.sh --- ffmpeg-5.1.8/tools/check_arm_indent.sh 1970-01-01 00:00:00.000000000 +0000 +++ ffmpeg-5.1.9/tools/check_arm_indent.sh 2026-05-05 
14:22:01.000000000 +0000 @@ -0,0 +1,58 @@ +#!/bin/sh +# +# Copyright (c) 2025 Martin Storsjo +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +cd $(dirname $0)/.. + +if [ "$1" = "--apply" ]; then + apply=1 +fi + +ret=0 + +for i in */aarch64/*.S */aarch64/*/*.S; do + if ! [ -f "$i" ]; then + continue + fi + case $i in + libavcodec/aarch64/h264idct_neon.S|libavcodec/aarch64/h26x/epel_neon.S|libavcodec/aarch64/h26x/qpel_neon.S|libavcodec/aarch64/vc1dsp_neon.S) + # Skip files with known (and tolerated) deviations from the tool. + continue + esac + ./tools/indent_arm_assembly.pl < "$i" > tmp.S || ret=$? + if ! 
git diff --quiet --no-index "$i" tmp.S; then + if [ -n "$apply" ]; then + mv tmp.S "$i" + else + git --no-pager diff --no-index "$i" tmp.S + fi + ret=1 + fi +done + +rm -f tmp.S + +exit $ret diff -Nru ffmpeg-5.1.8/tools/indent_arm_assembly.pl ffmpeg-5.1.9/tools/indent_arm_assembly.pl --- ffmpeg-5.1.8/tools/indent_arm_assembly.pl 1970-01-01 00:00:00.000000000 +0000 +++ ffmpeg-5.1.9/tools/indent_arm_assembly.pl 2026-05-05 14:22:01.000000000 +0000 @@ -0,0 +1,243 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2025 Martin Storsjo +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +# A script for reformatting ARM/AArch64 assembly according to the following +# style: +# - Instructions start after 8 columns, operands start after 24 columns +# - Vector register layouts and modifiers like "uxtw" are written in lowercase +# - Optionally align operand columns vertically according to their +# maximum width (accommodating for e.g. x0 vs x10, or v0.8b vs v16.16b). +# +# The script can be executed as "indent_arm_assembly.pl file [outfile]". +# If no outfile is specified, the given file is overwritten in place. +# +# Alternatively, the if no file parameters are given, the script reads input +# code on stdin, and outputs the reformatted code on stdout. + +use strict; + +my $indent_operands = 0; +my $instr_indent = 8; +my $operand_indent = 24; +my $match_indent = 0; +my $file; +my $outfile; + +while (@ARGV) { + my $opt = shift; + + if ($opt eq "-operands") { + $indent_operands = 1; + } elsif ($opt eq "-indent") { + $instr_indent = shift; + } elsif ($opt eq "-operand-indent") { + $operand_indent = shift; + } elsif ($opt eq "-match-indent") { + $match_indent = 1; + } else { + if (!$file) { + $file = $opt; + } elsif (!$outfile) { + $outfile = $opt; + } else { + die "Unrecognized parameter $opt\n"; + } + } +} + +if ($operand_indent < $instr_indent) { + die "Can't indent operands to $operand_indent while indenting " . + "instructions to $instr_indent\n"; +} + +# Return a string consisting of n spaces +sub spaces { + my $n = $_[0]; + return " " x $n; +} + +sub indentcolumns { + my $input = $_[0]; + my $chars = $_[1]; + my @operands = split(/,/, $input); + my $num = @operands; + my $ret = ""; + for (my $i = 0; $i < $num; $i++) { + my $cur = $operands[$i]; + # Trim out leading/trailing whitespace + $cur =~ s/^\s+|\s+$//g; + $ret .= $cur; + if ($i + 1 < $num) { + # If we have a following operand, add a comma and whitespace to + # align the next operand. 
+ my $next = $operands[$i+1]; + my $len = length($cur); + if ($len > $chars) { + # If this operand was too wide for the intended column width, + # don't try to realign the line at all, just return the input + # untouched. + return $input; + } + my $pad = $chars - $len; + if ($next =~ /[su]xt[bhw]|[la]s[lr]/) { + # If the next item isn't a regular operand, but a modifier, + # don't try to align that. E.g. "add x0, x0, w1, uxtw #1". + $pad = 0; + } + $ret .= "," . spaces(1 + $pad); + } + } + return $ret; +} + +# Realign the operands part of an instruction line, making each operand +# take up the maximum width for that kind of operand. +sub columns { + my $rest = $_[0]; + if ($rest !~ /,/) { + # No commas, no operands to split and align + return $rest; + } + if ($rest =~ /{|[^\w]\[/) { + # Check for instructions that use register ranges, like {v0.8b,v1.8b} + # or mem address operands, like "ldr x0, [sp]" - we skip trying to + # realign these. + return $rest; + } + if ($rest =~ /v[0-9]+\.[0-9]+[bhsd]/) { + # If we have references to aarch64 style vector registers, like + # v0.8b, then align all operands to the maximum width of such + # operands - v16.16b. + # + # TODO: Ideally, we'd handle mixed operand types individually. + return indentcolumns($rest, 7); + } + # Indent operands according to the maximum width of regular registers, + # like x10. + return indentcolumns($rest, 3); +} + +my $in; +my $out; +my $tempfile; + +if ($file) { + open(INPUT, "$file") or die "Unable to open $file: $!"; + $in = *INPUT; + if ($outfile) { + open(OUTPUT, ">$outfile") or die "Unable to open $outfile: $!"; + } else { + $tempfile = "$file.tmp"; + open(OUTPUT, ">$tempfile") or die "Unable to open $tempfile: $!"; + } + $out = *OUTPUT; +} else { + $in = *STDIN; + $out = *STDOUT; +} + +while (<$in>) { + # Trim off trailing whitespace. 
+ chomp; + if (/^([\.\w\d]+:)?(\s+)([\w\\][\w\\\.]*)(?:(\s+)(.*)|$)/) { + my $label = $1; + my $indent = $2; + my $instr = $3; + my $origspace = $4; + my $rest = $5; + + my $orig_operand_indent = length($label) + length($indent) + + length($instr) + length($origspace); + + if ($indent_operands) { + $rest = columns($rest); + } + + my $size = $instr_indent; + if ($match_indent) { + # Try to check the current attempted indent size and normalize + # to it; match existing ident sizes of 4, 8, 10 and 12 columns. + my $cur_indent = length($label) + length($indent); + if ($cur_indent >= 3 && $cur_indent <= 5) { + $size = 4; + } elsif ($cur_indent >= 7 && $cur_indent <= 9) { + $size = 8; + } elsif ($cur_indent == 10 || $cur_indent == 12) { + $size = $cur_indent; + } + } + if (length($label) >= $size) { + # Not enough space for the label; just add a space between the label + # and the instruction. + $indent = " "; + } else { + $indent = spaces($size - length($label)); + } + + my $instr_end = length($label) + length($indent) + length($instr); + $size = $operand_indent - $instr_end; + if ($match_indent) { + # Check how the operands currently seem to be indented. + my $cur_indent = $orig_operand_indent; + if ($cur_indent >= 11 && $cur_indent <= 13) { + $size = 12; + } elsif ($cur_indent >= 14 && $cur_indent <= 17) { + $size = 16; + } elsif ($cur_indent >= 18 && $cur_indent <= 22) { + $size = 20; + } elsif ($cur_indent >= 23 && $cur_indent <= 27) { + $size = 24; + } + $size -= $instr_end; + } + my $operand_space = " "; + if ($size > 0) { + $operand_space = spaces($size); + } + + # Lowercase the aarch64 vector layout description, .8B -> .8b + $rest =~ s/(\.[84216]*[BHSD])/lc($1)/ge; + # Lowercase modifiers like "uxtw" or "lsl" + $rest =~ s/([SU]XT[BWH]|[LA]S[LR])/lc($1)/ge; + + # Reassemble the line + if ($rest eq "") { + $_ = $label . $indent . $instr; + } else { + $_ = $label . $indent . $instr . $operand_space . $rest; + } + } + print $out $_ . 
"\n"; +} + +if ($file) { + close(INPUT); + close(OUTPUT); +} +if ($tempfile) { + rename($tempfile, $file); +}