Version in base suite: 24.11.3-1~deb13u1
Base version: dpdk_24.11.3-1~deb13u1
Target version: dpdk_24.11.4-0+deb13u1
Base file: /srv/ftp-master.debian.org/ftp/pool/main/d/dpdk/dpdk_24.11.3-1~deb13u1.dsc
Target file: /srv/ftp-master.debian.org/policy/pool/main/d/dpdk/dpdk_24.11.4-0+deb13u1.dsc

 .mailmap | 13
 VERSION | 2
 app/dumpcap/main.c | 4
 app/pdump/main.c | 24
 app/proc-info/main.c | 2
 app/test-crypto-perf/cperf_test_vector_parsing.c | 47 +
 app/test-dma-perf/benchmark.c | 94 ++-
 app/test-dma-perf/main.c | 2
 app/test-eventdev/test_perf_common.c | 9
 app/test-flow-perf/main.c | 2
 app/test-pmd/cmd_flex_item.c | 13
 app/test-pmd/cmdline.c | 6
 app/test-pmd/cmdline_flow.c | 2
 app/test-pmd/cmdline_mtr.c | 30 -
 app/test-pmd/config.c | 10
 app/test-pmd/testpmd.c | 142 +++++
 app/test/process.h | 2
 app/test/suites/test_telemetry.sh | 2
 app/test/test.c | 2
 app/test/test.h | 2
 app/test/test_argparse.c | 31 -
 app/test/test_cryptodev.c | 65 +-
 app/test/test_debug.c | 99 ++-
 app/test/test_dmadev.c | 2
 app/test/test_event_dma_adapter.c | 6
 app/test/test_func_reentrancy.c | 4
 app/test/test_hash_readwrite.c | 5
 app/test/test_hash_readwrite_lf_perf.c | 4
 app/test/test_net_ip6.c | 2
 app/test/test_trace.c | 2
 buildtools/pmdinfogen.py | 42 -
 config/arm/meson.build | 2
 debian/changelog | 12
 debian/patches/disable_arm64_autopkgtest_fails.patch | 28 +
 debian/patches/series | 1
 doc/guides/cryptodevs/ionic.rst | 4
 doc/guides/freebsd_gsg/build_dpdk.rst | 2
 doc/guides/linux_gsg/linux_drivers.rst | 2
 doc/guides/nics/cpfl.rst | 6
 doc/guides/nics/features/iavf.ini | 4
 doc/guides/nics/features/ice.ini | 1
 doc/guides/nics/features/txgbe.ini | 1
 doc/guides/nics/ionic.rst | 4
 doc/guides/nics/mlx5.rst | 4
 doc/guides/rel_notes/release_24_11.rst | 367 +++++++++++++
 doc/guides/sample_app_ug/l3_forward.rst | 6
 doc/guides/testpmd_app_ug/testpmd_funcs.rst | 4
 drivers/bus/cdx/cdx_vfio.c | 6
 drivers/bus/dpaa/base/qbman/qman_driver.c | 2
 drivers/bus/dpaa/dpaa_bus.c | 59 ++
 drivers/bus/fslmc/bus_fslmc_driver.h | 1
 drivers/bus/fslmc/fslmc_bus.c | 2
 drivers/bus/ifpga/ifpga_bus.c | 4
 drivers/bus/pci/linux/pci_vfio.c | 4
 drivers/bus/pci/windows/pci.c | 11
 drivers/bus/pci/windows/pci_netuio.h | 6
 drivers/bus/uacce/uacce.c | 2
 drivers/common/cnxk/roc_bphy_cgx.c | 52 -
 drivers/common/cnxk/roc_nix_debug.c | 4
 drivers/common/cnxk/roc_nix_inl.c | 4
 drivers/common/cnxk/roc_nix_queue.c | 2
 drivers/common/cnxk/roc_nix_tm_ops.c | 7
 drivers/common/mlx5/linux/mlx5_nl.c | 110 ++++
 drivers/common/mlx5/linux/mlx5_nl.h | 3
 drivers/common/mlx5/mlx5_common.h | 3
 drivers/common/mlx5/mlx5_common_mr.c | 6
 drivers/common/mlx5/mlx5_devx_cmds.c | 4
 drivers/common/mlx5/mlx5_devx_cmds.h | 2
 drivers/common/mlx5/mlx5_prm.h | 14
 drivers/common/mlx5/version.map | 1
 drivers/common/mlx5/windows/mlx5_win_defs.h | 2
 drivers/common/qat/qat_device.c | 7
 drivers/common/qat/qat_device.h | 2
 drivers/compress/qat/qat_comp_pmd.c | 2
 drivers/crypto/caam_jr/caam_jr_uio.c | 12
 drivers/crypto/cnxk/cnxk_ae.h | 15
 drivers/crypto/ipsec_mb/ipsec_mb_ops.c | 7
 drivers/crypto/mlx5/mlx5_crypto.c | 2
 drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c | 14
 drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c | 6
 drivers/crypto/qat/dev/qat_crypto_pmd_gens.h | 21
 drivers/crypto/qat/dev/qat_sym_pmd_gen1.c | 24
 drivers/crypto/qat/qat_asym.c | 10
 drivers/crypto/qat/qat_sym.c | 2
 drivers/crypto/qat/qat_sym_session.c | 3
 drivers/dma/hisilicon/hisi_dmadev.c | 45 +
 drivers/dma/hisilicon/hisi_dmadev.h | 2
 drivers/net/af_packet/rte_eth_af_packet.c | 7
 drivers/net/ark/ark_ethdev_rx.c | 4
 drivers/net/axgbe/axgbe_ethdev.c | 6
 drivers/net/bnxt/tf_core/v3/tfo.c | 8
 drivers/net/bonding/rte_eth_bond_8023ad.c | 8
 drivers/net/cnxk/cn10k_ethdev_sec.c | 2
 drivers/net/cnxk/cnxk_ethdev_mtr.c | 8
 drivers/net/dpaa/dpaa_ethdev.c | 217 +++---
 drivers/net/dpaa/dpaa_flow.c | 6
 drivers/net/dpaa2/base/dpaa2_hw_dpni.c | 3
 drivers/net/dpaa2/base/dpaa2_hw_dpni_annot.h | 10
 drivers/net/dpaa2/dpaa2_ethdev.c | 48 +
 drivers/net/dpaa2/dpaa2_ethdev.h | 12
 drivers/net/dpaa2/dpaa2_flow.c | 123 +++-
 drivers/net/dpaa2/dpaa2_recycle.c | 50 -
 drivers/net/dpaa2/dpaa2_rxtx.c | 55 +-
 drivers/net/dpaa2/dpaa2_tm.c | 8
 drivers/net/e1000/base/e1000_mac.c | 1
 drivers/net/ena/base/ena_com.c | 6
 drivers/net/ena/ena_ethdev.c | 28 -
 drivers/net/enetfec/enet_ethdev.c | 27 -
 drivers/net/enetfec/enet_rxtx.c | 10
 drivers/net/enetfec/enet_uio.c | 19
 drivers/net/fm10k/base/fm10k_common.c | 4
 drivers/net/gve/base/gve_adminq.c | 2
 drivers/net/gve/base/gve_desc_dqo.h | 4
 drivers/net/gve/gve_ethdev.c | 140 ++--
 drivers/net/gve/gve_ethdev.h | 1
 drivers/net/gve/gve_rx_dqo.c | 15
 drivers/net/gve/gve_tx_dqo.c | 166 +++++-
 drivers/net/hns3/hns3_ethdev.c | 31 -
 drivers/net/hns3/hns3_ethdev.h | 3
 drivers/net/hns3/hns3_fdir.c | 13
 drivers/net/hns3/hns3_flow.c | 60 --
 drivers/net/hns3/hns3_rxtx.c | 48 +
 drivers/net/hns3/hns3_rxtx_vec.h | 4
 drivers/net/i40e/i40e_hash.c | 16
 drivers/net/i40e/i40e_rxtx.c | 10
 drivers/net/iavf/iavf_rxtx.c | 29 -
 drivers/net/iavf/iavf_rxtx.h | 3
 drivers/net/iavf/iavf_vchnl.c | 2
 drivers/net/ice/base/ice_flow.c | 4
 drivers/net/ice/base/ice_switch.c | 18
 drivers/net/ice/base/ice_type.h | 2
 drivers/net/ice/ice_acl_filter.c | 5
 drivers/net/ice/ice_ethdev.c | 95 ---
 drivers/net/ice/ice_ethdev.h | 2
 drivers/net/ice/ice_fdir_filter.c | 56 +-
 drivers/net/ice/ice_rxtx.c | 10
 drivers/net/ice/ice_rxtx_vec_common.h | 4
 drivers/net/idpf/idpf_rxtx.c | 2
 drivers/net/intel/ice/ice_rxtx_vec_common.h | 207 +++++++
 drivers/net/ixgbe/base/ixgbe_vf.c | 2
 drivers/net/ixgbe/ixgbe_flow.c | 5
 drivers/net/memif/rte_eth_memif.c | 3
 drivers/net/mlx4/mlx4_rxtx.c | 2
 drivers/net/mlx5/hws/mlx5dr_action.c | 1
 drivers/net/mlx5/hws/mlx5dr_buddy.c | 1
 drivers/net/mlx5/hws/mlx5dr_definer.c | 31 +
 drivers/net/mlx5/hws/mlx5dr_pool.c | 3
 drivers/net/mlx5/hws/mlx5dr_rule.c | 2
 drivers/net/mlx5/linux/mlx5_ethdev_os.c | 44 +
 drivers/net/mlx5/linux/mlx5_flow_os.c | 16
 drivers/net/mlx5/linux/mlx5_flow_os.h | 3
 drivers/net/mlx5/linux/mlx5_os.c | 69 +-
 drivers/net/mlx5/linux/mlx5_verbs.c | 2
 drivers/net/mlx5/mlx5.c | 33 +
 drivers/net/mlx5/mlx5.h | 21
 drivers/net/mlx5/mlx5_defs.h | 6
 drivers/net/mlx5/mlx5_devx.c | 5
 drivers/net/mlx5/mlx5_ethdev.c | 42 +
 drivers/net/mlx5/mlx5_flow.c | 47 +
 drivers/net/mlx5/mlx5_flow.h | 82 +--
 drivers/net/mlx5/mlx5_flow_dv.c | 513 +++++++++++--------
 drivers/net/mlx5/mlx5_flow_flex.c | 3
 drivers/net/mlx5/mlx5_flow_hw.c | 280 +++++-----
 drivers/net/mlx5/mlx5_flow_verbs.c | 6
 drivers/net/mlx5/mlx5_hws_cnt.c | 19
 drivers/net/mlx5/mlx5_hws_cnt.h | 133 +++-
 drivers/net/mlx5/mlx5_nta_rss.c | 34 -
 drivers/net/mlx5/mlx5_nta_split.c | 4
 drivers/net/mlx5/mlx5_rx.c | 17
 drivers/net/mlx5/mlx5_rx.h | 1
 drivers/net/mlx5/mlx5_rxq.c | 29 -
 drivers/net/mlx5/mlx5_rxtx_vec.h | 7
 drivers/net/mlx5/mlx5_trigger.c | 104 ++-
 drivers/net/mlx5/mlx5_txq.c | 18
 drivers/net/mlx5/mlx5_utils.c | 2
 drivers/net/mlx5/mlx5_utils.h | 3
 drivers/net/mlx5/windows/mlx5_ethdev_os.c | 34 +
 drivers/net/mlx5/windows/mlx5_flow_os.c | 14
 drivers/net/mlx5/windows/mlx5_flow_os.h | 4
 drivers/net/mlx5/windows/mlx5_os.c | 3
 drivers/net/mlx5/windows/mlx5_os.h | 2
 drivers/net/nfp/nfp_mtr.c | 19
 drivers/net/ngbe/ngbe_ethdev.c | 17
 drivers/net/ngbe/ngbe_rxtx.c | 41 -
 drivers/net/ngbe/ngbe_rxtx.h | 1
 drivers/net/ngbe/ngbe_rxtx_vec_neon.c | 9
 drivers/net/ngbe/ngbe_rxtx_vec_sse.c | 9
 drivers/net/ntnic/ntnic_ethdev.c | 4
 drivers/net/octeon_ep/cnxk_ep_rx.c | 47 +
 drivers/net/octeon_ep/cnxk_ep_vf.c | 32 -
 drivers/net/octeon_ep/otx2_ep_vf.c | 30 -
 drivers/net/octeon_ep/otx_ep_common.h | 1
 drivers/net/octeon_ep/otx_ep_ethdev.c | 6
 drivers/net/octeon_ep/otx_ep_mbox.c | 8
 drivers/net/octeon_ep/otx_ep_rxtx.h | 3
 drivers/net/tap/bpf/meson.build | 8
 drivers/net/tap/rte_eth_tap.c | 4
 drivers/net/tap/tap_flow.c | 117 +---
 drivers/net/tap/tap_netlink.c | 24
 drivers/net/tap/tap_netlink.h | 10
 drivers/net/tap/tap_tcmsgs.c | 6
 drivers/net/txgbe/base/txgbe_type.h | 4
 drivers/net/txgbe/txgbe_ethdev.c | 50 +
 drivers/net/txgbe/txgbe_ethdev.h | 1
 drivers/net/txgbe/txgbe_fdir.c | 59 +-
 drivers/net/txgbe/txgbe_flow.c | 174 ++++--
 drivers/net/txgbe/txgbe_rxtx.c | 35 -
 drivers/net/txgbe/txgbe_rxtx.h | 1
 drivers/net/txgbe/txgbe_rxtx_vec_neon.c | 9
 drivers/net/txgbe/txgbe_rxtx_vec_sse.c | 9
 drivers/net/virtio/virtio_user/virtio_user_dev.c | 30 -
 drivers/net/vmxnet3/base/vmxnet3_defs.h | 3
 drivers/net/vmxnet3/vmxnet3_ethdev.c | 30 -
 drivers/net/zxdh/meson.build | 2
 drivers/regex/mlx5/mlx5_regex_fastpath.c | 1
 drivers/regex/mlx5/mlx5_rxp.c | 2
 drivers/regex/mlx5/mlx5_rxp.h | 20
 drivers/vdpa/mlx5/mlx5_vdpa.h | 1
 dts/tests/TestSuite_checksum_offload.py | 15
 examples/l3fwd-power/main.c | 2
 examples/l3fwd/l3fwd.h | 10
 examples/l3fwd/l3fwd_acl.c | 2
 examples/l3fwd/l3fwd_common.h | 5
 examples/l3fwd/l3fwd_em.c | 2
 examples/l3fwd/l3fwd_fib.c | 2
 examples/l3fwd/l3fwd_lpm.c | 2
 examples/l3fwd/main.c | 80 +-
 examples/server_node_efd/efd_server/main.c | 2
 examples/server_node_efd/shared/common.h | 3
 examples/vdpa/main.c | 8
 lib/bbdev/rte_bbdev.c | 10
 lib/cmdline/cmdline_parse_portlist.c | 15
 lib/dmadev/rte_dmadev.h | 1
 lib/eal/arm/include/rte_memcpy_32.h | 6
 lib/eal/common/eal_common_options.c | 17
 lib/eal/freebsd/eal.c | 8
 lib/eal/include/rte_bitops.h | 8
 lib/eal/include/rte_mcslock.h | 100 ++-
 lib/eal/include/rte_tailq.h | 9
 lib/eal/linux/eal.c | 7
 lib/eal/windows/eal.c | 5
 lib/eal/x86/rte_power_intrinsics.c | 6
 lib/efd/rte_efd.c | 3
 lib/ethdev/rte_ethdev.h | 2
 lib/eventdev/rte_event_crypto_adapter.c | 6
 lib/eventdev/rte_event_timer_adapter.c | 4
 lib/eventdev/rte_event_timer_adapter.h | 2
 lib/fib/trie.c | 23
 lib/graph/graph.c | 14
 lib/graph/graph_private.h | 12
 lib/graph/graph_stats.c | 107 ++-
 lib/graph/node.c | 6
 lib/gro/gro_tcp.h | 2
 lib/hash/rte_thash.c | 6
 lib/net/rte_ip6.h | 9
 lib/net/rte_net.c | 1
 lib/rawdev/rte_rawdev.c | 8
 lib/rawdev/rte_rawdev_pmd.h | 2
 lib/ring/rte_ring_c11_pvt.h | 71 ++
 lib/ring/rte_ring_hts_elem_pvt.h | 96 ++-
 lib/ring/rte_ring_rts_elem_pvt.h | 96 ++-
 lib/sched/rte_sched.c | 2
 lib/telemetry/rte_telemetry.h | 11
 lib/telemetry/telemetry.c | 2
 lib/telemetry/telemetry_internal.h | 11
 lib/vhost/socket.c | 3
 lib/vhost/vduse.c | 71 ++
 lib/vhost/vduse.h | 6
 lib/vhost/virtio_net.c | 52 +
 usertools/dpdk-telemetry-exporter.py | 6
 270 files changed, 4584 insertions(+), 2079 deletions(-)

diff -Nru dpdk-24.11.3/.mailmap dpdk-24.11.4/.mailmap
--- dpdk-24.11.3/.mailmap 2025-08-18 15:26:42.000000000 +0000
+++ dpdk-24.11.4/.mailmap 2025-12-19 12:05:33.000000000 +0000
@@ -1,4 +1,5 @@
 Aakash Sasidharan
+Aarnav JP
 Aaro Koskinen
 Aaron Campbell
 Aaron Conole
@@ -21,6 +22,7 @@
 Adrian Moreno
 Adrian Pielech
 Adrian Podlawski
+Adrian Schollmeyer
 Adrien Mazarguil
 Ady Agbarih
 Agalya Babu RadhaKrishnan
@@ -124,6 +126,7 @@
Andy Moreton Andy Pei Anirudh Venkataramanan +Ankit Garg Ankur Dwivedi Anna Lukin Anoob Joseph @@ -133,6 +136,7 @@ Antonio Fischetti Anup Prabhu Anupam Kapoor +Anurag Mandal Apeksha Gupta Archana Muniganti Archit Pandey @@ -156,6 +160,7 @@ Ashish Paul Ashish Sadanandan Ashish Shah +Ashok Kaladi Ashwin Sekhar T K Asim Jamshed Aviad Yehezkel @@ -606,6 +611,7 @@ Ilyes Ben Hamouda Intiyaz Basha Isaac Boukris +Itai Sharoni Itamar Gozlan Itsuro Oda Ivan Boule @@ -799,6 +805,7 @@ Keiichi Watanabe Keith Wiles Kent Wires +Kerem Aksu Keunhong Lee Kevin Laatz Kevin Lampis @@ -927,6 +934,7 @@ Manish Kurup Manish Tomar Mao Jiang +Marat Khalili Marcel Apfelbaum Marcel Cornu Marcelo Ricardo Leitner @@ -991,7 +999,7 @@ Mauro Annarumma Maxime Coquelin Maxime Gouin -Maxime Leroy +Maxime Leroy Md Fahad Iqbal Polash Megha Ajmera Meijuan Zhao @@ -1127,6 +1135,7 @@ Nobuhiro Miki Norbert Ciosek Norbert Zulinski +Nupur Uttarwar Odi Assli Ofer Dagan Ognjen Joldzic @@ -1220,6 +1229,7 @@ Potnuri Bharat Teja Pradeep Satyanarayana Prashant Bhole +Prashant Gupta Prashant Upadhyaya Prateek Agarwal Prathisna Padmasanan @@ -1310,6 +1320,7 @@ Rogelio Domínguez Hernández Roger Melton Rohit Raj +Roi Dayan Roland Qi Rolf Neugebauer Romain Delhomel diff -Nru dpdk-24.11.3/VERSION dpdk-24.11.4/VERSION --- dpdk-24.11.3/VERSION 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/VERSION 2025-12-19 12:05:33.000000000 +0000 @@ -1 +1 @@ -24.11.3 +24.11.4 diff -Nru dpdk-24.11.3/app/dumpcap/main.c dpdk-24.11.4/app/dumpcap/main.c --- dpdk-24.11.3/app/dumpcap/main.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/dumpcap/main.c 2025-12-19 12:05:33.000000000 +0000 @@ -1058,6 +1058,10 @@ else pcap_dump_close(out.dumper); + /* If primary has exited, do not try and communicate with it */ + if (!rte_eal_primary_proc_alive(NULL)) + return 0; + cleanup_pdump_resources(); rte_ring_free(r); diff -Nru dpdk-24.11.3/app/pdump/main.c dpdk-24.11.4/app/pdump/main.c --- dpdk-24.11.3/app/pdump/main.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/pdump/main.c 2025-12-19 12:05:33.000000000 +0000 @@ -982,9 +982,8 @@ int ret; int i; - char n_flag[] = "-n4"; char mp_flag[] = "--proc-type=secondary"; - char *argp[argc + 2]; + char *argp[argc + 1]; /* catch ctrl-c so we can cleanup on exit */ sigemptyset(&action.sa_mask); @@ -996,13 +995,12 @@ sigaction(SIGHUP, &action, NULL); argp[0] = argv[0]; - argp[1] = n_flag; - argp[2] = mp_flag; + argp[1] = mp_flag; for (i = 1; i < argc; i++) - argp[i + 2] = argv[i]; + argp[i + 1] = argv[i]; - argc += 2; + argc += 1; diag = rte_eal_init(argc, argp); if (diag < 0) @@ -1012,7 +1010,7 @@ rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); argc -= diag; - argv += (diag - 2); + argv += (diag - 1); /* parse app arguments */ if (argc > 1) { @@ -1028,13 +1026,15 @@ dump_packets(); disable_primary_monitor(); - cleanup_pdump_resources(); + /* dump debug stats */ print_pdump_stats(); - ret = rte_eal_cleanup(); - if (ret) - printf("Error from rte_eal_cleanup(), %d\n", ret); + /* If primary has exited, do not try and communicate with it */ + if (!rte_eal_primary_proc_alive(NULL)) + return 0; + + cleanup_pdump_resources(); - return 0; + return rte_eal_cleanup() ? 
EXIT_FAILURE : 0; } diff -Nru dpdk-24.11.3/app/proc-info/main.c dpdk-24.11.4/app/proc-info/main.c --- dpdk-24.11.3/app/proc-info/main.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/proc-info/main.c 2025-12-19 12:05:33.000000000 +0000 @@ -696,7 +696,7 @@ static void collectd_resolve_cnt_type(char *cnt_type, size_t cnt_type_len, const char *cnt_name) { - char *type_end = strrchr(cnt_name, '_'); + const char *type_end = strrchr(cnt_name, '_'); if ((type_end != NULL) && (strncmp(cnt_name, "rx_", strlen("rx_")) == 0)) { diff -Nru dpdk-24.11.3/app/test/process.h dpdk-24.11.4/app/test/process.h --- dpdk-24.11.3/app/test/process.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/process.h 2025-12-19 12:05:33.000000000 +0000 @@ -203,7 +203,7 @@ * tests attempting to use this function on FreeBSD. */ #ifdef RTE_EXEC_ENV_LINUX -static char * +static inline char * get_current_prefix(char *prefix, int size) { char path[PATH_MAX] = {0}; diff -Nru dpdk-24.11.3/app/test/suites/test_telemetry.sh dpdk-24.11.4/app/test/suites/test_telemetry.sh --- dpdk-24.11.3/app/test/suites/test_telemetry.sh 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/suites/test_telemetry.sh 2025-12-19 12:05:33.000000000 +0000 @@ -7,7 +7,7 @@ exit 77 } -rootdir=$(readlink -f $(dirname $(readlink -f $0))/../..) +rootdir=$(readlink -f $(dirname $(readlink -f $0))/../../..) tmpoutput=$(mktemp -t dpdk.test_telemetry.XXXXXX) trap "cat $tmpoutput; rm -f $tmpoutput" EXIT diff -Nru dpdk-24.11.3/app/test/test.c dpdk-24.11.4/app/test/test.c --- dpdk-24.11.3/app/test/test.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/test.c 2025-12-19 12:05:33.000000000 +0000 @@ -80,6 +80,8 @@ { "test_memory_flags", no_action }, { "test_file_prefix", no_action }, { "test_no_huge_flag", no_action }, + { "test_panic", test_panic }, + { "test_exit", test_exit }, #ifdef RTE_LIB_TIMER #ifndef RTE_EXEC_ENV_WINDOWS { "timer_secondary_spawn_wait", test_timer_secondary }, diff -Nru dpdk-24.11.3/app/test/test.h dpdk-24.11.4/app/test/test.h --- dpdk-24.11.3/app/test/test.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/test.h 2025-12-19 12:05:33.000000000 +0000 @@ -174,7 +174,9 @@ int commands_init(void); int command_valid(const char *cmd); +int test_exit(void); int test_mp_secondary(void); +int test_panic(void); int test_timer_secondary(void); int test_set_rxtx_conf(cmdline_fixed_string_t mode); diff -Nru dpdk-24.11.3/app/test/test_argparse.c dpdk-24.11.4/app/test/test_argparse.c --- dpdk-24.11.3/app/test/test_argparse.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/test_argparse.c 2025-12-19 12:05:33.000000000 +0000 @@ -71,7 +71,7 @@ } /* valid templater, must contain at least two args. */ -#define argparse_templater() { \ +#define ARGPARSE_TEMPLATE { \ .prog_name = "test_argparse", \ .usage = "-a xx -b yy", \ .descriptor = NULL, \ @@ -85,25 +85,24 @@ }, \ } -static void -test_argparse_copy(struct rte_argparse *dst, struct rte_argparse *src) -{ - uint32_t i; - memcpy(dst, src, sizeof(*src)); - for (i = 0; /* NULL */; i++) { - memcpy(&dst->args[i], &src->args[i], sizeof(src->args[i])); - if (src->args[i].name_long == NULL) - break; - } -} static struct rte_argparse * test_argparse_init_obj(void) { - static struct rte_argparse backup = argparse_templater(); - static struct rte_argparse obj = argparse_templater(); - /* Because obj may be overwritten, do a deep copy. 
*/ - test_argparse_copy(&obj, &backup); + /* Note: initialization of structure with flexible array + * increases the size of the variable to match. + */ + static const struct rte_argparse backup = ARGPARSE_TEMPLATE; + static struct rte_argparse obj = ARGPARSE_TEMPLATE; + unsigned int i; + + obj = backup; + for (i = 0; ; i++) { + obj.args[i] = backup.args[i]; + if (backup.args[i].name_long == NULL) + break; + } + return &obj; } diff -Nru dpdk-24.11.3/app/test/test_cryptodev.c dpdk-24.11.4/app/test/test_cryptodev.c --- dpdk-24.11.3/app/test/test_cryptodev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/test_cryptodev.c 2025-12-19 12:05:33.000000000 +0000 @@ -289,7 +289,7 @@ struct rte_crypto_vec data_vec[UINT8_MAX], dest_data_vec[UINT8_MAX]; struct rte_crypto_va_iova_ptr cipher_iv, digest, aad_auth_iv; union rte_crypto_sym_ofs ofs; - struct rte_crypto_sym_vec vec; + struct rte_crypto_sym_vec vec = {0}; struct rte_crypto_sgl sgl, dest_sgl; uint32_t max_len; union rte_cryptodev_session_ctx sess; @@ -526,7 +526,7 @@ struct rte_crypto_sym_op *sop; union rte_crypto_sym_ofs ofs; struct rte_crypto_sgl sgl; - struct rte_crypto_sym_vec symvec; + struct rte_crypto_sym_vec symvec = {0}; struct rte_crypto_va_iova_ptr iv_ptr, aad_ptr, digest_ptr; struct rte_crypto_vec vec[UINT8_MAX]; @@ -572,7 +572,7 @@ struct rte_crypto_sym_op *sop; union rte_crypto_sym_ofs ofs; struct rte_crypto_sgl sgl; - struct rte_crypto_sym_vec symvec; + struct rte_crypto_sym_vec symvec = {0}; struct rte_crypto_va_iova_ptr iv_ptr, digest_ptr; struct rte_crypto_vec vec[UINT8_MAX]; @@ -3459,6 +3459,8 @@ uint16_t remaining_off = (auth_offset >> 3) + (auth_len >> 3); struct rte_mbuf *sgl_buf = (op_mode == IN_PLACE ? sym_op->m_src : sym_op->m_dst); + struct rte_mbuf *sgl_buf_head = sgl_buf; + while (remaining_off >= rte_pktmbuf_data_len(sgl_buf)) { remaining_off -= rte_pktmbuf_data_len(sgl_buf); sgl_buf = sgl_buf->next; @@ -3466,11 +3468,18 @@ /* The last segment should be large enough to hold full digest */ if (sgl_buf->data_len < auth_tag_len) { - rte_pktmbuf_free(sgl_buf->next); - sgl_buf->next = NULL; - TEST_ASSERT_NOT_NULL(rte_pktmbuf_append(sgl_buf, - auth_tag_len - sgl_buf->data_len), - "No room to append auth tag"); + uint16_t next_data_len = 0; + if (sgl_buf->next != NULL) { + next_data_len = sgl_buf->next->data_len; + + rte_pktmbuf_free(sgl_buf->next); + sgl_buf->next = NULL; + sgl_buf_head->nb_segs -= 1; + sgl_buf_head->pkt_len -= next_data_len; + } + TEST_ASSERT_NOT_NULL(rte_pktmbuf_append( + sgl_buf_head, auth_tag_len - sgl_buf->data_len), + "No room to append auth tag"); } sym_op->auth.digest.data = rte_pktmbuf_mtod_offset(sgl_buf, @@ -9766,11 +9775,13 @@ buf_oop = buf_oop->next; memset(rte_pktmbuf_mtod(buf_oop, uint8_t *), 0, rte_pktmbuf_tailroom(buf_oop)); - rte_pktmbuf_append(buf_oop, to_trn); + TEST_ASSERT_NOT_NULL(ut_params->obuf, "Output buffer not initialized"); + TEST_ASSERT_NOT_NULL(rte_pktmbuf_append(ut_params->obuf, to_trn), "Failed to append to mbuf"); } - plaintext = (uint8_t *)rte_pktmbuf_append(buf, + plaintext = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, to_trn); + TEST_ASSERT_NOT_NULL(plaintext, "Failed to append plaintext"); memcpy(plaintext, input_vec + trn_data, to_trn); trn_data += to_trn; @@ -9799,7 +9810,7 @@ buf_oop = buf_oop->next; memset(rte_pktmbuf_mtod(buf_oop, uint8_t *), 0, rte_pktmbuf_tailroom(buf_oop)); - rte_pktmbuf_append(buf_oop, to_trn); + TEST_ASSERT_NOT_NULL(rte_pktmbuf_append(ut_params->obuf, to_trn), "Failed to append to mbuf"); trn_data += to_trn; } @@ -15871,15 
+15882,18 @@ memset(rte_pktmbuf_mtod(buf, uint8_t *), 0, rte_pktmbuf_tailroom(buf)); - plaintext = (uint8_t *)rte_pktmbuf_append(buf, + plaintext = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, to_trn); + TEST_ASSERT_NOT_NULL(plaintext, "Failed to append plaintext"); memcpy(plaintext, tdata->plaintext.data + trn_data, to_trn); trn_data += to_trn; - if (trn_data == tdata->plaintext.len) - digest_mem = (uint8_t *)rte_pktmbuf_append(buf, + if (trn_data == tdata->plaintext.len) { + digest_mem = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, tdata->gmac_tag.len); + TEST_ASSERT_NOT_NULL(digest_mem, "Failed to append digest data"); + } } ut_params->ibuf->nb_segs = segs; @@ -17176,23 +17190,28 @@ buf_oop = buf_oop->next; memset(rte_pktmbuf_mtod(buf_oop, uint8_t *), 0, rte_pktmbuf_tailroom(buf_oop)); - rte_pktmbuf_append(buf_oop, to_trn); + TEST_ASSERT_NOT_NULL(rte_pktmbuf_append(ut_params->obuf, to_trn), "Failed to append to mbuf"); } - plaintext = (uint8_t *)rte_pktmbuf_append(buf, + plaintext = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, to_trn); + TEST_ASSERT_NOT_NULL(plaintext, "Failed to append plaintext"); memcpy(plaintext, tdata->plaintext.data + trn_data, to_trn); trn_data += to_trn; if (trn_data == tdata->plaintext.len) { if (oop) { - if (!fragsz_oop) - digest_mem = rte_pktmbuf_append(buf_oop, + if (!fragsz_oop) { + digest_mem = rte_pktmbuf_append(ut_params->obuf, tdata->auth_tag.len); - } else - digest_mem = (uint8_t *)rte_pktmbuf_append(buf, + TEST_ASSERT_NOT_NULL(digest_mem, "Failed to append auth tag"); + } + } else { + digest_mem = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, tdata->auth_tag.len); + TEST_ASSERT_NOT_NULL(digest_mem, "Failed to append auth tag"); + } } } @@ -17227,16 +17246,18 @@ buf_last_oop = buf_oop->next = rte_pktmbuf_alloc(ts_params->mbuf_pool); + TEST_ASSERT_NOT_NULL(buf_oop->next, "Unexpected end of chain"); buf_oop = buf_oop->next; memset(rte_pktmbuf_mtod(buf_oop, uint8_t *), 0, rte_pktmbuf_tailroom(buf_oop)); - rte_pktmbuf_append(buf_oop, to_trn); + TEST_ASSERT_NOT_NULL(rte_pktmbuf_append(ut_params->obuf, to_trn), "Failed to append to mbuf"); trn_data += to_trn; if (trn_data == tdata->plaintext.len) { - digest_mem = rte_pktmbuf_append(buf_oop, + digest_mem = rte_pktmbuf_append(ut_params->obuf, tdata->auth_tag.len); + TEST_ASSERT_NOT_NULL(digest_mem, "Failed to append auth tag"); } } diff -Nru dpdk-24.11.3/app/test/test_debug.c dpdk-24.11.4/app/test/test_debug.c --- dpdk-24.11.3/app/test/test_debug.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/test_debug.c 2025-12-19 12:05:33.000000000 +0000 @@ -8,6 +8,18 @@ #include #ifdef RTE_EXEC_ENV_WINDOWS +int +test_panic(void) +{ + printf("debug not supported on Windows, skipping test\n"); + return TEST_SKIPPED; +} +int +test_exit(void) +{ + printf("debug not supported on Windows, skipping test\n"); + return TEST_SKIPPED; +} static int test_debug(void) { @@ -25,34 +37,31 @@ #include #include #include -#include +#include + +#include "process.h" /* * Debug test * ========== */ -/* use fork() to test rte_panic() */ -static int +static const char *test_args[8]; + +int test_panic(void) { - int pid; int status; - pid = fork(); - - if (pid == 0) { + if (getenv(RECURSIVE_ENV_VAR) != NULL) { struct rlimit rl; /* No need to generate a coredump when panicking. 
*/ rl.rlim_cur = rl.rlim_max = 0; setrlimit(RLIMIT_CORE, &rl); rte_panic("Test Debug\n"); - } else if (pid < 0) { - printf("Fork Failed\n"); - return -1; } - wait(&status); + status = process_dup(test_args, RTE_DIM(test_args), "test_panic"); if(status == 0){ printf("Child process terminated normally!\n"); return -1; @@ -62,27 +71,16 @@ return 0; } -/* use fork() to test rte_exit() */ static int test_exit_val(int exit_val) { - int pid; + char buf[5]; int status; - /* manually cleanup EAL memory, as the fork() below would otherwise - * cause the same hugepages to be free()-ed multiple times. - */ - rte_service_finalize(); - - pid = fork(); - - if (pid == 0) - rte_exit(exit_val, __func__); - else if (pid < 0){ - printf("Fork Failed\n"); - return -1; - } - wait(&status); + sprintf(buf, "%d", exit_val); + if (setenv("TEST_DEBUG_EXIT_VAL", buf, 1) == -1) + rte_panic("Failed to set exit value in env\n"); + status = process_dup(test_args, RTE_DIM(test_args), "test_exit"); printf("Child process status: %d\n", status); if(!WIFEXITED(status) || WEXITSTATUS(status) != (uint8_t)exit_val){ printf("Child process terminated with incorrect status (expected = %d)!\n", @@ -92,11 +90,22 @@ return 0; } -static int +int test_exit(void) { int test_vals[] = { 0, 1, 2, 255, -1 }; unsigned i; + + if (getenv(RECURSIVE_ENV_VAR) != NULL) { + int exit_val; + + if (!getenv("TEST_DEBUG_EXIT_VAL")) + rte_panic("No exit value set in env\n"); + + exit_val = strtol(getenv("TEST_DEBUG_EXIT_VAL"), NULL, 0); + rte_exit(exit_val, __func__); + } + for (i = 0; i < RTE_DIM(test_vals); i++) { if (test_exit_val(test_vals[i]) < 0) return -1; @@ -128,6 +137,40 @@ static int test_debug(void) { +#ifdef RTE_EXEC_ENV_FREEBSD + /* BSD target doesn't support prefixes at this point, and we also need to + * run another primary process here. + */ + const char * prefix = "--no-shconf"; +#else + const char * prefix = "--file-prefix=debug"; +#endif + char core[10]; + + sprintf(core, "%d", rte_get_main_lcore()); + + test_args[0] = prgname; + test_args[1] = prefix; + test_args[2] = "-l"; + test_args[3] = core; + + if (rte_eal_has_hugepages()) { + test_args[4] = ""; + test_args[5] = ""; + test_args[6] = ""; + test_args[7] = ""; + } else { + test_args[4] = "--no-huge"; + test_args[5] = "-m"; + test_args[6] = "2048"; +#ifdef RTE_ARCH_PPC_64 + /* iova=pa is the default, but fails on ppc64 with --no-huge */ + test_args[7] = "--iova-mode=va"; +#else + test_args[7] = ""; +#endif + } + rte_dump_stack(); if (test_panic() < 0) return -1; diff -Nru dpdk-24.11.3/app/test/test_dmadev.c dpdk-24.11.4/app/test/test_dmadev.c --- dpdk-24.11.3/app/test/test_dmadev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/test_dmadev.c 2025-12-19 12:05:33.000000000 +0000 @@ -92,7 +92,7 @@ printf("DMA Dev %d: Running %s Tests %s\n", dev_id, printable, check_err_stats ? 
" " : "(errors expected)"); for (i = 0; i < iterations; i++) { - if (test_fn(dev_id, vchan) < 0) + if (test_fn(dev_id, vchan) != 0) return -1; rte_dma_stats_get(dev_id, 0, &stats); diff -Nru dpdk-24.11.3/app/test/test_event_dma_adapter.c dpdk-24.11.4/app/test/test_event_dma_adapter.c --- dpdk-24.11.3/app/test/test_event_dma_adapter.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/test_event_dma_adapter.c 2025-12-19 12:05:33.000000000 +0000 @@ -136,7 +136,6 @@ { struct rte_event_dma_adapter_runtime_params out_params; struct rte_event_dma_adapter_runtime_params in_params; - struct rte_event event; uint32_t cap; int err, rc; @@ -144,6 +143,8 @@ TEST_ASSERT_SUCCESS(err, "Failed to get adapter capabilities\n"); if (cap & RTE_EVENT_DMA_ADAPTER_CAP_INTERNAL_PORT_VCHAN_EV_BIND) { + struct rte_event event = { .queue_id = 0, }; + err = rte_event_dma_adapter_vchan_add(TEST_ADAPTER_ID, TEST_DMA_DEV_ID, TEST_DMA_VCHAN_ID, &event); } else @@ -520,7 +521,6 @@ static int test_dma_adapter_vchan_add_del(void) { - struct rte_event event; uint32_t cap; int ret; @@ -528,6 +528,8 @@ TEST_ASSERT_SUCCESS(ret, "Failed to get adapter capabilities\n"); if (cap & RTE_EVENT_DMA_ADAPTER_CAP_INTERNAL_PORT_VCHAN_EV_BIND) { + struct rte_event event = { .queue_id = 0, }; + ret = rte_event_dma_adapter_vchan_add(TEST_ADAPTER_ID, TEST_DMA_DEV_ID, TEST_DMA_VCHAN_ID, &event); } else diff -Nru dpdk-24.11.3/app/test/test_func_reentrancy.c dpdk-24.11.4/app/test/test_func_reentrancy.c --- dpdk-24.11.3/app/test/test_func_reentrancy.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/test_func_reentrancy.c 2025-12-19 12:05:33.000000000 +0000 @@ -68,13 +68,15 @@ static int test_eal_init_once(__rte_unused void *arg) { + char appname[] = __FILE__; unsigned lcore_self = rte_lcore_id(); + char *argv[] = { appname, NULL }; WAIT_SYNCHRO_FOR_WORKERS(); /* silent the check in the caller */ rte_atomic_store_explicit(&obj_count, 1, rte_memory_order_relaxed); - if (rte_eal_init(0, NULL) != -1) + if (rte_eal_init(RTE_DIM(argv) - 1, argv) != -1) return -1; return 0; diff -Nru dpdk-24.11.3/app/test/test_hash_readwrite.c dpdk-24.11.4/app/test/test_hash_readwrite.c --- dpdk-24.11.3/app/test/test_hash_readwrite.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/test_hash_readwrite.c 2025-12-19 12:05:33.000000000 +0000 @@ -64,6 +64,11 @@ ret = rte_malloc(NULL, sizeof(int) * tbl_rw_test_param.num_insert, 0); + if (ret == NULL) { + printf("allocation failed\n"); + return -1; + } + for (i = 0; i < rte_lcore_count(); i++) { if (worker_core_ids[i] == lcore_id) break; diff -Nru dpdk-24.11.3/app/test/test_hash_readwrite_lf_perf.c dpdk-24.11.4/app/test/test_hash_readwrite_lf_perf.c --- dpdk-24.11.3/app/test/test_hash_readwrite_lf_perf.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/test_hash_readwrite_lf_perf.c 2025-12-19 12:05:33.000000000 +0000 @@ -1310,6 +1310,10 @@ sz = rte_rcu_qsbr_get_memsize(RTE_MAX_LCORE); rv = (struct rte_rcu_qsbr *)rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE); + if (rv == NULL) { + printf("allocation failed\n"); + goto err; + } rcu_config.v = rv; if (rte_hash_rcu_qsbr_add(tbl_rwc_test_param.h, &rcu_config) < 0) { diff -Nru dpdk-24.11.3/app/test/test_net_ip6.c dpdk-24.11.4/app/test/test_net_ip6.c --- dpdk-24.11.3/app/test/test_net_ip6.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/test_net_ip6.c 2025-12-19 12:05:33.000000000 +0000 @@ -160,7 +160,7 @@ { const struct rte_ether_addr local_mac = {{0x04, 0x7b, 0xcb, 0x5c, 0x08, 0x44}}; const struct rte_ipv6_addr 
local_ip = - RTE_IPV6(0xfe80, 0, 0, 0, 0x047b, 0xcbff, 0xfe5c, 0x0844); + RTE_IPV6(0xfe80, 0, 0, 0, 0x067b, 0xcbff, 0xfe5c, 0x0844); struct rte_ipv6_addr ip; rte_ipv6_llocal_from_ethernet(&ip, &local_mac); diff -Nru dpdk-24.11.3/app/test/test_trace.c dpdk-24.11.4/app/test/test_trace.c --- dpdk-24.11.3/app/test/test_trace.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test/test_trace.c 2025-12-19 12:05:33.000000000 +0000 @@ -179,7 +179,7 @@ test_generic_trace_points(void) { uint8_t arr[RTE_TRACE_BLOB_LEN_MAX]; - int tmp; + int tmp = 0; int i; for (i = 0; i < RTE_TRACE_BLOB_LEN_MAX; i++) diff -Nru dpdk-24.11.3/app/test-crypto-perf/cperf_test_vector_parsing.c dpdk-24.11.4/app/test-crypto-perf/cperf_test_vector_parsing.c --- dpdk-24.11.3/app/test-crypto-perf/cperf_test_vector_parsing.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test-crypto-perf/cperf_test_vector_parsing.c 2025-12-19 12:05:33.000000000 +0000 @@ -308,12 +308,19 @@ if (strstr(key_token, "plaintext")) { rte_free(vector->plaintext.data); vector->plaintext.data = data; + + if (opts->test == CPERF_TEST_TYPE_VERIFY && data_length > opts->max_buffer_size) { + printf("Global plaintext (%u) larger than buffer_sz (%u)\n", + data_length, opts->max_buffer_size); + return -1; + } + if (tc_found) vector->plaintext.length = data_length; else { if (opts->max_buffer_size > data_length) { - printf("Global plaintext shorter than " - "buffer_sz\n"); + printf("Global plaintext (%u) shorter than " + "buffer_sz (%u)\n", data_length, opts->max_buffer_size); return -1; } vector->plaintext.length = opts->max_buffer_size; @@ -326,8 +333,8 @@ vector->cipher_key.length = data_length; else { if (opts->cipher_key_sz > data_length) { - printf("Global cipher_key shorter than " - "cipher_key_sz\n"); + printf("Global cipher_key (%u) shorter than " + "cipher_key_sz (%u)\n", data_length, opts->cipher_key_sz); return -1; } vector->cipher_key.length = opts->cipher_key_sz; @@ -340,8 +347,8 @@ vector->auth_key.length = data_length; else { if (opts->auth_key_sz > data_length) { - printf("Global auth_key shorter than " - "auth_key_sz\n"); + printf("Global auth_key (%u) shorter than " + "auth_key_sz (%u)\n", data_length, opts->auth_key_sz); return -1; } vector->auth_key.length = opts->auth_key_sz; @@ -354,8 +361,8 @@ vector->aead_key.length = data_length; else { if (opts->aead_key_sz > data_length) { - printf("Global aead_key shorter than " - "aead_key_sz\n"); + printf("Global aead_key (%u) shorter than " + "aead_key_sz (%u)\n", data_length, opts->aead_key_sz); return -1; } vector->aead_key.length = opts->aead_key_sz; @@ -368,8 +375,8 @@ vector->cipher_iv.length = data_length; else { if (opts->cipher_iv_sz > data_length) { - printf("Global cipher iv shorter than " - "cipher_iv_sz\n"); + printf("Global cipher iv (%u) shorter than " + "cipher_iv_sz (%u)\n", data_length, opts->cipher_iv_sz); return -1; } vector->cipher_iv.length = opts->cipher_iv_sz; @@ -382,8 +389,8 @@ vector->auth_iv.length = data_length; else { if (opts->auth_iv_sz > data_length) { - printf("Global auth iv shorter than " - "auth_iv_sz\n"); + printf("Global auth iv (%u) shorter than " + "auth_iv_sz (%u)\n", data_length, opts->auth_iv_sz); return -1; } vector->auth_iv.length = opts->auth_iv_sz; @@ -396,8 +403,8 @@ vector->aead_iv.length = data_length; else { if (opts->aead_iv_sz > data_length) { - printf("Global aead iv shorter than " - "aead_iv_sz\n"); + printf("Global aead iv (%u) shorter than " + "aead_iv_sz (%u)\n", data_length, opts->aead_iv_sz); return -1; } 
vector->aead_iv.length = opts->aead_iv_sz; @@ -410,8 +417,8 @@ vector->ciphertext.length = data_length; else { if (opts->max_buffer_size > data_length) { - printf("Global ciphertext shorter than " - "buffer_sz\n"); + printf("Global ciphertext (%u) shorter than " + "buffer_sz (%u)\n", data_length, opts->max_buffer_size); return -1; } vector->ciphertext.length = opts->max_buffer_size; @@ -425,8 +432,8 @@ vector->aad.length = data_length; else { if (opts->aead_aad_sz > data_length) { - printf("Global aad shorter than " - "aead_aad_sz\n"); + printf("Global aad (%u) shorter than " + "aead_aad_sz (%u)\n", data_length, opts->aead_aad_sz); return -1; } vector->aad.length = opts->aead_aad_sz; @@ -441,8 +448,8 @@ vector->digest.length = data_length; else { if (opts->digest_sz > data_length) { - printf("Global digest shorter than " - "digest_sz\n"); + printf("Global digest (%u) shorter than " + "digest_sz (%u)\n", data_length, opts->digest_sz); return -1; } vector->digest.length = opts->digest_sz; diff -Nru dpdk-24.11.3/app/test-dma-perf/benchmark.c dpdk-24.11.4/app/test-dma-perf/benchmark.c --- dpdk-24.11.3/app/test-dma-perf/benchmark.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test-dma-perf/benchmark.c 2025-12-19 12:05:33.000000000 +0000 @@ -19,7 +19,6 @@ #define MAX_DMA_CPL_NB 255 #define TEST_WAIT_U_SECOND 10000 -#define POLL_MAX 1000 #define CSV_LINE_DMA_FMT "Scenario %u,%u,%s,%u,%u,%u,%u,%.2lf,%" PRIu64 ",%.3lf,%.3lf\n" #define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,NA,NA,%u,%u,%.2lf,%" PRIu64 ",%.3lf,%.3lf\n" @@ -259,6 +258,25 @@ } static void +stop_dmadev(struct test_configure *cfg, bool *stopped) +{ + struct lcore_dma_map_t *lcore_dma_map; + uint32_t i; + + if (*stopped) + return; + + if (cfg->is_dma) { + for (i = 0; i < cfg->num_worker; i++) { + lcore_dma_map = &cfg->dma_config[i].lcore_dma_map; + printf("Stopping dmadev %d\n", lcore_dma_map->dma_id); + rte_dma_stop(lcore_dma_map->dma_id); + } + } + *stopped = true; +} + +static void error_exit(int dev_id) { rte_dma_stop(dev_id); @@ -282,6 +300,40 @@ worker_info->total_cpl += nr_cpl; } +static int +do_dma_submit_and_wait_cpl(uint16_t dev_id, uint64_t async_cnt) +{ +#define MAX_WAIT_MSEC 1000 +#define MAX_POLL 1000 +#define DEQ_SZ 64 + enum rte_dma_vchan_status st; + uint32_t poll_cnt = 0; + uint32_t wait_ms = 0; + uint16_t nr_cpl; + + rte_dma_submit(dev_id, 0); + + if (rte_dma_vchan_status(dev_id, 0, &st) < 0) { + rte_delay_ms(MAX_WAIT_MSEC); + goto wait_cpl; + } + + while (st == RTE_DMA_VCHAN_ACTIVE && wait_ms++ < MAX_WAIT_MSEC) { + rte_delay_ms(1); + rte_dma_vchan_status(dev_id, 0, &st); + } + +wait_cpl: + while ((async_cnt > 0) && (poll_cnt++ < MAX_POLL)) { + nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL); + async_cnt -= nr_cpl; + } + if (async_cnt > 0) + PRINT_ERR("Error: wait DMA %u failed!\n", dev_id); + + return async_cnt == 0 ? 
0 : -1; +} + static inline int do_dma_plain_mem_copy(void *p) { @@ -293,10 +345,8 @@ const uint32_t buf_size = para->buf_size; struct rte_mbuf **srcs = para->srcs; struct rte_mbuf **dsts = para->dsts; - uint16_t nr_cpl; uint64_t async_cnt = 0; uint32_t i; - uint32_t poll_cnt = 0; int ret; worker_info->stop_flag = false; @@ -327,13 +377,7 @@ break; } - rte_dma_submit(dev_id, 0); - while ((async_cnt > 0) && (poll_cnt++ < POLL_MAX)) { - nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL); - async_cnt -= nr_cpl; - } - - return 0; + return do_dma_submit_and_wait_cpl(dev_id, async_cnt); } static inline int @@ -349,8 +393,6 @@ const uint16_t dev_id = para->dev_id; uint32_t nr_buf = para->nr_buf; uint64_t async_cnt = 0; - uint32_t poll_cnt = 0; - uint16_t nr_cpl; uint32_t i, j; int ret; @@ -386,13 +428,7 @@ break; } - rte_dma_submit(dev_id, 0); - while ((async_cnt > 0) && (poll_cnt++ < POLL_MAX)) { - nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL); - async_cnt -= nr_cpl; - } - - return 0; + return do_dma_submit_and_wait_cpl(dev_id, async_cnt); } static inline int @@ -458,6 +494,11 @@ return -1; } + if (buf_size > UINT16_MAX) { + PRINT_ERR("Error: Invalid buf size: %u\n", cur_buf_size); + return -1; + } + src_pool = rte_pktmbuf_pool_create("Benchmark_DMA_SRC", nr_buf, 0, @@ -687,6 +728,7 @@ float memory = 0; uint32_t avg_cycles = 0; uint32_t avg_cycles_total; + bool dev_stopped = false; float mops, mops_total; float bandwidth, bandwidth_total; uint32_t nr_sgsrc = 0, nr_sgdst = 0; @@ -748,7 +790,7 @@ vchan_dev->tdir == RTE_DMA_DIR_MEM_TO_DEV) { if (attach_ext_buffer(vchan_dev, lcores[i], cfg->is_sg, (nr_sgsrc/nb_workers), (nr_sgdst/nb_workers)) < 0) - goto out; + goto stop_dmadev; } rte_eal_remote_launch(get_work_function(cfg), (void *)(lcores[i]), lcore_id); @@ -783,6 +825,8 @@ rte_eal_mp_wait_lcore(); + stop_dmadev(cfg, &dev_stopped); + for (k = 0; k < nb_workers; k++) { struct rte_mbuf **src_buf = NULL, **dst_buf = NULL; uint32_t nr_buf_pt = nr_buf / nb_workers; @@ -867,6 +911,8 @@ cfg->scenario_id, nr_buf, memory * nb_workers, (avg_cycles_total * (float) 1.0) / nb_workers, bandwidth_total, mops_total); +stop_dmadev: + stop_dmadev(cfg, &dev_stopped); out: for (k = 0; k < nb_workers; k++) { @@ -922,13 +968,5 @@ lcores[i] = NULL; } - if (cfg->is_dma) { - for (i = 0; i < nb_workers; i++) { - lcore_dma_map = &cfg->dma_config[i].lcore_dma_map; - printf("Stopping dmadev %d\n", lcore_dma_map->dma_id); - rte_dma_stop(lcore_dma_map->dma_id); - } - } - return ret; } diff -Nru dpdk-24.11.3/app/test-dma-perf/main.c dpdk-24.11.4/app/test-dma-perf/main.c --- dpdk-24.11.3/app/test-dma-perf/main.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test-dma-perf/main.c 2025-12-19 12:05:33.000000000 +0000 @@ -480,6 +480,8 @@ section_name, "test_seconds")); test_case->eal_args = rte_cfgfile_get_entry(cfgfile, section_name, "eal_args"); + if (test_case->eal_args != NULL) + test_case->eal_args = strdup(test_case->eal_args); test_case->is_valid = true; } diff -Nru dpdk-24.11.3/app/test-eventdev/test_perf_common.c dpdk-24.11.4/app/test-eventdev/test_perf_common.c --- dpdk-24.11.3/app/test-eventdev/test_perf_common.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test-eventdev/test_perf_common.c 2025-12-19 12:05:33.000000000 +0000 @@ -1258,7 +1258,6 @@ perf_event_dma_adapter_setup(struct test_perf *t, struct prod_data *p) { struct evt_options *opt = t->opt; - struct rte_event event; uint32_t cap; int ret; @@ -1277,13 +1276,15 @@ return -ENOTSUP; } - if (cap & 
RTE_EVENT_DMA_ADAPTER_CAP_INTERNAL_PORT_VCHAN_EV_BIND) + if (cap & RTE_EVENT_DMA_ADAPTER_CAP_INTERNAL_PORT_VCHAN_EV_BIND) { + struct rte_event event = { .queue_id = p->queue_id, }; + ret = rte_event_dma_adapter_vchan_add(TEST_PERF_DA_ID, p->da.dma_dev_id, p->da.vchan_id, &event); - else + } else { ret = rte_event_dma_adapter_vchan_add(TEST_PERF_DA_ID, p->da.dma_dev_id, p->da.vchan_id, NULL); - + } return ret; } diff -Nru dpdk-24.11.3/app/test-flow-perf/main.c dpdk-24.11.4/app/test-flow-perf/main.c --- dpdk-24.11.3/app/test-flow-perf/main.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test-flow-perf/main.c 2025-12-19 12:05:33.000000000 +0000 @@ -1404,7 +1404,7 @@ global_actions[0] = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_JUMP); flows_list = rte_zmalloc("flows_list", - (sizeof(struct rte_flow *) * rules_count_per_core) + 1, 0); + (sizeof(struct rte_flow *) * (rules_count_per_core + 1)), 0); if (flows_list == NULL) rte_exit(EXIT_FAILURE, "No Memory available!\n"); diff -Nru dpdk-24.11.3/app/test-pmd/cmd_flex_item.c dpdk-24.11.4/app/test-pmd/cmd_flex_item.c --- dpdk-24.11.3/app/test-pmd/cmd_flex_item.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test-pmd/cmd_flex_item.c 2025-12-19 12:05:33.000000000 +0000 @@ -143,21 +143,22 @@ if (ret) return ret; item->type = pattern->type; - if (pattern->spec) { + ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_MASK, NULL, 0, item, NULL); + if ((ret > 0) && pattern->spec) { ptr = (void *)(uintptr_t)item->spec; - memcpy(ptr, pattern->spec, FLEX_MAX_FLOW_PATTERN_LENGTH); + memcpy(ptr, pattern->spec, ret); } else { item->spec = NULL; } - if (pattern->mask) { + if ((ret > 0) && pattern->mask) { ptr = (void *)(uintptr_t)item->mask; - memcpy(ptr, pattern->mask, FLEX_MAX_FLOW_PATTERN_LENGTH); + memcpy(ptr, pattern->mask, ret); } else { item->mask = NULL; } - if (pattern->last) { + if ((ret > 0) && pattern->last) { ptr = (void *)(uintptr_t)item->last; - memcpy(ptr, pattern->last, FLEX_MAX_FLOW_PATTERN_LENGTH); + memcpy(ptr, pattern->last, ret); } else { item->last = NULL; } diff -Nru dpdk-24.11.3/app/test-pmd/cmdline.c dpdk-24.11.4/app/test-pmd/cmdline.c --- dpdk-24.11.3/app/test-pmd/cmdline.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test-pmd/cmdline.c 2025-12-19 12:05:33.000000000 +0000 @@ -3906,7 +3906,7 @@ value = 0; nb_item = 0; value_ok = 0; - for (i = 0; i < strnlen(str, STR_TOKEN_SIZE); i++) { + for (i = 0; i < strnlen(str, STR_MULTI_TOKEN_SIZE); i++) { c = str[i]; if ((c >= '0') && (c <= '9')) { value = (unsigned int) (value * 10 + (c - '0')); @@ -3957,7 +3957,7 @@ struct cmd_set_list_result { cmdline_fixed_string_t cmd_keyword; cmdline_fixed_string_t list_name; - cmdline_fixed_string_t list_of_items; + cmdline_multi_string_t list_of_items; }; static void cmd_set_list_parsed(void *parsed_result, @@ -4006,7 +4006,7 @@ "corelist#portlist"); static cmdline_parse_token_string_t cmd_set_list_of_items = TOKEN_STRING_INITIALIZER(struct cmd_set_list_result, list_of_items, - NULL); + TOKEN_STRING_MULTI); static cmdline_parse_inst_t cmd_set_fwd_list = { .f = cmd_set_list_parsed, diff -Nru dpdk-24.11.3/app/test-pmd/cmdline_flow.c dpdk-24.11.4/app/test-pmd/cmdline_flow.c --- dpdk-24.11.3/app/test-pmd/cmdline_flow.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test-pmd/cmdline_flow.c 2025-12-19 12:05:33.000000000 +0000 @@ -5582,7 +5582,7 @@ .next = NEXT(item_random, NEXT_ENTRY(COMMON_UNSIGNED), item_param), .args = ARGS(ARGS_ENTRY_MASK(struct rte_flow_item_random, - value, "\x00\x00\xff\xff")), + value, "\xff\xff\xff\xff")), }, 
[ITEM_GRE_KEY] = { .name = "gre_key", diff -Nru dpdk-24.11.3/app/test-pmd/cmdline_mtr.c dpdk-24.11.4/app/test-pmd/cmdline_mtr.c --- dpdk-24.11.3/app/test-pmd/cmdline_mtr.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test-pmd/cmdline_mtr.c 2025-12-19 12:05:33.000000000 +0000 @@ -86,14 +86,34 @@ } static int +validate_input_color_table_entries(char *str) +{ + char *saveptr; + char *token = strtok_r(str, PARSE_DELIMITER, &saveptr); + for (int i = 0; token != NULL; i++) { + if (i > ((MAX_DSCP_TABLE_ENTRIES + MAX_VLAN_TABLE_ENTRIES) - 1)) + return -1; + token = strtok_r(NULL, PARSE_DELIMITER, &saveptr); + } + return 0; +} + +static int parse_input_color_table_entries(char *str, enum rte_color **dscp_table, enum rte_color **vlan_table) { enum rte_color *vlan, *dscp; - char *token; + char *token, *saveptr; + char *temp_str = strdup(str); int i = 0; - token = strtok_r(str, PARSE_DELIMITER, &str); + if (validate_input_color_table_entries(temp_str)) { + free(temp_str); + return -1; + } + free(temp_str); + + token = strtok_r(str, PARSE_DELIMITER, &saveptr); if (token == NULL) return 0; @@ -117,7 +137,7 @@ if (i == MAX_DSCP_TABLE_ENTRIES) break; - token = strtok_r(str, PARSE_DELIMITER, &str); + token = strtok_r(NULL, PARSE_DELIMITER, &saveptr); if (token == NULL) { free(dscp); return -1; @@ -126,7 +146,7 @@ *dscp_table = dscp; - token = strtok_r(str, PARSE_DELIMITER, &str); + token = strtok_r(NULL, PARSE_DELIMITER, &saveptr); if (token == NULL) return 0; @@ -154,7 +174,7 @@ if (i == MAX_VLAN_TABLE_ENTRIES) break; - token = strtok_r(str, PARSE_DELIMITER, &str); + token = strtok_r(NULL, PARSE_DELIMITER, &saveptr); if (token == NULL) { free(vlan); free(*dscp_table); diff -Nru dpdk-24.11.3/app/test-pmd/config.c dpdk-24.11.4/app/test-pmd/config.c --- dpdk-24.11.3/app/test-pmd/config.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test-pmd/config.c 2025-12-19 12:05:33.000000000 +0000 @@ -2152,6 +2152,7 @@ case RTE_FLOW_ACTION_TYPE_AGE: case RTE_FLOW_ACTION_TYPE_COUNT: case RTE_FLOW_ACTION_TYPE_QUOTA: + case RTE_FLOW_ACTION_TYPE_CONNTRACK: break; default: fprintf(stderr, @@ -5165,7 +5166,7 @@ /* reinitialize forwarding streams */ init_fwd_streams(); sm_id = 0; - txp = 1; + txp = fwd_topology_tx_port_get(rxp); /* get the dcb info on the first RX and TX ports */ (void)rte_eth_dev_get_dcb_info(fwd_ports_ids[rxp], &rxp_dcb_info); (void)rte_eth_dev_get_dcb_info(fwd_ports_ids[txp], &txp_dcb_info); @@ -5183,7 +5184,7 @@ fwd_lcores[lc_id]->stream_idx; rxq = rxp_dcb_info.tc_queue.tc_rxq[i][tc].base; txq = txp_dcb_info.tc_queue.tc_txq[i][tc].base; - nb_rx_queue = txp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue; + nb_rx_queue = rxp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue; nb_tx_queue = txp_dcb_info.tc_queue.tc_txq[i][tc].nb_queue; for (j = 0; j < nb_rx_queue; j++) { struct fwd_stream *fs; @@ -5213,11 +5214,8 @@ rxp++; if (rxp >= nb_fwd_ports) return; + txp = fwd_topology_tx_port_get(rxp); /* get the dcb information on next RX and TX ports */ - if ((rxp & 0x1) == 0) - txp = (portid_t) (rxp + 1); - else - txp = (portid_t) (rxp - 1); rte_eth_dev_get_dcb_info(fwd_ports_ids[rxp], &rxp_dcb_info); rte_eth_dev_get_dcb_info(fwd_ports_ids[txp], &txp_dcb_info); } diff -Nru dpdk-24.11.3/app/test-pmd/testpmd.c dpdk-24.11.4/app/test-pmd/testpmd.c --- dpdk-24.11.3/app/test-pmd/testpmd.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/app/test-pmd/testpmd.c 2025-12-19 12:05:33.000000000 +0000 @@ -101,12 +101,14 @@ uint16_t verbose_level = 0; /**< Silent by default. 
*/ int testpmd_logtype; /**< Log type for testpmd logs */ +/* Maximum delay for exiting after primary process. */ +#define MONITOR_INTERVAL (500 * 1000) + /* use main core for command line ? */ uint8_t interactive = 0; uint8_t auto_start = 0; uint8_t tx_first; char cmdline_filename[PATH_MAX] = {0}; - /* * NUMA support configuration. * When set, the NUMA support attempts to dispatch the allocation of the @@ -3565,6 +3567,83 @@ rte_devargs_reset(&da); } +#ifndef RTE_EXEC_ENV_WINDOWS + +enum testpmd_req_type { + TESTPMD_REQ_TYPE_EXIT, +}; + +struct testpmd_mp_req { + enum testpmd_req_type t; +}; + +struct testpmd_mp_resp { + int result; +}; + +#define TESTPMD_MP "mp_testpmd" + +/* Send reply to this peer when testpmd exits */ +static RTE_ATOMIC(const char *) primary_name; + +static void +reply_to_primary(const char *peer, int result) +{ + struct rte_mp_msg reply = { }; + struct testpmd_mp_resp *resp = (struct testpmd_mp_resp *) &reply.param; + + strlcpy(reply.name, TESTPMD_MP, RTE_MP_MAX_NAME_LEN); + reply.len_param = sizeof(*resp); + resp->result = result; + + printf("Replying %d to primary\n", result); + fflush(stdout); + + if (rte_mp_reply(&reply, peer) < 0) + printf("Failed to send response to primary:%s", strerror(rte_errno)); +} + +/* Primary process is exiting, stop secondary process */ +static void +pmd_notify_secondary(void) +{ + struct testpmd_mp_req request = { + .t = TESTPMD_REQ_TYPE_EXIT, + }; + struct rte_mp_msg mp_req = { + .name = TESTPMD_MP, + .len_param = sizeof(request), + }; + struct rte_mp_reply reply; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + + printf("\nPrimary: Sending 'stop_req' request to secondary...\n"); + fflush(stdout); + + memcpy(mp_req.param, &request, sizeof(request)); + rte_mp_request_sync(&mp_req, &reply, &ts); +} + +static int +handle_testpmd_request(const struct rte_mp_msg *request, const void *peer) +{ + const struct testpmd_mp_req *req = (const struct testpmd_mp_req *)request->param; + + if (req->t == TESTPMD_REQ_TYPE_EXIT) { + printf("\nReceived notification of primary exiting\n"); + fflush(stdout); + + /* Response is sent after forwarding loop exits */ + rte_atomic_store_explicit(&primary_name, peer, rte_memory_order_relaxed); + + kill(getpid(), SIGINT); + } else { + reply_to_primary(peer, -EINVAL); + } + return 0; +} +#endif + void pmd_test_exit(void) { @@ -3576,6 +3655,10 @@ stop_packet_forwarding(); #ifndef RTE_EXEC_ENV_WINDOWS + /* Tell secondary to exit */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + pmd_notify_secondary(); + for (i = 0 ; i < RTE_DIM(mempools) ; i++) { if (mempools[i]) { if (mp_alloc_type == MP_ALLOC_ANON) @@ -4292,6 +4375,38 @@ prompt_exit(); } +#ifndef RTE_EXEC_ENV_WINDOWS +/* Alarm signal handler, used to check that primary process */ +static void +monitor_primary(void *arg __rte_unused) +{ + if (rte_eal_primary_proc_alive(NULL)) { + rte_eal_alarm_set(MONITOR_INTERVAL, monitor_primary, NULL); + } else { + /* + * If primary process exits, then all the device information + * is no longer valid. Calling any cleanup code is going to + * run into use after free. + */ + fprintf(stderr, "\nPrimary process is no longer active, exiting...\n"); + exit(EXIT_FAILURE); + } +} + +/* Setup handler to check when primary exits. 
*/ +static int +enable_primary_monitor(void) +{ + return rte_eal_alarm_set(MONITOR_INTERVAL, monitor_primary, NULL); +} + +static void +disable_primary_monitor(void) +{ + rte_eal_alarm_cancel(monitor_primary, NULL); +} +#endif + int main(int argc, char** argv) { @@ -4323,6 +4438,15 @@ rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n", rte_strerror(rte_errno)); +#ifndef RTE_EXEC_ENV_WINDOWS + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + if (enable_primary_monitor() < 0) + rte_exit(EXIT_FAILURE, "Cannot setup primary monitor"); + if (rte_mp_action_register(TESTPMD_MP, handle_testpmd_request) < 0) + rte_exit(EXIT_FAILURE, "Failed to register message action\n"); + } +#endif + /* allocate port structures, and init them */ init_port(); @@ -4516,8 +4640,24 @@ } } +#ifndef RTE_EXEC_ENV_WINDOWS + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + disable_primary_monitor(); + rte_mp_action_unregister(TESTPMD_MP); + } +#endif + pmd_test_exit(); +#ifndef RTE_EXEC_ENV_WINDOWS + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + const char *peer = rte_atomic_exchange_explicit(&primary_name, NULL, + rte_memory_order_relaxed); + if (peer) + reply_to_primary(peer, 0); + } +#endif + #ifdef RTE_LIB_PDUMP /* uninitialize packet capture framework */ rte_pdump_uninit(); diff -Nru dpdk-24.11.3/buildtools/pmdinfogen.py dpdk-24.11.4/buildtools/pmdinfogen.py --- dpdk-24.11.3/buildtools/pmdinfogen.py 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/buildtools/pmdinfogen.py 2025-12-19 12:05:33.000000000 +0000 @@ -4,9 +4,9 @@ # Copyright (c) 2020 Dmitry Kozlyuk import argparse -import ctypes import json import re +import struct import sys import tempfile @@ -110,24 +110,6 @@ return None -def define_rte_pci_id(is_big_endian): - base_type = ctypes.LittleEndianStructure - if is_big_endian: - base_type = ctypes.BigEndianStructure - - class rte_pci_id(base_type): - _pack_ = True - _fields_ = [ - ("class_id", ctypes.c_uint32), - ("vendor_id", ctypes.c_uint16), - ("device_id", ctypes.c_uint16), - ("subsystem_vendor_id", ctypes.c_uint16), - ("subsystem_device_id", ctypes.c_uint16), - ] - - return rte_pci_id - - class Driver: OPTIONS = [ ("params", "_param_string_export"), @@ -166,26 +148,24 @@ if not table_symbol: raise Exception("PCI table declared but not defined: %d" % table_name) - rte_pci_id = define_rte_pci_id(image.is_big_endian) + if image.is_big_endian: + fmt = ">" + else: + fmt = "<" + fmt += "LHHHH" result = [] while True: - size = ctypes.sizeof(rte_pci_id) + size = struct.calcsize(fmt) offset = size * len(result) data = table_symbol.get_value(offset, size) if not data: break - pci_id = rte_pci_id.from_buffer_copy(data) - if not pci_id.device_id: + _, vendor, device, ss_vendor, ss_device = struct.unpack_from(fmt, data) + if not device: break - result.append( - [ - pci_id.vendor_id, - pci_id.device_id, - pci_id.subsystem_vendor_id, - pci_id.subsystem_device_id, - ] - ) + result.append((vendor, device, ss_vendor, ss_device)) + return result def dump(self, file): diff -Nru dpdk-24.11.3/config/arm/meson.build dpdk-24.11.4/config/arm/meson.build --- dpdk-24.11.3/config/arm/meson.build 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/config/arm/meson.build 2025-12-19 12:05:33.000000000 +0000 @@ -587,7 +587,7 @@ 'description': 'Arm Neoverse N2', 'implementer': '0x41', 'part_number': '0xd49', - 'numa': false + 'numa': true } soc_n3 = { diff -Nru dpdk-24.11.3/debian/changelog dpdk-24.11.4/debian/changelog --- dpdk-24.11.3/debian/changelog 2025-08-25 15:13:59.000000000 +0000 +++ 
dpdk-24.11.4/debian/changelog 2025-12-24 14:21:11.000000000 +0000 @@ -1,3 +1,15 @@ +dpdk (24.11.4-0+deb13u1) trixie; urgency=medium + + [ Christian Ehrhardt ] + * d/p/disable_arm64_autopkgtest_fails.patch: disable tests that are + flaky in debci (Closes: #1114911) + + [ Luca Boccassi ] + * New upstream release 24.11.4. For a full list of changes in 24.11.4 see: + https://doc.dpdk.org/guides/rel_notes/release_24_11.html + + -- Luca Boccassi Wed, 24 Dec 2025 14:21:11 +0000 + dpdk (24.11.3-1~deb13u1) trixie; urgency=medium * Upload to trixie diff -Nru dpdk-24.11.3/debian/patches/disable_arm64_autopkgtest_fails.patch dpdk-24.11.4/debian/patches/disable_arm64_autopkgtest_fails.patch --- dpdk-24.11.3/debian/patches/disable_arm64_autopkgtest_fails.patch 1970-01-01 00:00:00.000000000 +0000 +++ dpdk-24.11.4/debian/patches/disable_arm64_autopkgtest_fails.patch 2025-12-24 14:21:11.000000000 +0000 @@ -0,0 +1,28 @@ +Description: Skip tests failing in arm64 test environment + These tests are very flaky on the Debci infrastructure and now also + started to affect arm64 without DPDK changing. +Forwarded: not-needed +X-Not-Forwarded-Reason: This only applies to Debci environments +Author: Christian Ehrhardt +--- a/app/test/test_pflock.c ++++ b/app/test/test_pflock.c +@@ -163,7 +163,7 @@ test_pflock(void) + { + int i; + +-#if defined(__PPC64__) ++#if defined(__PPC64__) || defined(__aarch64__) + return TEST_SKIPPED; + #endif + +--- a/app/test/test_ticketlock.c ++++ b/app/test/test_ticketlock.c +@@ -207,7 +207,7 @@ test_ticketlock(void) + int ret = 0; + int i; + +-#if defined(__PPC64__) ++#if defined(__PPC64__) || defined(__aarch64__) + return TEST_SKIPPED; + #endif + diff -Nru dpdk-24.11.3/debian/patches/series dpdk-24.11.4/debian/patches/series --- dpdk-24.11.3/debian/patches/series 2025-08-19 23:01:23.000000000 +0000 +++ dpdk-24.11.4/debian/patches/series 2025-12-24 14:21:11.000000000 +0000 @@ -1,4 +1,5 @@ disable_ppc64_autopkgtest_fails.patch +disable_arm64_autopkgtest_fails.patch disable_riscv_autopkgtest_fails.patch add-install_tag-to-meson.patch doc-install-guides-and-api-docs-to-different-directory.patch diff -Nru dpdk-24.11.3/doc/guides/cryptodevs/ionic.rst dpdk-24.11.4/doc/guides/cryptodevs/ionic.rst --- dpdk-24.11.3/doc/guides/cryptodevs/ionic.rst 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/doc/guides/cryptodevs/ionic.rst 2025-12-19 12:05:33.000000000 +0000 @@ -1,5 +1,5 @@ .. SPDX-License-Identifier: BSD-3-Clause - Copyright 2021-2024 Advanced Micro Devices, Inc. + Copyright 2021-2025 Advanced Micro Devices, Inc. IONIC Crypto Driver =================== @@ -16,6 +16,8 @@ `(pdf) `__ - DSC3-400 dual-port 400G Distributed Services Card `(pdf) `__ +- Pollara 400 single-port 400G AI NIC + `(pdf) `__ Please visit the `AMD Pensando Networking diff -Nru dpdk-24.11.3/doc/guides/freebsd_gsg/build_dpdk.rst dpdk-24.11.4/doc/guides/freebsd_gsg/build_dpdk.rst --- dpdk-24.11.3/doc/guides/freebsd_gsg/build_dpdk.rst 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/doc/guides/freebsd_gsg/build_dpdk.rst 2025-12-19 12:05:33.000000000 +0000 @@ -18,7 +18,7 @@ * pkgconf * py38-pyelftools -.. note: +.. note:: The specific package for pyelftools is dependent on the version of python in use, Python 3.8 being the version at type of writing, hence the ``py38`` prefix. 
diff -Nru dpdk-24.11.3/doc/guides/linux_gsg/linux_drivers.rst dpdk-24.11.4/doc/guides/linux_gsg/linux_drivers.rst --- dpdk-24.11.3/doc/guides/linux_gsg/linux_drivers.rst 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/doc/guides/linux_gsg/linux_drivers.rst 2025-12-19 12:05:33.000000000 +0000 @@ -384,7 +384,7 @@ More about the bifurcated driver can be found in NVIDIA `bifurcated PMD -`_ presentation. +`_ presentation. .. _uio: diff -Nru dpdk-24.11.3/doc/guides/nics/cpfl.rst dpdk-24.11.4/doc/guides/nics/cpfl.rst --- dpdk-24.11.3/doc/guides/nics/cpfl.rst 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/doc/guides/nics/cpfl.rst 2025-12-19 12:05:33.000000000 +0000 @@ -213,14 +213,14 @@ dpdk-testpmd -c 0x3 -n 4 -a 0000:af:00.6,vport=[0-1],flow_parser="refpkg.json" -- -i #. Create one flow to forward ETH-IPV4-TCP from I/O port to a local(CPF's) vport. Flow should be created on - vport X. Group M should match fxp module. Action port_representor Y means forward packet to local vport Y:: + vport X. Group M should match fxp module. Action port_representor Y means forward packet to local vport Y: .. code-block:: console flow create X ingress group M pattern eth dst is 00:01:00:00:03:14 / ipv4 src is 192.168.0.1 \ - dst is 192.168.0.2 / tcp / end actions port_representor port_id Y / end + dst is 192.168.0.2 / tcp / end actions port_representor port_id Y / end -#. Send a matched packet, and it should be displayed on PMD:: +#. Send a matched packet, and it should be displayed on PMD: .. code-block:: console diff -Nru dpdk-24.11.3/doc/guides/nics/features/iavf.ini dpdk-24.11.4/doc/guides/nics/features/iavf.ini --- dpdk-24.11.3/doc/guides/nics/features/iavf.ini 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/doc/guides/nics/features/iavf.ini 2025-12-19 12:05:33.000000000 +0000 @@ -7,12 +7,13 @@ ; is selected. ; [Features] -Speed capabilities = Y Link status = Y Rx interrupt = Y +Free Tx mbuf on demand = Y Queue start/stop = Y Runtime Rx queue setup = Y Runtime Tx queue setup = Y +Burst mode info = Y Power mgmt address monitor = Y MTU update = Y Scattered Rx = Y @@ -38,6 +39,7 @@ Rx descriptor status = Y Tx descriptor status = Y Basic stats = Y +Extended stats = Y Multiprocess aware = Y FreeBSD = Y Linux = Y diff -Nru dpdk-24.11.3/doc/guides/nics/features/ice.ini dpdk-24.11.4/doc/guides/nics/features/ice.ini --- dpdk-24.11.3/doc/guides/nics/features/ice.ini 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/doc/guides/nics/features/ice.ini 2025-12-19 12:05:33.000000000 +0000 @@ -14,6 +14,7 @@ FEC = Y Rx interrupt = Y Fast mbuf free = P +Free Tx mbuf on demand = Y Queue start/stop = Y Burst mode info = Y Power mgmt address monitor = Y diff -Nru dpdk-24.11.3/doc/guides/nics/features/txgbe.ini dpdk-24.11.4/doc/guides/nics/features/txgbe.ini --- dpdk-24.11.3/doc/guides/nics/features/txgbe.ini 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/doc/guides/nics/features/txgbe.ini 2025-12-19 12:05:33.000000000 +0000 @@ -72,7 +72,6 @@ [rte_flow actions] drop = Y -mark = Y pf = Y queue = Y rss = Y diff -Nru dpdk-24.11.3/doc/guides/nics/ionic.rst dpdk-24.11.4/doc/guides/nics/ionic.rst --- dpdk-24.11.3/doc/guides/nics/ionic.rst 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/doc/guides/nics/ionic.rst 2025-12-19 12:05:33.000000000 +0000 @@ -1,5 +1,5 @@ .. SPDX-License-Identifier: BSD-3-Clause - Copyright 2018-2022 Advanced Micro Devices, Inc. + Copyright 2018-2025 Advanced Micro Devices, Inc. 
IONIC Driver ============ @@ -15,6 +15,8 @@ `(pdf) `__ - DSC3-400 dual-port 400G Distributed Services Card `(pdf) `__ +- Pollara 400 single-port 400G AI NIC + `(pdf) `__ Please visit the `AMD Pensando Networking diff -Nru dpdk-24.11.3/doc/guides/nics/mlx5.rst dpdk-24.11.4/doc/guides/nics/mlx5.rst --- dpdk-24.11.3/doc/guides/nics/mlx5.rst 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/doc/guides/nics/mlx5.rst 2025-12-19 12:05:33.000000000 +0000 @@ -2222,6 +2222,10 @@ This option is supported only for Tx hairpin queues. +#. With strict queueing enabled + (``RTE_FLOW_PORT_FLAG_STRICT_QUEUE`` passed to ``rte_flow_configure()``), + indirect age actions can be created only through asynchronous flow API. + Notes for testpmd ----------------- diff -Nru dpdk-24.11.3/doc/guides/rel_notes/release_24_11.rst dpdk-24.11.4/doc/guides/rel_notes/release_24_11.rst --- dpdk-24.11.3/doc/guides/rel_notes/release_24_11.rst 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/doc/guides/rel_notes/release_24_11.rst 2025-12-19 12:05:33.000000000 +0000 @@ -1339,3 +1339,370 @@ * 9843181aa5 crypto/virtio: fix driver ID * 25e35296b5 net/cnxk: fix reconfiguring MAC address * 98cf04bb90 test/graph: fix second run + +24.11.4 Release Notes +--------------------- + + +24.11.4 Fixes +~~~~~~~~~~~~~ + +* app/crypto-perf: fix plaintext size exceeds buffer size +* app/dma-perf: check buffer size +* app/dma-perf: fix on-flight DMA when verifying data +* app/dma-perf: fix stopping device +* app/dma-perf: fix use after free +* app/eventdev: fix build with clang 21 +* app/flow-perf: fix rules array length +* app/pdump: remove hard-coded memory channels +* app/procinfo: fix const pointer in collectd format +* app/testpmd: fix conntrack action query +* app/testpmd: fix DCB Rx queues +* app/testpmd: fix DCB Tx port +* app/testpmd: fix flex item link parsing +* app/testpmd: fix mask in flow random item +* app/testpmd: increase size of set cores list command +* app/testpmd: monitor state of primary process +* app/testpmd: stop forwarding in secondary process +* app/testpmd: validate DSCP and VLAN for meter creation +* bbdev: fix build with MinGW 13 +* bitops: improve power of 2 alignment documentation +* buildtools/pmdinfogen: fix warning with python 3.14 +* bus/cdx: fix device name in probing error message +* bus/cdx: fix release in probing for secondary process +* bus/dpaa: improve cleanup +* bus/fslmc: fix const pointer in device name parsing +* bus/ifpga: fix const pointer in device name parsing +* bus/pci: fix build with MinGW 13 +* bus/pci: fix resource leak in secondary process +* bus/uacce: fix const pointer in device matching +* cmdline: fix highest bit port list parsing +* cmdline: fix port list parsing +* common/cnxk: fix async event handling +* common/cnxk: fix format specifier for bandwidth profile ID +* common/cnxk: fix max number of SQB buffers in clean up +* common/cnxk: fix NIX Rx inject enabling +* common/cnxk: fix null SQ access +* common/mlx5: release unused mempool entries +* common/mlx5: remove useless constants +* common/qat: fix some const pointers +* config/arm: enable NUMA for Neoverse N2 +* crypto/caam_jr: fix const pointer in UIO filename parsing +* crypto/cnxk: refactor RSA verification +* crypto/ipsec_mb: fix QP release in secondary +* crypto/mlx5: remove unused constants +* crypto/qat: fix CCM request descriptor hash state size +* crypto/qat: fix ECDH +* crypto/qat: fix source buffer alignment +* crypto/virtio: fix cookies leak +* dmadev: fix debug build with tracepoints +* dma/hisilicon: fix stop 
with pending transfers +* doc: add conntrack state inspect command to testpmd guide +* doc: add Pollara 400 device in ionic guide +* doc: fix display of commands in cpfl guide +* doc: fix feature list of iavf driver +* doc: fix feature list of ice driver +* doc: fix note in FreeBSD guide +* doc: fix NVIDIA bifurcated driver presentation link +* dts: add reception check to checksum suite +* dts: fix docstring in checksum suite +* eal/arm: fix C++ build for 32-bit memcpy +* eal: fix DMA mask validation with IOVA mode option +* eal: fix MP socket cleanup +* eal: fix plugin dir walk +* eal/x86: enable timeout in AMD power monitor +* efd: fix AVX2 support +* ethdev: fix VLAN filter parameter description +* eventdev/crypto: fix build with clang 21 +* eventdev: fix listing timer adapters with telemetry +* examples/l3fwd: add Tx burst size configuration option +* examples/l3fwd-power: fix telemetry command registration +* examples/server_node_efd: fix format overflow +* examples/vdpa: fix format overflow +* fib6: fix memory leak on delete operation +* fib6: fix tbl8 allocation check logic +* graph: fix stats query with no node xstats +* graph: fix unaligned access in stats +* graph: fix updating edge with active graph +* graph: fix xstats description allocation +* gro: fix payload corruption in coalescing packets +* hash: fix unaligned access in predictable RSS +* lib: fix backticks matching in Doxygen comments +* mcslock: fix memory ordering +* net/af_packet: fix crash in secondary process +* net/ark: remove double mbuf free +* net/axgbe: fix build with GCC 16 +* net/bnxt: fix free of not allocated object +* net/bonding: fix MAC address propagation in 802.3ad mode +* net/cnxk: fix default meter pre-color +* net/cnxk: fix Rx inject LF +* net/dpaa2: clear active VDQ state when freeing Rx queues +* net/dpaa2: fix duplicate call of close +* net/dpaa2: fix error frame dump +* net/dpaa2: fix extract buffer preparation +* net/dpaa2: fix flow rule resizing +* net/dpaa2: fix L3/L4 checksum results +* net/dpaa2: fix shaper rate +* net/dpaa2: fix uninitialized variable +* net/dpaa2: free buffers from error queue +* net/dpaa2: receive packets with additional parse errors +* net/dpaa2: remove ethdev pointer from bus device +* net/dpaa: fix resource leak +* net/e1000/base: fix crash on init with GCC 13 +* net/ena/base: fix unsafe memcpy on invalid memory +* net/ena: fix PCI BAR mapping on 64K page size +* net/enetfec: fix buffer descriptor size configuration +* net/enetfec: fix checksum flag handling and error return +* net/enetfec: fix const pointer in UIO filename parsing +* net/enetfec: fix file descriptor leak on read error +* net/enetfec: fix memory leak in Rx buffer cleanup +* net/enetfec: fix out-of-bounds access in UIO mapping +* net/enetfec: fix Tx queue free +* net/enetfec: reject multi-queue configuration +* net/enetfec: reject Tx deferred queue +* net: fix IPv6 link local compliance with RFC 4291 +* net: fix L2 length for GRE packets +* net/fm10k: fix build with GCC 16 +* net/gve: add DQO Tx descriptor limit +* net/gve: clean when insufficient Tx descriptors +* net/gve: clear DQO Tx descriptors before writing +* net/gve: do not write zero-length descriptors +* net/gve: fix disabling interrupts on DQ +* net/gve: fix DQO TSO descriptor limit +* net/gve: free device resources on close +* net/gve: free Rx mbufs if allocation fails on ring setup +* net/gve: send whole packet when mbuf is large +* net/gve: validate Tx packet before sending +* net/hns3: fix inconsistent lock +* net/hns3: fix overwrite 
mbuf in vector path +* net/hns3: fix VLAN resources freeing +* net/hns3: fix VLAN tag loss for short tunnel frame +* net/i40e: fix symmetric Toeplitz hashing for SCTP +* net/iavf: fix build with clang 21 +* net/iavf: fix Rx timestamp validity check +* net/iavf: fix Tx vector path selection logic +* net/ice/base: fix adding special words +* net/ice/base: fix integer overflow on NVM init +* net/ice/base: fix memory leak in HW profile handling +* net/ice/base: fix memory leak in recipe handling +* net/ice: fix initialization with 8 ports +* net/ice: fix memory leak in raw pattern parse +* net/ice: fix path selection for QinQ Tx offload +* net/ice: fix statistics +* net/ice: fix vector Rx VLAN offload flags +* net/ice: fix VLAN tag reporting on Rx +* net/ice: remove indirection for FDIR filters +* net/ice: remove unsupported SCTP Rx offload +* net/idpf: fix queue setup with TSO offload +* net/intel: fix assumption about tag placement order +* net/ixgbe/base: fix PF link state request size +* net/ixgbe: fix SCTP port filtering on E610 +* net/memif: fix const pointer in socket check +* net/mlx4: fix unnecessary comma +* net/mlx5: fix build with MinGW 13 +* net/mlx5: fix connection tracking state item validation +* net/mlx5: fix crash on flow rule destruction +* net/mlx5: fix default flow rules start +* net/mlx5: fix device start error handling +* net/mlx5: fix Direct Verbs counter offset detection +* net/mlx5: fix double free in non-template flow destroy +* net/mlx5: fix error reporting on masked indirect actions +* net/mlx5: fix ESP header match after UDP for group 0 +* net/mlx5: fix ESP item validation to match on seqnum +* net/mlx5: fix external queues access +* net/mlx5: fix flex flow item header length +* net/mlx5: fix flow aging race condition +* net/mlx5: fix flow encapsulation hash +* net/mlx5: fix flow tag indexes support on root table +* net/mlx5: fix index-based flow rules +* net/mlx5: fix indirect flow action memory leak +* net/mlx5: fix indirect flow age action handling +* net/mlx5: fix indirect meter index leak +* net/mlx5: fix indirect RSS action hash +* net/mlx5: fix interface name parameter definition +* net/mlx5: fix IPv6 DSCP offset in HWS sync API +* net/mlx5: fix leak of flow indexed pools +* net/mlx5: fix meter mark allocation +* net/mlx5: fix min and max MTU reporting +* net/mlx5: fix modify field action restriction +* net/mlx5: fix MTU initialization +* net/mlx5: fix multicast +* net/mlx5: fix multi-process Tx default rules +* net/mlx5: fix non-template age rules flush +* net/mlx5: fix non-template RSS expansion +* net/mlx5: fix null dereference in modify header +* net/mlx5: fix send to kernel action resources release +* net/mlx5: fix spurious CPU wakeups +* net/mlx5: fix storage of shared Rx queues +* net/mlx5: fix Tx metadata pattern template mismatch +* net/mlx5: fix uninitialized variable +* net/mlx5: fix unnecessary commas +* net/mlx5: fix unsupported flow rule port action +* net/mlx5: fix use after scope of RSS configuration +* net/mlx5/hws: fix buddy memory allocation +* net/mlx5/hws: fix ESP header match in strict mode +* net/mlx5/hws: fix flow rule hash capability +* net/mlx5/hws: fix TIR action support in FDB +* net/mlx5: move auxiliary data inline +* net/mlx5: release representor interrupt handler +* net/mlx5: remove counter alignment +* net/mlx5: remove unused macros +* net/mlx5: remove useless constants +* net/mlx5: skip Rx control flow tables in isolated mode +* net/mlx5: store MTU at Rx queue allocation time +* net/mlx5/windows: fix match criteria in flow 
creation +* net/nfp: fix metering cleanup +* net/ngbe: fix checksum error counter +* net/ngbe: reduce memory size of ring descriptors +* net/ntnic: fix potential format overflow +* net/octeon_ep: fix device start +* net/octeon_ep: fix mbuf data offset update +* net/octeon_ep: handle interrupt enable failure +* net/tap: fix BPF with cross-compilation +* net/tap: fix build with LTO +* net/tap: fix interrupt callback crash after failed start +* net/txgbe: add device arguments for FDIR +* net/txgbe: filter FDIR match flex bytes for tunnel +* net/txgbe: fix checksum error counter +* net/txgbe: fix FDIR drop action for L4 match packets +* net/txgbe: fix FDIR filter for SCTP tunnel +* net/txgbe: fix FDIR input mask +* net/txgbe: fix FDIR mode clearing +* net/txgbe: fix FDIR rule raw relative for L3 packets +* net/txgbe: fix maximum number of FDIR filters +* net/txgbe: fix VF Rx buffer size in config register +* net/txgbe: reduce memory size of ring descriptors +* net/txgbe: remove duplicate Tx queue assignment +* net/txgbe: remove unsupported flow action mark +* net/txgbe: switch to FDIR when ntuple filter is full +* net/virtio-user: fix used ring address calculation +* net/vmxnet3: disable RSS for single queue for ESX8.0+ +* net/vmxnet3: fix mapping of mempools to queues +* net/zxdh: fix Arm build +* pdump: handle primary process exit +* rawdev: fix build with clang 21 +* regex/mlx5: remove useless constants +* Revert "crypto/virtio: fix cookies leak" +* ring: establish a safe partial order in hts-ring +* ring: establish safe partial order in default mode +* ring: establish safe partial order in RTS mode +* sched: fix WRR parameter data type +* tailq: fix lookup macro +* telemetry: make socket handler typedef private +* test/argparse: change initialization to workaround LTO +* test/crypto: fix mbuf handling +* test/crypto: fix vector initialization +* test/debug: fix crash with mlx5 devices +* test/debug: fix IOVA mode on PPC64 without huge pages +* test/dma: fix failure condition +* test: fix build with clang 21 +* test/func_reentrancy: fix args to EAL init call +* test/hash: check memory allocation +* test/telemetry: fix test calling all commands +* usertools/telemetry: fix exporter default IP binding +* vdpa/mlx5: remove unused constant +* version: 24.11.4-rc1 +* vhost: add VDUSE virtqueue ready state polling workaround +* vhost: fix double fetch when dequeue offloading +* vhost: fix external buffer in VDUSE +* vhost: fix virtqueue info init in VDUSE vring setup + +24.11.4 Validation +~~~~~~~~~~~~~~~~~~ + +* `Red Hat(R) Testing `__ + + * Platform + + * RHEL 9 + * Kernel 5.14 + * Qemu 8.2.0 + * X540-AT2 NIC(ixgbe, 10G) + + * Functionality + + * Guest with device assignment(PF) throughput testing(1G hugepage size) + * Guest with device assignment(PF) throughput testing(2M hugepage size) + * Guest with device assignment(VF) throughput testing + * PVP (host dpdk testpmd as vswitch) 1Q: throughput testing + * PVP vhost-user 2Q throughput testing + * PVP vhost-user 1Q cross numa node throughput testing + * Guest with vhost-user 2 queues throughput testing + * vhost-user reconnect with dpdk-client, qemu-server: qemu reconnect + * vhost-user reconnect with dpdk-client, qemu-server: ovs reconnect + * PVP 1Q live migration testing + * PVP 1Q cross numa node live migration testing + * Guest with ovs+dpdk+vhost-user 1Q live migration testing + * Guest with ovs+dpdk+vhost-user 1Q live migration testing (2M) + * Guest with ovs+dpdk+vhost-user 2Q live migration testing + * Guest with ovs+dpdk+vhost-user 4Q 
live migration testing + * Host PF + DPDK testing + * Host VF + DPDK testing + + +* `Intel(R) Testing `__ + + * Compile testing + + * Functional testing + + * PF (i40e, ixgbe) + * VF (i40e, ixgbe) + * PF/VF (ice) + * IPsec + * Virtio + * Cryptodev + * DLB + + * Performance testing + + * Throughput performance + * Cryptodev latency + * PF/VF NIC single core/NIC performance + * XXV710/E810 NIC Performance + + +* `Nvidia(R) Testing `__ + + * Basic functionality with testpmd + + * Tx/Rx + * xstats + * Timestamps + * Link status + * RTE flow + * RSS + * VLAN filtering, stripping and insertion + * Checksum/TSO + * ptype + * link_status_interrupt example application + * l3fwd-power example application + * Multi-process example applications + * Hardware LRO tests + * Regex application + * Buffer Split + * Tx scheduling + + * Build tests + * ConnectX-6 Dx + +24.11.4 Known Issues +~~~~~~~~~~~~~~~~~~~~ + +* DPDK 24.11.4 contains fixes from DPDK 25.11 +* Issues identified/fixed in DPDK main branch after DPDK 25.11 may be present in DPDK 24.11.4 +* `Bug 1855 - E610 vfs can't forward packets. `__ reported by Intel validation team. + +24.11.4 Fixes skipped and status unresolved +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* 0e3c389b9e bus/dpaa: do proper cleanup of frame queues on shutdown +* 08b83e84e1 crypto/cnxk: fix TLS mbuf +* 9843181aa5 crypto/virtio: fix driver ID +* 3e48adc135 event/cnxk: fix Rx offload flags +* 5a753913e0 Kushwaha common/cnxk: fix inline device write operation +* 25e35296b5 net/cnxk: fix reconfiguring MAC address +* 3bf9f0f9f0 net/mlx5: fix control flow leakage for external SQ +* f2f75ffe14 net/mlx5/hws: fix ESP header match in strict mode +* 98cf04bb90 test/graph: fix second run +* 8a8c02d2bb vfio: fix custom containers in multiprocess diff -Nru dpdk-24.11.3/doc/guides/sample_app_ug/l3_forward.rst dpdk-24.11.4/doc/guides/sample_app_ug/l3_forward.rst --- dpdk-24.11.3/doc/guides/sample_app_ug/l3_forward.rst 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/doc/guides/sample_app_ug/l3_forward.rst 2025-12-19 12:05:33.000000000 +0000 @@ -77,6 +77,8 @@ [-P] [--lookup LOOKUP_METHOD] --config(port,queue,lcore)[,(port,queue,lcore)] + [--rx-burst NPKTS] + [--tx-burst NPKTS] [--eth-dest=X,MM:MM:MM:MM:MM:MM] [--max-pkt-len PKTLEN] [--no-numa] @@ -113,6 +115,10 @@ * ``--config (port,queue,lcore)[,(port,queue,lcore)]:`` Determines which queues from which ports are mapped to which cores. +* ``--rx-burst NPKTS:`` Optional, Rx burst size in decimal (default 32). + +* ``--tx-burst NPKTS:`` Optional, Tx burst size in decimal (default 32). + * ``--eth-dest=X,MM:MM:MM:MM:MM:MM:`` Optional, ethernet destination for port X. * ``--max-pkt-len:`` Optional, maximum packet length in decimal (64-9600). 
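The l3fwd documentation hunk just above adds runtime ``--rx-burst`` and ``--tx-burst`` options with a default of 32. The sketch below shows, in generic terms, a forwarding loop where the burst sizes are runtime variables instead of compile-time constants; the function and variable names and the drop-on-full policy are assumptions for illustration, not the sample application's actual code::

    #include <stdint.h>

    #include <rte_common.h>
    #include <rte_ethdev.h>
    #include <rte_mbuf.h>

    #define MAX_PKT_BURST 512

    /* Illustrative runtime burst sizes; 32 mirrors the documented default.
     * In the real application these would be parsed from --rx-burst and
     * --tx-burst; rx_burst_size must stay <= MAX_PKT_BURST. */
    static uint16_t rx_burst_size = 32;
    static uint16_t tx_burst_size = 32;

    static void
    forward_one_queue(uint16_t rx_port, uint16_t tx_port, uint16_t queue)
    {
        struct rte_mbuf *pkts[MAX_PKT_BURST];
        uint16_t nb_rx, sent, i;

        /* Receive at most rx_burst_size packets per call. */
        nb_rx = rte_eth_rx_burst(rx_port, queue, pkts, rx_burst_size);

        /* Transmit in chunks of at most tx_burst_size packets. */
        sent = 0;
        while (sent < nb_rx) {
            uint16_t chunk = RTE_MIN((uint16_t)(nb_rx - sent), tx_burst_size);
            uint16_t nb_tx = rte_eth_tx_burst(tx_port, queue, &pkts[sent], chunk);

            sent += nb_tx;
            if (nb_tx < chunk)
                break;  /* Tx ring full, give up on the rest. */
        }

        /* Drop whatever the Tx ring did not accept. */
        for (i = sent; i < nb_rx; i++)
            rte_pktmbuf_free(pkts[i]);
    }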
diff -Nru dpdk-24.11.3/doc/guides/testpmd_app_ug/testpmd_funcs.rst dpdk-24.11.4/doc/guides/testpmd_app_ug/testpmd_funcs.rst --- dpdk-24.11.3/doc/guides/testpmd_app_ug/testpmd_funcs.rst 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/doc/guides/testpmd_app_ug/testpmd_funcs.rst 2025-12-19 12:05:33.000000000 +0000 @@ -5390,6 +5390,10 @@ testpmd> flow indirect_action 0 update 0 action conntrack_update dir / end +Inspect the conntrack action state through the following command:: + + testpmd> flow indirect_action 0 query + Sample meter with policy rules ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff -Nru dpdk-24.11.3/drivers/bus/cdx/cdx_vfio.c dpdk-24.11.4/drivers/bus/cdx/cdx_vfio.c --- dpdk-24.11.3/drivers/bus/cdx/cdx_vfio.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/bus/cdx/cdx_vfio.c 2025-12-19 12:05:33.000000000 +0000 @@ -385,7 +385,6 @@ cdx_vfio_map_resource_primary(struct rte_cdx_device *dev) { struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; - char cdx_addr[PATH_MAX] = {0}; static void *cdx_map_addr; struct mapped_cdx_resource *vfio_res = NULL; struct mapped_cdx_res_list *vfio_res_list = @@ -451,7 +450,7 @@ ret = cdx_vfio_mmap_resource(vfio_dev_fd, vfio_res, i, 0); if (ret < 0) { CDX_BUS_ERR("%s mapping region %i failed: %s", - cdx_addr, i, strerror(errno)); + dev_name, i, strerror(errno)); free(reg); goto err_vfio_res; } @@ -482,7 +481,6 @@ cdx_vfio_map_resource_secondary(struct rte_cdx_device *dev) { struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; - char cdx_addr[PATH_MAX] = {0}; int vfio_dev_fd; int i, ret; struct mapped_cdx_resource *vfio_res = NULL; @@ -533,7 +531,7 @@ return 0; err_vfio_dev_fd: - rte_vfio_release_device(RTE_CDX_BUS_DEVICES_PATH, cdx_addr, vfio_dev_fd); + rte_vfio_release_device(RTE_CDX_BUS_DEVICES_PATH, dev_name, vfio_dev_fd); return -1; } diff -Nru dpdk-24.11.3/drivers/bus/dpaa/base/qbman/qman_driver.c dpdk-24.11.4/drivers/bus/dpaa/base/qbman/qman_driver.c --- dpdk-24.11.3/drivers/bus/dpaa/base/qbman/qman_driver.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/bus/dpaa/base/qbman/qman_driver.c 2025-12-19 12:05:33.000000000 +0000 @@ -228,8 +228,6 @@ if (ret) pr_err("qman_free_global_portal() (%d)\n", ret); - kfree(qp); - process_portal_irq_unmap(cfg->irq); addr.cena = cfg->addr_virt[DPAA_PORTAL_CE]; diff -Nru dpdk-24.11.3/drivers/bus/dpaa/dpaa_bus.c dpdk-24.11.4/drivers/bus/dpaa/dpaa_bus.c --- dpdk-24.11.3/drivers/bus/dpaa/dpaa_bus.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/bus/dpaa/dpaa_bus.c 2025-12-19 12:05:33.000000000 +0000 @@ -45,6 +45,8 @@ #include #include +#define RTE_PRIORITY_102 102 + struct rte_dpaa_bus { struct rte_bus bus; TAILQ_HEAD(, rte_dpaa_device) device_list; @@ -58,6 +60,9 @@ /* define a variable to hold the portal_key, once created.*/ static pthread_key_t dpaa_portal_key; +/* dpaa lcore specific portals */ +struct dpaa_portal *dpaa_portals[RTE_MAX_LCORE] = {NULL}; +static int dpaa_bus_global_init; unsigned int dpaa_svr_family; @@ -387,6 +392,7 @@ return ret; } + dpaa_portals[lcore] = DPAA_PER_LCORE_PORTAL; DPAA_BUS_LOG(DEBUG, "QMAN thread initialized"); @@ -444,6 +450,8 @@ rte_free(dpaa_io_portal); dpaa_io_portal = NULL; DPAA_PER_LCORE_PORTAL = NULL; + dpaa_portals[rte_lcore_id()] = NULL; + DPAA_BUS_DEBUG("Portal cleanup done for lcore = %d", rte_lcore_id()); } static int @@ -736,6 +744,7 @@ break; } } + dpaa_bus_global_init = 1; return 0; } @@ -843,6 +852,55 @@ return NULL; } +static int +dpaa_bus_cleanup(void) +{ + struct rte_dpaa_device *dev, 
*tmp_dev; + + BUS_INIT_FUNC_TRACE(); + RTE_TAILQ_FOREACH_SAFE(dev, &rte_dpaa_bus.device_list, next, tmp_dev) { + struct rte_dpaa_driver *drv = dev->driver; + int ret = 0; + + if (!rte_dev_is_probed(&dev->device)) + continue; + if (!drv || !drv->remove) + continue; + ret = drv->remove(dev); + if (ret < 0) { + rte_errno = errno; + return -1; + } + dev->driver = NULL; + dev->device.driver = NULL; + } + dpaa_portal_finish((void *)DPAA_PER_LCORE_PORTAL); + dpaa_bus_global_init = 0; + DPAA_BUS_DEBUG("Bus cleanup done"); + + return 0; +} + +/* Adding destructor for double check in case non-gracefully + * exit. + */ +RTE_FINI_PRIO(dpaa_cleanup, 102) +{ + unsigned int lcore_id; + + if (!dpaa_bus_global_init) + return; + + /* cleanup portals in case non-graceful exit */ + RTE_LCORE_FOREACH_WORKER(lcore_id) { + /* Check for non zero id */ + dpaa_portal_finish((void *)dpaa_portals[lcore_id]); + } + dpaa_portal_finish((void *)DPAA_PER_LCORE_PORTAL); + dpaa_bus_global_init = 0; + DPAA_BUS_DEBUG("Worker thread clean up done"); +} + static struct rte_dpaa_bus rte_dpaa_bus = { .bus = { .scan = rte_dpaa_bus_scan, @@ -853,6 +911,7 @@ .plug = dpaa_bus_plug, .unplug = dpaa_bus_unplug, .dev_iterate = dpaa_bus_dev_iterate, + .cleanup = dpaa_bus_cleanup, }, .device_list = TAILQ_HEAD_INITIALIZER(rte_dpaa_bus.device_list), .driver_list = TAILQ_HEAD_INITIALIZER(rte_dpaa_bus.driver_list), diff -Nru dpdk-24.11.3/drivers/bus/fslmc/bus_fslmc_driver.h dpdk-24.11.4/drivers/bus/fslmc/bus_fslmc_driver.h --- dpdk-24.11.3/drivers/bus/fslmc/bus_fslmc_driver.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/bus/fslmc/bus_fslmc_driver.h 2025-12-19 12:05:33.000000000 +0000 @@ -96,7 +96,6 @@ TAILQ_ENTRY(rte_dpaa2_device) next; /**< Next probed DPAA2 device. */ struct rte_device device; /**< Inherit core device */ union { - struct rte_eth_dev *eth_dev; /**< ethernet device */ struct rte_cryptodev *cryptodev; /**< Crypto Device */ struct rte_dma_dev *dmadev; /**< DMA Device */ struct rte_rawdev *rawdev; /**< Raw Device */ diff -Nru dpdk-24.11.3/drivers/bus/fslmc/fslmc_bus.c dpdk-24.11.4/drivers/bus/fslmc/fslmc_bus.c --- dpdk-24.11.3/drivers/bus/fslmc/fslmc_bus.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/bus/fslmc/fslmc_bus.c 2025-12-19 12:05:33.000000000 +0000 @@ -238,7 +238,7 @@ rte_fslmc_parse(const char *name, void *addr) { uint16_t dev_id; - char *t_ptr; + const char *t_ptr; const char *sep; uint8_t sep_exists = 0; int ret = -1; diff -Nru dpdk-24.11.3/drivers/bus/ifpga/ifpga_bus.c dpdk-24.11.4/drivers/bus/ifpga/ifpga_bus.c --- dpdk-24.11.3/drivers/bus/ifpga/ifpga_bus.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/bus/ifpga/ifpga_bus.c 2025-12-19 12:05:33.000000000 +0000 @@ -464,8 +464,8 @@ int *out = addr; struct rte_rawdev *rawdev = NULL; char rawdev_name[RTE_RAWDEV_NAME_MAX_LEN]; - char *c1 = NULL; - char *c2 = NULL; + const char *c1 = NULL; + const char *c2 = NULL; int port = IFPGA_BUS_DEV_PORT_MAX; char str_port[8]; int str_port_len = 0; diff -Nru dpdk-24.11.3/drivers/bus/pci/linux/pci_vfio.c dpdk-24.11.4/drivers/bus/pci/linux/pci_vfio.c --- dpdk-24.11.3/drivers/bus/pci/linux/pci_vfio.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/bus/pci/linux/pci_vfio.c 2025-12-19 12:05:33.000000000 +0000 @@ -947,7 +947,7 @@ char pci_addr[PATH_MAX] = {0}; int vfio_dev_fd; struct rte_pci_addr *loc = &dev->addr; - int i, j, ret; + int j, ret, i = 0; struct mapped_pci_resource *vfio_res = NULL; struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, 
mapped_pci_res_list); @@ -985,7 +985,7 @@ ret = pci_vfio_fill_regions(dev, vfio_dev_fd, &device_info); if (ret) - return ret; + goto err_vfio_dev_fd; /* map BARs */ maps = vfio_res->maps; diff -Nru dpdk-24.11.3/drivers/bus/pci/windows/pci.c dpdk-24.11.4/drivers/bus/pci/windows/pci.c --- dpdk-24.11.3/drivers/bus/pci/windows/pci.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/bus/pci/windows/pci.c 2025-12-19 12:05:33.000000000 +0000 @@ -11,18 +11,19 @@ #include #include -#include "private.h" -#include "pci_netuio.h" - +/* DEVPKEY_Device_Numa_Node should be defined in devpkey.h */ #include -#include - #if defined RTE_TOOLCHAIN_GCC && (__MINGW64_VERSION_MAJOR < 8) #include DEFINE_DEVPROPKEY(DEVPKEY_Device_Numa_Node, 0x540b947e, 0x8b40, 0x45bc, 0xa8, 0xa2, 0x6a, 0x0b, 0x89, 0x4c, 0xbd, 0xa2, 3); #endif +#include + +#include "private.h" +#include "pci_netuio.h" + /* * This code is used to simulate a PCI probe by parsing information in * the registry hive for PCI devices. diff -Nru dpdk-24.11.3/drivers/bus/pci/windows/pci_netuio.h dpdk-24.11.4/drivers/bus/pci/windows/pci_netuio.h --- dpdk-24.11.3/drivers/bus/pci/windows/pci_netuio.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/bus/pci/windows/pci_netuio.h 2025-12-19 12:05:33.000000000 +0000 @@ -5,12 +5,12 @@ #ifndef _PCI_NETUIO_H_ #define _PCI_NETUIO_H_ -#if !defined(NTDDI_WIN10_FE) || NTDDI_VERSION < NTDDI_WIN10_FE -/* GUID definition for device class netUIO */ +#if !defined(NTDDI_WIN10_FE) || NTDDI_VERSION < NTDDI_WIN10_FE || defined(__MINGW64__) +/* GUID_DEVCLASS_NETUIO should be defined in devguid.h */ DEFINE_GUID(GUID_DEVCLASS_NETUIO, 0x78912bc1, 0xcb8e, 0x4b28, 0xa3, 0x29, 0xf3, 0x22, 0xeb, 0xad, 0xbe, 0x0f); -/* GUID definition for the netuio device interface */ +/* GUID_DEVINTERFACE_NETUIO should be defined in ndisguid.h */ DEFINE_GUID(GUID_DEVINTERFACE_NETUIO, 0x08336f60, 0x0679, 0x4c6c, 0x85, 0xd2, 0xae, 0x7c, 0xed, 0x65, 0xff, 0xf7); #endif diff -Nru dpdk-24.11.3/drivers/bus/uacce/uacce.c dpdk-24.11.4/drivers/bus/uacce/uacce.c --- dpdk-24.11.3/drivers/bus/uacce/uacce.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/bus/uacce/uacce.c 2025-12-19 12:05:33.000000000 +0000 @@ -327,8 +327,8 @@ uacce_match(const struct rte_uacce_driver *dr, const struct rte_uacce_device *dev) { const struct rte_uacce_id *id_table; + const char *map; uint32_t len; - char *map; for (id_table = dr->id_table; id_table->dev_api != NULL; id_table++) { if (strcmp(id_table->dev_api, dev->api) != 0) diff -Nru dpdk-24.11.3/drivers/common/cnxk/roc_bphy_cgx.c dpdk-24.11.4/drivers/common/cnxk/roc_bphy_cgx.c --- dpdk-24.11.3/drivers/common/cnxk/roc_bphy_cgx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/cnxk/roc_bphy_cgx.c 2025-12-19 12:05:33.000000000 +0000 @@ -65,8 +65,7 @@ } static int -roc_bphy_cgx_wait_for_ownership(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, - uint64_t *scr0) +roc_bphy_cgx_wait_ack(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, uint64_t *scr0, bool ack) { int tries = 5000; uint64_t scr1; @@ -75,16 +74,18 @@ *scr0 = roc_bphy_cgx_read(roc_cgx, lmac, CGX_CMRX_SCRATCH0); scr1 = roc_bphy_cgx_read(roc_cgx, lmac, CGX_CMRX_SCRATCH1); - if (FIELD_GET(SCR1_OWN_STATUS, scr1) == ETH_OWN_NON_SECURE_SW && - FIELD_GET(SCR0_ETH_EVT_STS_S_ACK, *scr0) == 0) - break; - /* clear async events if any */ - if (FIELD_GET(SCR0_ETH_EVT_STS_S_EVT_TYPE, *scr0) == - ETH_EVT_ASYNC && - FIELD_GET(SCR0_ETH_EVT_STS_S_ACK, *scr0)) + if (FIELD_GET(SCR0_ETH_EVT_STS_S_EVT_TYPE, *scr0) == ETH_EVT_ASYNC && + 
FIELD_GET(SCR0_ETH_EVT_STS_S_ACK, *scr0)) { roc_bphy_cgx_ack(roc_cgx, lmac, scr0); + goto skip; + } + if (FIELD_GET(SCR1_OWN_STATUS, scr1) == ETH_OWN_NON_SECURE_SW && + FIELD_GET(SCR0_ETH_EVT_STS_S_ACK, *scr0) == ack) + break; + +skip: plt_delay_ms(1); } while (--tries); @@ -92,29 +93,20 @@ } static int -roc_bphy_cgx_wait_for_ack(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, - uint64_t *scr0) +roc_bphy_cgx_wait_for_ownership(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, uint64_t *scr0) { - int tries = 5000; - uint64_t scr1; - - do { - *scr0 = roc_bphy_cgx_read(roc_cgx, lmac, CGX_CMRX_SCRATCH0); - scr1 = roc_bphy_cgx_read(roc_cgx, lmac, CGX_CMRX_SCRATCH1); - - if (FIELD_GET(SCR1_OWN_STATUS, scr1) == ETH_OWN_NON_SECURE_SW && - FIELD_GET(SCR0_ETH_EVT_STS_S_ACK, *scr0)) - break; - - plt_delay_ms(1); - } while (--tries); + return roc_bphy_cgx_wait_ack(roc_cgx, lmac, scr0, false); +} - return tries ? 0 : -ETIMEDOUT; +static int +roc_bphy_cgx_wait_for_ack(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, uint64_t *scr0) +{ + return roc_bphy_cgx_wait_ack(roc_cgx, lmac, scr0, true); } static int -roc_bphy_cgx_intf_req(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, - uint64_t scr1, uint64_t *scr0) +roc_bphy_cgx_intf_req(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, uint64_t scr1, + uint64_t *scr0) { uint8_t cmd_id = FIELD_GET(SCR1_ETH_CMD_ID, scr1); int ret; @@ -142,12 +134,6 @@ if (cmd_id == ETH_CMD_INTF_SHUTDOWN) goto out; - if (FIELD_GET(SCR0_ETH_EVT_STS_S_EVT_TYPE, *scr0) != ETH_EVT_CMD_RESP) { - plt_err("received async event instead of cmd resp event"); - ret = -EIO; - goto out; - } - if (FIELD_GET(SCR0_ETH_EVT_STS_S_ID, *scr0) != cmd_id) { plt_err("received resp for cmd %d expected for cmd %d", (int)FIELD_GET(SCR0_ETH_EVT_STS_S_ID, *scr0), cmd_id); diff -Nru dpdk-24.11.3/drivers/common/cnxk/roc_nix_debug.c dpdk-24.11.4/drivers/common/cnxk/roc_nix_debug.c --- dpdk-24.11.3/drivers/common/cnxk/roc_nix_debug.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/cnxk/roc_nix_debug.c 2025-12-19 12:05:33.000000000 +0000 @@ -769,8 +769,8 @@ nix_dump(file, "W2: xqe_hdr_split \t\t%d\nW2: xqe_imm_copy \t\t%d", ctx->xqe_hdr_split, ctx->xqe_imm_copy); - nix_dump(file, "W2: band_prof_id\t\t%d\n", - ((ctx->band_prof_id_h << 10) | ctx->band_prof_id_l)); + nix_dump(file, "W2: band_prof_id\t\t0x%" PRIx64 "\n", + (uint64_t)((ctx->band_prof_id_h << 10) | ctx->band_prof_id_l)); nix_dump(file, "W2: xqe_imm_size \t\t%d\nW2: later_skip \t\t\t%d", ctx->xqe_imm_size, ctx->later_skip); nix_dump(file, "W2: sso_bp_ena\t\t%d\n", ctx->sso_bp_ena); diff -Nru dpdk-24.11.3/drivers/common/cnxk/roc_nix_inl.c dpdk-24.11.4/drivers/common/cnxk/roc_nix_inl.c --- dpdk-24.11.3/drivers/common/cnxk/roc_nix_inl.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/cnxk/roc_nix_inl.c 2025-12-19 12:05:33.000000000 +0000 @@ -563,12 +563,12 @@ if (inb_inl_dev) { inl_dev = idev->nix_inl_dev; - if (inl_dev && inl_dev->attach_cptlf && inl_dev->rx_inj_ena && + if (inl_dev && inl_dev->attach_cptlf && inl_dev->rx_inj_ena && roc_nix && roc_nix->rx_inj_ena) return true; } - return roc_nix->rx_inj_ena; + return roc_nix ? 
roc_nix->rx_inj_ena : 0; } uint32_t diff -Nru dpdk-24.11.3/drivers/common/cnxk/roc_nix_queue.c dpdk-24.11.4/drivers/common/cnxk/roc_nix_queue.c --- dpdk-24.11.3/drivers/common/cnxk/roc_nix_queue.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/cnxk/roc_nix_queue.c 2025-12-19 12:05:33.000000000 +0000 @@ -2031,7 +2031,7 @@ /* Restore limit to max SQB count that the pool was created * for aura drain to succeed. */ - roc_npa_aura_limit_modify(sq->aura_handle, NIX_MAX_SQB); + roc_npa_aura_limit_modify(sq->aura_handle, sq->aura_sqb_bufs); rc |= roc_npa_pool_destroy(sq->aura_handle); plt_free(sq->fc); plt_free(sq->sqe_mem); diff -Nru dpdk-24.11.3/drivers/common/cnxk/roc_nix_tm_ops.c dpdk-24.11.4/drivers/common/cnxk/roc_nix_tm_ops.c --- dpdk-24.11.3/drivers/common/cnxk/roc_nix_tm_ops.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/cnxk/roc_nix_tm_ops.c 2025-12-19 12:05:33.000000000 +0000 @@ -618,6 +618,13 @@ sq_id = node->id; sq = nix->sqs[sq_id]; + if (!sq) { + plt_err("nb_rxq %d nb_txq %d sq_id %d lvl %d", nix->nb_rx_queues, + nix->nb_tx_queues, sq_id, node->lvl); + roc_nix_tm_dump(roc_nix, NULL); + roc_nix_dump(roc_nix, NULL); + return NIX_ERR_TM_INVALID_NODE; + } rc = roc_nix_sq_ena_dis(sq, true); if (rc) { plt_err("TM sw xon failed on SQ %u, rc=%d", node->id, diff -Nru dpdk-24.11.3/drivers/common/mlx5/linux/mlx5_nl.c dpdk-24.11.4/drivers/common/mlx5/linux/mlx5_nl.c --- dpdk-24.11.3/drivers/common/mlx5/linux/mlx5_nl.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/mlx5/linux/mlx5_nl.c 2025-12-19 12:05:33.000000000 +0000 @@ -27,8 +27,6 @@ #endif -/* Size of the buffer to receive kernel messages */ -#define MLX5_NL_BUF_SIZE (32 * 1024) /* Send buffer size for the Netlink socket */ #define MLX5_SEND_BUF_SIZE 32768 /* Receive buffer size for the Netlink socket */ @@ -2033,3 +2031,111 @@ *enable ? "en" : "dis", pci_addr); return ret; } + +struct mlx5_mtu { + uint32_t min_mtu; + bool min_mtu_set; + uint32_t max_mtu; + bool max_mtu_set; +}; + +static int +mlx5_nl_get_mtu_bounds_cb(struct nlmsghdr *nh, void *arg) +{ + size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + struct mlx5_mtu *out = arg; + + while (off < nh->nlmsg_len) { + struct rtattr *ra = RTE_PTR_ADD(nh, off); + uint32_t *payload; + + switch (ra->rta_type) { + case IFLA_MIN_MTU: + payload = RTA_DATA(ra); + out->min_mtu = *payload; + out->min_mtu_set = true; + break; + case IFLA_MAX_MTU: + payload = RTA_DATA(ra); + out->max_mtu = *payload; + out->max_mtu_set = true; + break; + default: + /* Nothing to do for other attributes. */ + break; + } + off += RTA_ALIGN(ra->rta_len); + } + + return 0; +} + +/** + * Query minimum and maximum allowed MTU values for given Linux network interface. + * + * This function queries the following interface attributes exposed in netlink since Linux 4.18: + * + * - IFLA_MIN_MTU - minimum allowed MTU + * - IFLA_MAX_MTU - maximum allowed MTU + * + * @param[in] nl + * Netlink socket of the ROUTE kind (NETLINK_ROUTE). + * @param[in] ifindex + * Linux network device index. + * @param[out] min_mtu + * Pointer to minimum allowed MTU. Populated only if both minimum and maximum MTU was queried. + * @param[out] max_mtu + * Pointer to maximum allowed MTU. Populated only if both minimum and maximum MTU was queried. + * + * @return + * 0 on success, negative on error and rte_errno is set. + * + * Known errors: + * + * - (-EINVAL) - either @p min_mtu or @p max_mtu is NULL. 
+ * - (-ENOENT) - either minimum or maximum allowed MTU was not found in interface attributes. + */ +int +mlx5_nl_get_mtu_bounds(int nl, unsigned int ifindex, uint16_t *min_mtu, uint16_t *max_mtu) +{ + struct mlx5_mtu out = { 0 }; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + } req = { + .nh = { + .nlmsg_len = NLMSG_LENGTH(sizeof(req.info)), + .nlmsg_type = RTM_GETLINK, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + }, + .info = { + .ifi_family = AF_UNSPEC, + .ifi_index = ifindex, + }, + }; + uint32_t sn = MLX5_NL_SN_GENERATE; + int ret; + + if (min_mtu == NULL || max_mtu == NULL) { + rte_errno = EINVAL; + return -rte_errno; + } + + ret = mlx5_nl_send(nl, &req.nh, sn); + if (ret < 0) + return ret; + + ret = mlx5_nl_recv(nl, sn, mlx5_nl_get_mtu_bounds_cb, &out); + if (ret < 0) + return ret; + + if (!out.min_mtu_set || !out.max_mtu_set) { + rte_errno = ENOENT; + return -rte_errno; + } + + *min_mtu = out.min_mtu; + *max_mtu = out.max_mtu; + + return ret; +} diff -Nru dpdk-24.11.3/drivers/common/mlx5/linux/mlx5_nl.h dpdk-24.11.4/drivers/common/mlx5/linux/mlx5_nl.h --- dpdk-24.11.3/drivers/common/mlx5/linux/mlx5_nl.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/mlx5/linux/mlx5_nl.h 2025-12-19 12:05:33.000000000 +0000 @@ -87,4 +87,7 @@ int mlx5_nl_devlink_esw_multiport_get(int nlsk_fd, int family_id, const char *pci_addr, int *enable); +__rte_internal +int mlx5_nl_get_mtu_bounds(int nl, unsigned int ifindex, uint16_t *min_mtu, uint16_t *max_mtu); + #endif /* RTE_PMD_MLX5_NL_H_ */ diff -Nru dpdk-24.11.3/drivers/common/mlx5/mlx5_common.h dpdk-24.11.4/drivers/common/mlx5/mlx5_common.h --- dpdk-24.11.3/drivers/common/mlx5/mlx5_common.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/mlx5/mlx5_common.h 2025-12-19 12:05:33.000000000 +0000 @@ -558,9 +558,6 @@ */ typedef int (mlx5_class_driver_remove_t)(struct mlx5_common_device *cdev); -/** Device already probed can be probed again to check for new ports. */ -#define MLX5_DRV_PROBE_AGAIN 0x0004 - /** * A structure describing a mlx5 common class driver. */ diff -Nru dpdk-24.11.3/drivers/common/mlx5/mlx5_common_mr.c dpdk-24.11.4/drivers/common/mlx5/mlx5_common_mr.c --- dpdk-24.11.3/drivers/common/mlx5/mlx5_common_mr.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/mlx5/mlx5_common_mr.c 2025-12-19 12:05:33.000000000 +0000 @@ -1710,18 +1710,24 @@ * hugepage can be shared across mempools that also fit in it. 
*/ if (share_hugepage) { + struct mlx5_mempool_mr *gc_mrs = NULL; + rte_rwlock_write_lock(&share_cache->rwlock); LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next) { if (mpr->mrs[0].pmd_mr.addr == (void *)ranges[0].start) break; } if (mpr != NULL) { + /* Releasing MRs here can create a dead-lock on share_cache->rwlock */ + gc_mrs = new_mpr->mrs; new_mpr->mrs = mpr->mrs; mlx5_mempool_reg_attach(new_mpr); LIST_INSERT_HEAD(&share_cache->mempool_reg_list, new_mpr, next); } rte_rwlock_write_unlock(&share_cache->rwlock); + if (gc_mrs != NULL) + mlx5_free(gc_mrs); if (mpr != NULL) { DRV_LOG(DEBUG, "Shared MR %#x in PD %p for mempool %s with mempool %s", mpr->mrs[0].pmd_mr.lkey, pd, mp->name, diff -Nru dpdk-24.11.3/drivers/common/mlx5/mlx5_devx_cmds.c dpdk-24.11.4/drivers/common/mlx5/mlx5_devx_cmds.c --- dpdk-24.11.3/drivers/common/mlx5/mlx5_devx_cmds.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/mlx5/mlx5_devx_cmds.c 2025-12-19 12:05:33.000000000 +0000 @@ -743,6 +743,8 @@ MLX5_GENERAL_OBJ_TYPE_FLEX_PARSE_GRAPH); MLX5_SET(parse_graph_flex, flex, header_length_mode, data->header_length_mode); + MLX5_SET(parse_graph_flex, flex, header_length_field_offset_mode, + data->header_length_field_offset_mode); MLX5_SET64(parse_graph_flex, flex, modify_field_select, data->modify_field_select); MLX5_SET(parse_graph_flex, flex, header_length_base_value, @@ -870,6 +872,8 @@ max_next_header_offset); attr->header_length_mask_width = MLX5_GET(parse_graph_node_cap, hcattr, header_length_mask_width); + attr->header_length_field_mode_wa = !MLX5_GET(parse_graph_node_cap, hcattr, + header_length_field_offset_mode); /* Get the max supported samples from HCA CAP 2 */ hcattr = mlx5_devx_get_hca_cap(ctx, in, out, &rc, MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 | diff -Nru dpdk-24.11.3/drivers/common/mlx5/mlx5_devx_cmds.h dpdk-24.11.4/drivers/common/mlx5/mlx5_devx_cmds.h --- dpdk-24.11.3/drivers/common/mlx5/mlx5_devx_cmds.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/mlx5/mlx5_devx_cmds.h 2025-12-19 12:05:33.000000000 +0000 @@ -119,6 +119,7 @@ uint8_t sample_tunnel_inner2:1; uint8_t zero_size_supported:1; uint8_t sample_id_in_out:1; + uint8_t header_length_field_mode_wa:1; uint16_t max_base_header_length; uint8_t max_sample_base_offset; uint16_t max_next_header_offset; @@ -630,6 +631,7 @@ uint32_t header_length_base_value:16; uint32_t header_length_field_shift:4; uint32_t header_length_field_offset:16; + uint32_t header_length_field_offset_mode:1; uint32_t header_length_field_mask; struct mlx5_devx_match_sample_attr sample[MLX5_GRAPH_NODE_SAMPLE_NUM]; uint32_t next_header_field_offset:16; diff -Nru dpdk-24.11.3/drivers/common/mlx5/mlx5_prm.h dpdk-24.11.4/drivers/common/mlx5/mlx5_prm.h --- dpdk-24.11.3/drivers/common/mlx5/mlx5_prm.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/mlx5/mlx5_prm.h 2025-12-19 12:05:33.000000000 +0000 @@ -1202,7 +1202,11 @@ u8 tunnel_header_1[0x20]; u8 tunnel_header_2[0x20]; u8 tunnel_header_3[0x20]; - u8 reserved[0x100]; + u8 reserved[0x80]; + u8 metadata_reg_c_8[0x20]; + u8 metadata_reg_c_9[0x20]; + u8 metadata_reg_c_10[0x20]; + u8 metadata_reg_c_11[0x20]; }; /* Flow matcher. 
*/ @@ -2107,7 +2111,9 @@ u8 max_num_arc_in[0x08]; u8 max_num_arc_out[0x08]; u8 max_num_sample[0x08]; - u8 reserved_at_78[0x03]; + u8 reserved_at_78[0x01]; + u8 header_length_field_offset_mode[0x1]; + u8 reserved_at_79[0x01]; u8 parse_graph_anchor[0x1]; u8 reserved_at_7c[0x01]; u8 sample_tunnel_inner2[0x1]; @@ -4916,7 +4922,9 @@ u8 next_header_field_offset[0x10]; u8 reserved_at_160[0x12]; u8 head_anchor_id[0x6]; - u8 reserved_at_178[0x3]; + u8 reserved_at_178[0x1]; + u8 header_length_field_offset_mode[0x1]; + u8 reserved_at_17a[0x1]; u8 next_header_field_size[0x5]; u8 header_length_field_mask[0x20]; u8 reserved_at_224[0x20]; diff -Nru dpdk-24.11.3/drivers/common/mlx5/version.map dpdk-24.11.4/drivers/common/mlx5/version.map --- dpdk-24.11.3/drivers/common/mlx5/version.map 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/mlx5/version.map 2025-12-19 12:05:33.000000000 +0000 @@ -146,6 +146,7 @@ mlx5_nl_vf_mac_addr_modify; # WINDOWS_NO_EXPORT mlx5_nl_vlan_vmwa_create; # WINDOWS_NO_EXPORT mlx5_nl_vlan_vmwa_delete; # WINDOWS_NO_EXPORT + mlx5_nl_get_mtu_bounds; # WINDOWS_NO_EXPORT mlx5_os_get_physical_device_ctx; mlx5_os_umem_dereg; diff -Nru dpdk-24.11.3/drivers/common/mlx5/windows/mlx5_win_defs.h dpdk-24.11.4/drivers/common/mlx5/windows/mlx5_win_defs.h --- dpdk-24.11.3/drivers/common/mlx5/windows/mlx5_win_defs.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/mlx5/windows/mlx5_win_defs.h 2025-12-19 12:05:33.000000000 +0000 @@ -184,7 +184,6 @@ #define MLX5DV_FLOW_TABLE_TYPE_NIC_RX MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX #define MLX5DV_FLOW_TABLE_TYPE_NIC_TX MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX #define MLX5DV_FLOW_TABLE_TYPE_FDB MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB -#define MLX5DV_FLOW_TABLE_TYPE_RDMA_RX MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX struct mlx5dv_flow_match_parameters { size_t match_sz; @@ -263,6 +262,5 @@ #define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6 #define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13 #define MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES 3 -#define IB_QPT_RAW_PACKET 8 #endif /* MLX5_WIN_DEFS_H */ diff -Nru dpdk-24.11.3/drivers/common/qat/qat_device.c dpdk-24.11.4/drivers/common/qat/qat_device.c --- dpdk-24.11.3/drivers/common/qat/qat_device.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/qat/qat_device.c 2025-12-19 12:05:33.000000000 +0000 @@ -173,7 +173,7 @@ * other than the equals sign is ignored. It will not work with other conversion * functions like strt*. 
*/ -char *qat_dev_cmdline_get_val(struct qat_pci_device *qat_dev, +const char *qat_dev_cmdline_get_val(struct qat_pci_device *qat_dev, const char *key) { if (qat_dev->command_line == NULL) @@ -185,8 +185,9 @@ static int cmdline_validate(const char *arg) { + const char *eq_sign = strchr(arg, '='); int i, len; - char *eq_sign = strchr(arg, '='); + /* Check for the equal sign */ if (eq_sign == NULL) { QAT_LOG(ERR, "malformed string, no equals sign, %s", arg); @@ -263,7 +264,7 @@ struct rte_mem_resource *mem_resource; const struct rte_memzone *qat_dev_mz; int qat_dev_size, extra_size; - char *cmdline = NULL; + const char *cmdline = NULL; rte_pci_device_name(&pci_dev->addr, name, sizeof(name)); snprintf(name+strlen(name), QAT_DEV_NAME_MAX_LEN-strlen(name), "_qat"); diff -Nru dpdk-24.11.3/drivers/common/qat/qat_device.h dpdk-24.11.4/drivers/common/qat/qat_device.h --- dpdk-24.11.3/drivers/common/qat/qat_device.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/common/qat/qat_device.h 2025-12-19 12:05:33.000000000 +0000 @@ -42,7 +42,7 @@ typedef int (*qat_dev_get_slice_map_t)(uint32_t *map, const struct rte_pci_device *pci_dev); -char *qat_dev_cmdline_get_val(struct qat_pci_device *qat_dev, const char *key); +const char *qat_dev_cmdline_get_val(struct qat_pci_device *qat_dev, const char *key); struct qat_dev_hw_spec_funcs { qat_dev_reset_ring_pairs_t qat_dev_reset_ring_pairs; diff -Nru dpdk-24.11.3/drivers/compress/qat/qat_comp_pmd.c dpdk-24.11.4/drivers/compress/qat/qat_comp_pmd.c --- dpdk-24.11.3/drivers/compress/qat/qat_comp_pmd.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/compress/qat/qat_comp_pmd.c 2025-12-19 12:05:33.000000000 +0000 @@ -690,7 +690,7 @@ &qat_comp_gen_dev_ops[qat_pci_dev->qat_dev_gen]; uint64_t capa_size; uint16_t sub_id = qat_dev_instance->pci_dev->id.subsystem_device_id; - char *cmdline = NULL; + const char *cmdline = NULL; snprintf(name, RTE_COMPRESSDEV_NAME_MAX_LEN, "%s_%s", qat_pci_dev->name, "comp"); diff -Nru dpdk-24.11.3/drivers/crypto/caam_jr/caam_jr_uio.c dpdk-24.11.4/drivers/crypto/caam_jr/caam_jr_uio.c --- dpdk-24.11.3/drivers/crypto/caam_jr/caam_jr_uio.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/crypto/caam_jr/caam_jr_uio.c 2025-12-19 12:05:33.000000000 +0000 @@ -106,19 +106,11 @@ static bool file_name_match_extract(const char filename[], const char match[], int *number) { - char *substr = NULL; - - substr = strstr(filename, match); - if (substr == NULL) - return false; - /* substring was found in * read number following substring in */ - if (sscanf(filename + strlen(match), "%d", number) <= 0) - return false; - - return true; + return strstr(filename, match) != NULL && + sscanf(filename + strlen(match), "%d", number) > 0; } /** @brief Reads first line from a file. 
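Several hunks above (fslmc, ifpga, uacce and the QAT ``qat_dev_cmdline_get_val()`` prototype) move read-only string parsing over to ``const char *``. The following is a standalone sketch of that const-correct "key=value" lookup pattern; it illustrates the pattern only, it is not the actual QAT helper, and the comma-separator handling is an assumption::

    #include <stddef.h>
    #include <string.h>

    /*
     * Look up "key" in a "key1=val1,key2=val2" style string and return a
     * pointer to its value. Nothing is modified, so both the input and the
     * returned pointer can be const.
     */
    static const char *
    cmdline_get_value(const char *cmdline, const char *key)
    {
        const char *pos;
        size_t klen;

        if (cmdline == NULL || key == NULL)
            return NULL;

        klen = strlen(key);
        pos = strstr(cmdline, key);
        while (pos != NULL) {
            /* Accept only whole keys: start of string or right after ','. */
            if ((pos == cmdline || pos[-1] == ',') && pos[klen] == '=')
                return pos + klen + 1;
            pos = strstr(pos + 1, key);
        }
        return NULL;
    }

The returned pointer references the caller's buffer, so its lifetime is tied to the original string; callers that only read the value need no copy, which is what makes the const-qualified signature sufficient.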
diff -Nru dpdk-24.11.3/drivers/crypto/cnxk/cnxk_ae.h dpdk-24.11.4/drivers/crypto/cnxk/cnxk_ae.h --- dpdk-24.11.3/drivers/crypto/cnxk/cnxk_ae.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/crypto/cnxk/cnxk_ae.h 2025-12-19 12:05:33.000000000 +0000 @@ -1592,20 +1592,17 @@ case RTE_CRYPTO_ASYM_OP_VERIFY: if (rsa_ctx->padding.type == RTE_CRYPTO_RSA_PADDING_NONE) { rsa->sign.length = rsa_ctx->n.length; - memcpy(rsa->sign.data, rptr, rsa->sign.length); + if (memcmp(rptr, rsa->message.data, rsa->message.length)) + cop->status = RTE_CRYPTO_OP_STATUS_ERROR; } else { /* Get length of signed output */ - rsa->sign.length = - rte_cpu_to_be_16(*((uint16_t *)rptr)); + rsa->sign.length = rte_cpu_to_be_16(*((uint16_t *)rptr)); /* * Offset output data pointer by length field - * (2 bytes) and copy signed data. + * (2 bytes) and compare signed data. */ - memcpy(rsa->sign.data, rptr + 2, rsa->sign.length); - } - if (memcmp(rsa->sign.data, rsa->message.data, - rsa->message.length)) { - cop->status = RTE_CRYPTO_OP_STATUS_ERROR; + if (memcmp(rptr + 2, rsa->message.data, rsa->message.length)) + cop->status = RTE_CRYPTO_OP_STATUS_ERROR; } break; default: diff -Nru dpdk-24.11.3/drivers/crypto/ipsec_mb/ipsec_mb_ops.c dpdk-24.11.4/drivers/crypto/ipsec_mb/ipsec_mb_ops.c --- dpdk-24.11.3/drivers/crypto/ipsec_mb/ipsec_mb_ops.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/crypto/ipsec_mb/ipsec_mb_ops.c 2025-12-19 12:05:33.000000000 +0000 @@ -138,6 +138,7 @@ ipsec_mb_qp_release(struct rte_cryptodev *dev, uint16_t qp_id) { struct ipsec_mb_qp *qp = dev->data->queue_pairs[qp_id]; + uint16_t process_id = (uint16_t)getpid(); if (!qp) return 0; @@ -152,8 +153,10 @@ rte_free(qp); dev->data->queue_pairs[qp_id] = NULL; } else { /* secondary process */ - return ipsec_mb_secondary_qp_op(dev->data->dev_id, qp_id, - NULL, 0, RTE_IPSEC_MB_MP_REQ_QP_FREE); + if (qp->qp_used_by_pid == process_id) + return ipsec_mb_secondary_qp_op(dev->data->dev_id, + qp_id, NULL, 0, + RTE_IPSEC_MB_MP_REQ_QP_FREE); } return 0; } diff -Nru dpdk-24.11.3/drivers/crypto/mlx5/mlx5_crypto.c dpdk-24.11.4/drivers/crypto/mlx5/mlx5_crypto.c --- dpdk-24.11.3/drivers/crypto/mlx5/mlx5_crypto.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/crypto/mlx5/mlx5_crypto.c 2025-12-19 12:05:33.000000000 +0000 @@ -19,9 +19,7 @@ #include "mlx5_crypto.h" #define MLX5_CRYPTO_DRIVER_NAME crypto_mlx5 -#define MLX5_CRYPTO_LOG_NAME pmd.crypto.mlx5 #define MLX5_CRYPTO_MAX_QPS 128 -#define MLX5_CRYPTO_MAX_SEGS 56 #define MLX5_CRYPTO_FEATURE_FLAGS(wrapped_mode) \ (RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO | RTE_CRYPTODEV_FF_HW_ACCELERATED | \ diff -Nru dpdk-24.11.3/drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c dpdk-24.11.4/drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c --- dpdk-24.11.3/drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c 2025-12-19 12:05:33.000000000 +0000 @@ -452,7 +452,7 @@ } total_len = qat_sym_build_req_set_data(req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -496,7 +496,7 @@ } total_len = qat_sym_build_req_set_data(req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -602,7 +602,7 @@ 
rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = qat_sym_build_req_set_data(req, user_data, cookie, - data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -661,7 +661,7 @@ data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } if (unlikely(data_len < 0) || error) @@ -715,7 +715,7 @@ rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = qat_sym_build_req_set_data(req, user_data, cookie, - data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -770,12 +770,12 @@ data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, vec->src_sgl[i].num, - vec->dest_sgl[i].vec, vec->dest_sgl[i].num); + vec->dest_sgl[i].vec, vec->dest_sgl[i].num, NULL, NULL); } else { data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } if (unlikely(data_len < 0)) diff -Nru dpdk-24.11.3/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c dpdk-24.11.4/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c --- dpdk-24.11.3/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c 2025-12-19 12:05:33.000000000 +0000 @@ -219,7 +219,7 @@ } total_len = qat_sym_build_req_set_data(qat_req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -376,7 +376,7 @@ rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = qat_sym_build_req_set_data(req, user_data, cookie, - data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -435,7 +435,7 @@ data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } if (unlikely(data_len < 0) || error) diff -Nru dpdk-24.11.3/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h dpdk-24.11.4/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h --- dpdk-24.11.3/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h 2025-12-19 12:05:33.000000000 +0000 @@ -430,7 +430,8 @@ qat_sym_build_req_set_data(struct icp_qat_fw_la_bulk_req *req, void *opaque, struct qat_sym_op_cookie *cookie, struct rte_crypto_vec *src_vec, uint16_t n_src, - struct rte_crypto_vec *dst_vec, uint16_t n_dst) + struct rte_crypto_vec *dst_vec, uint16_t n_dst, + union rte_crypto_sym_ofs *ofs, struct rte_crypto_op *op) { struct qat_sgl *list; uint32_t i; @@ -502,6 +503,24 @@ dst_data_start = src_data_start; } + /* For crypto API only try to align the in-place buffers*/ + if (op != NULL && likely(n_dst == 0)) { + uint16_t offset = src_data_start & RTE_CACHE_LINE_MASK; + if (offset) { + rte_iova_t buff_addr = rte_mbuf_iova_get(op->sym->m_src); + /* make sure src_data_start is still within the buffer */ + if (src_data_start - offset >= buff_addr) { + src_data_start -= offset; + dst_data_start = 
src_data_start; + ofs->ofs.auth.head += offset; + ofs->ofs.cipher.head += offset; + tl_src += offset; + total_len_src = tl_src; + total_len_dst = tl_src; + } + } + } + req->comn_mid.src_data_addr = src_data_start; req->comn_mid.dest_data_addr = dst_data_start; req->comn_mid.src_length = total_len_src; diff -Nru dpdk-24.11.3/drivers/crypto/qat/dev/qat_sym_pmd_gen1.c dpdk-24.11.4/drivers/crypto/qat/dev/qat_sym_pmd_gen1.c --- dpdk-24.11.3/drivers/crypto/qat/dev/qat_sym_pmd_gen1.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/crypto/qat/dev/qat_sym_pmd_gen1.c 2025-12-19 12:05:33.000000000 +0000 @@ -242,7 +242,7 @@ } total_len = qat_sym_build_req_set_data(req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -297,7 +297,7 @@ req->comn_hdr.serv_specif_flags, 0); total_len = qat_sym_build_req_set_data(req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -345,7 +345,7 @@ } total_len = qat_sym_build_req_set_data(req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -390,7 +390,7 @@ } total_len = qat_sym_build_req_set_data(req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -523,7 +523,7 @@ rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = qat_sym_build_req_set_data(req, user_data, cookie, - data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -585,7 +585,7 @@ data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } if (unlikely(data_len < 0 || error)) @@ -640,7 +640,7 @@ rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = qat_sym_build_req_set_data(req, user_data, cookie, - data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -710,7 +710,7 @@ data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } if (unlikely(data_len < 0 || error)) @@ -770,7 +770,7 @@ rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = qat_sym_build_req_set_data(req, user_data, cookie, - data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -843,7 +843,7 @@ data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } if (unlikely(data_len < 0 || error)) @@ -910,7 +910,7 @@ rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = qat_sym_build_req_set_data(req, user_data, cookie, - 
data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -969,7 +969,7 @@ data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } if (unlikely(data_len < 0) || error) diff -Nru dpdk-24.11.3/drivers/crypto/qat/qat_asym.c dpdk-24.11.4/drivers/crypto/qat/qat_asym.c --- dpdk-24.11.3/drivers/crypto/qat/qat_asym.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/crypto/qat/qat_asym.c 2025-12-19 12:05:33.000000000 +0000 @@ -801,13 +801,13 @@ qat_func_alignsize = RTE_ALIGN_CEIL(qat_function.bytesize, 8); if (asym_op->ecdh.ke_type == RTE_CRYPTO_ASYM_KE_PUB_KEY_GENERATE) { - SET_PKE_LN(asym_op->ecdh.priv_key, qat_func_alignsize, 0); + SET_PKE_LN(xform->ec.pkey, qat_func_alignsize, 0); SET_PKE_LN_EC(curve[curve_id], x, 1); SET_PKE_LN_EC(curve[curve_id], y, 2); } else { - SET_PKE_LN(asym_op->ecdh.priv_key, qat_func_alignsize, 0); - SET_PKE_LN(asym_op->ecdh.pub_key.x, qat_func_alignsize, 1); - SET_PKE_LN(asym_op->ecdh.pub_key.y, qat_func_alignsize, 2); + SET_PKE_LN(xform->ec.pkey, qat_func_alignsize, 0); + SET_PKE_LN(xform->ec.q.x, qat_func_alignsize, 1); + SET_PKE_LN(xform->ec.q.y, qat_func_alignsize, 2); } SET_PKE_LN_EC(curve[curve_id], a, 3); SET_PKE_LN_EC(curve[curve_id], b, 4); @@ -1567,7 +1567,7 @@ char name[RTE_CRYPTODEV_NAME_MAX_LEN]; char capa_memz_name[RTE_CRYPTODEV_NAME_MAX_LEN]; uint16_t sub_id = qat_dev_instance->pci_dev->id.subsystem_device_id; - char *cmdline = NULL; + const char *cmdline = NULL; snprintf(name, RTE_CRYPTODEV_NAME_MAX_LEN, "%s_%s", qat_pci_dev->name, "asym"); diff -Nru dpdk-24.11.3/drivers/crypto/qat/qat_sym.c dpdk-24.11.4/drivers/crypto/qat/qat_sym.c --- dpdk-24.11.3/drivers/crypto/qat/qat_sym.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/crypto/qat/qat_sym.c 2025-12-19 12:05:33.000000000 +0000 @@ -217,7 +217,7 @@ const struct qat_crypto_gen_dev_ops *gen_dev_ops = &qat_sym_gen_dev_ops[qat_pci_dev->qat_dev_gen]; uint16_t sub_id = qat_dev_instance->pci_dev->id.subsystem_device_id; - char *cmdline = NULL; + const char *cmdline = NULL; snprintf(name, RTE_CRYPTODEV_NAME_MAX_LEN, "%s_%s", qat_pci_dev->name, "sym"); diff -Nru dpdk-24.11.3/drivers/crypto/qat/qat_sym_session.c dpdk-24.11.4/drivers/crypto/qat/qat_sym_session.c --- dpdk-24.11.3/drivers/crypto/qat/qat_sym_session.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/crypto/qat/qat_sym_session.c 2025-12-19 12:05:33.000000000 +0000 @@ -2793,7 +2793,8 @@ hash->auth_counter.counter = 0; hash_cd_ctrl->outer_prefix_sz = digestsize; - auth_param->hash_state_sz = digestsize; + auth_param->hash_state_sz = (RTE_ALIGN_CEIL(auth_param->u2.aad_sz, + ICP_QAT_HW_CCM_AAD_ALIGNMENT) >> 3); memcpy(cdesc->cd_cur_ptr + state1_size, authkey, authkeylen); break; diff -Nru dpdk-24.11.3/drivers/dma/hisilicon/hisi_dmadev.c dpdk-24.11.4/drivers/dma/hisilicon/hisi_dmadev.c --- dpdk-24.11.3/drivers/dma/hisilicon/hisi_dmadev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/dma/hisilicon/hisi_dmadev.c 2025-12-19 12:05:33.000000000 +0000 @@ -376,6 +376,7 @@ hw->cq_head = 0; hw->cqs_completed = 0; hw->cqe_vld = 1; + hw->stop_proc = 0; hw->submitted = 0; hw->completed = 0; hw->errors = 0; @@ -388,12 +389,6 @@ } static int -hisi_dma_stop(struct rte_dma_dev *dev) -{ - return hisi_dma_reset_hw(dev->data->dev_private); -} - -static int hisi_dma_close(struct rte_dma_dev *dev) { if (rte_eal_process_type() == RTE_PROC_PRIMARY) { @@ -454,6 
+449,37 @@ return 0; } +static int +hisi_dma_stop(struct rte_dma_dev *dev) +{ +#define MAX_WAIT_MSEC 10 + struct hisi_dma_dev *hw = dev->data->dev_private; + enum rte_dma_vchan_status status; + uint32_t i; + + /* Flag stop processing new requests. */ + hw->stop_proc = 1; + rte_delay_ms(1); + + /* Force set drop flag so that the hardware can quickly complete. */ + for (i = 0; i <= hw->sq_depth_mask; i++) + hw->sqe[i].dw0 |= SQE_DROP_FLAG; + + i = 0; + do { + hisi_dma_vchan_status(dev, 0, &status); + if (status != RTE_DMA_VCHAN_ACTIVE) + break; + rte_delay_ms(1); + } while (i++ < MAX_WAIT_MSEC); + if (status == RTE_DMA_VCHAN_ACTIVE) { + HISI_DMA_ERR(hw, "dev is still active!"); + return -EBUSY; + } + + return hisi_dma_reset_hw(dev->data->dev_private); +} + static void hisi_dma_dump_range(struct hisi_dma_dev *hw, FILE *f, uint32_t start, uint32_t end) @@ -548,14 +574,14 @@ " revision: 0x%x queue_id: %u ring_size: %u\n" " ridx: %u cridx: %u\n" " sq_head: %u sq_tail: %u cq_sq_head: %u\n" - " cq_head: %u cqs_completed: %u cqe_vld: %u\n" + " cq_head: %u cqs_completed: %u cqe_vld: %u stop_proc: %u\n" " submitted: %" PRIu64 " completed: %" PRIu64 " errors: %" PRIu64 " qfulls: %" PRIu64 "\n", hw->revision, hw->queue_id, hw->sq_depth_mask > 0 ? hw->sq_depth_mask + 1 : 0, hw->ridx, hw->cridx, hw->sq_head, hw->sq_tail, hw->cq_sq_head, - hw->cq_head, hw->cqs_completed, hw->cqe_vld, + hw->cq_head, hw->cqs_completed, hw->cqe_vld, hw->stop_proc, hw->submitted, hw->completed, hw->errors, hw->qfulls); hisi_dma_dump_queue(hw, f); hisi_dma_dump_common(hw, f); @@ -573,6 +599,9 @@ RTE_SET_USED(vchan); + if (unlikely(hw->stop_proc > 0)) + return -EPERM; + if (((hw->sq_tail + 1) & hw->sq_depth_mask) == hw->sq_head) { hw->qfulls++; return -ENOSPC; diff -Nru dpdk-24.11.3/drivers/dma/hisilicon/hisi_dmadev.h dpdk-24.11.4/drivers/dma/hisilicon/hisi_dmadev.h --- dpdk-24.11.3/drivers/dma/hisilicon/hisi_dmadev.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/dma/hisilicon/hisi_dmadev.h 2025-12-19 12:05:33.000000000 +0000 @@ -142,6 +142,7 @@ struct hisi_dma_sqe { uint32_t dw0; +#define SQE_DROP_FLAG BIT(4) #define SQE_FENCE_FLAG BIT(10) #define SQE_OPCODE_M2M 0x4 uint32_t dw1; @@ -212,6 +213,7 @@ */ uint16_t cqs_completed; uint8_t cqe_vld; /**< valid bit for CQE, will change for every round. */ + volatile uint8_t stop_proc; /**< whether stop processing new requests. 
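/*
 * Illustrative sketch, not part of the patch: the new hisi_dma_stop() above
 * flags the queue to reject new work, marks outstanding SQEs with a drop
 * flag, then polls the vchan status for up to 10 ms before resetting the
 * hardware. The bounded-poll step in generic form; names are hypothetical.
 */
#include <errno.h>
#include <stdbool.h>
#include <rte_cycles.h> /* rte_delay_ms() */

static int
wait_until_idle(bool (*is_idle)(void *), void *ctx, unsigned int max_ms)
{
	unsigned int waited = 0;

	while (!is_idle(ctx)) {
		if (waited++ >= max_ms)
			return -EBUSY; /* still busy after the deadline */
		rte_delay_ms(1);
	}
	return 0;
}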
*/ uint64_t submitted; uint64_t completed; diff -Nru dpdk-24.11.3/drivers/net/af_packet/rte_eth_af_packet.c dpdk-24.11.4/drivers/net/af_packet/rte_eth_af_packet.c --- dpdk-24.11.3/drivers/net/af_packet/rte_eth_af_packet.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/af_packet/rte_eth_af_packet.c 2025-12-19 12:05:33.000000000 +0000 @@ -489,7 +489,7 @@ rte_free(internals->rx_queue[q].rd); rte_free(internals->tx_queue[q].rd); } - free(internals->if_name); + rte_free(internals->if_name); rte_free(internals->rx_queue); rte_free(internals->tx_queue); @@ -793,9 +793,10 @@ PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFINDEX)", name); goto free_internals; } - (*internals)->if_name = strdup(pair->value); + (*internals)->if_name = rte_malloc_socket(name, ifnamelen + 1, 0, numa_node); if ((*internals)->if_name == NULL) goto free_internals; + strlcpy((*internals)->if_name, pair->value, ifnamelen + 1); (*internals)->if_index = ifr.ifr_ifindex; if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) { @@ -982,7 +983,7 @@ free_internals: rte_free((*internals)->rx_queue); rte_free((*internals)->tx_queue); - free((*internals)->if_name); + rte_free((*internals)->if_name); rte_free(*internals); return -1; } diff -Nru dpdk-24.11.3/drivers/net/ark/ark_ethdev_rx.c dpdk-24.11.4/drivers/net/ark/ark_ethdev_rx.c --- dpdk-24.11.3/drivers/net/ark/ark_ethdev_rx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ark/ark_ethdev_rx.c 2025-12-19 12:05:33.000000000 +0000 @@ -538,7 +538,6 @@ eth_ark_dev_rx_queue_release(void *vqueue) { struct ark_rx_queue *queue; - uint32_t i; queue = (struct ark_rx_queue *)vqueue; if (queue == 0) @@ -551,9 +550,6 @@ /* Need to clear out mbufs here, dropping packets along the way */ eth_ark_rx_queue_drain(queue); - for (i = 0; i < queue->queue_size; ++i) - rte_pktmbuf_free(queue->reserve_q[i]); - rte_free(queue->reserve_q); rte_free(queue->paddress_q); rte_free(queue); diff -Nru dpdk-24.11.3/drivers/net/axgbe/axgbe_ethdev.c dpdk-24.11.4/drivers/net/axgbe/axgbe_ethdev.c --- dpdk-24.11.3/drivers/net/axgbe/axgbe_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/axgbe/axgbe_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -1735,6 +1735,7 @@ { struct axgbe_port *pdata = dev->data->dev_private; unsigned int mac_tscr = 0; + unsigned int value = 0; /*disable timestamp for all pkts*/ AXGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENALL, 0); @@ -1744,6 +1745,11 @@ AXGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSCFUPDT, 0); /*disable time stamp*/ AXGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 0); + + value = AXGMAC_IOREAD(pdata, MAC_TSCR); + value |= mac_tscr; + AXGMAC_IOWRITE(pdata, MAC_TSCR, value); + return 0; } diff -Nru dpdk-24.11.3/drivers/net/bnxt/tf_core/v3/tfo.c dpdk-24.11.4/drivers/net/bnxt/tf_core/v3/tfo.c --- dpdk-24.11.3/drivers/net/bnxt/tf_core/v3/tfo.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/bnxt/tf_core/v3/tfo.c 2025-12-19 12:05:33.000000000 +0000 @@ -74,9 +74,7 @@ rc = cfa_bld_mpc_bind(CFA_P70, &tfco->mpc_info); if (rc) { PMD_DRV_LOG_LINE(ERR, "MPC bind failed"); - rte_free(tfco); - *tfo = NULL; - return; + goto cleanup; } if (is_pf) { /* Allocate TIM */ @@ -104,8 +102,8 @@ return; cleanup: - if (tfco != NULL) - rte_free(tfo); + rte_free(tfco); + *tfo = NULL; } void tfo_close(void **tfo) diff -Nru dpdk-24.11.3/drivers/net/bonding/rte_eth_bond_8023ad.c dpdk-24.11.4/drivers/net/bonding/rte_eth_bond_8023ad.c --- dpdk-24.11.3/drivers/net/bonding/rte_eth_bond_8023ad.c 2025-08-18 15:26:42.000000000 +0000 +++ 
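/*
 * Illustrative sketch, not part of the patch: the af_packet hunks above swap
 * strdup()/free() for rte_malloc_socket()/strlcpy()/rte_free() so the
 * interface name lives on the port's NUMA node and is released with the
 * matching allocator. Minimal form of that pattern; names are hypothetical.
 */
#include <string.h>
#include <rte_malloc.h>
#include <rte_string_fns.h> /* strlcpy() */

static char *
dup_on_socket(const char *src, int socket_id)
{
	size_t len = strlen(src) + 1;
	char *copy = rte_malloc_socket(NULL, len, 0, socket_id);

	if (copy != NULL)
		strlcpy(copy, src, len);
	return copy; /* must be released with rte_free(), never free() */
}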
dpdk-24.11.4/drivers/net/bonding/rte_eth_bond_8023ad.c 2025-12-19 12:05:33.000000000 +0000 @@ -1185,6 +1185,14 @@ continue; rte_ether_addr_copy(&internals->mode4.mac_addr, &member->actor.system); + + /* Update physical NIC hardware MAC address to match bonding device. */ + if (rte_eth_dev_default_mac_addr_set(member_id, &internals->mode4.mac_addr) != 0) { + RTE_BOND_LOG(ERR, + "Failed to update MAC address on member port %u", + member_id); + } + /* Do nothing if this port is not an aggregator. In other case * Set NTT flag on every port that use this aggregator. */ if (member->aggregator_port_id != member_id) diff -Nru dpdk-24.11.3/drivers/net/cnxk/cn10k_ethdev_sec.c dpdk-24.11.4/drivers/net/cnxk/cn10k_ethdev_sec.c --- dpdk-24.11.3/drivers/net/cnxk/cn10k_ethdev_sec.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/cnxk/cn10k_ethdev_sec.c 2025-12-19 12:05:33.000000000 +0000 @@ -1346,6 +1346,8 @@ roc_idev_nix_rx_inject_set(port_id, enable); inl_lf = roc_nix_inl_inb_inj_lf_get(nix); + if (!inl_lf) + return -ENOTSUP; sa_base = roc_nix_inl_inb_sa_base_get(nix, dev->inb.inl_dev); inj_cfg = &dev->inj_cfg; diff -Nru dpdk-24.11.3/drivers/net/cnxk/cnxk_ethdev_mtr.c dpdk-24.11.4/drivers/net/cnxk/cnxk_ethdev_mtr.c --- dpdk-24.11.3/drivers/net/cnxk/cnxk_ethdev_mtr.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/cnxk/cnxk_ethdev_mtr.c 2025-12-19 12:05:33.000000000 +0000 @@ -1261,7 +1261,13 @@ cfg->alg = alg_map[profile->profile.alg]; cfg->lmode = profile->profile.packet_mode; - cfg->icolor = color_map[mtr->params.default_input_color]; + int idx = mtr->params.default_input_color; + + /* Index validation */ + if (idx >= RTE_COLORS) + cfg->icolor = ROC_NIX_BPF_COLOR_GREEN; + else + cfg->icolor = color_map[idx]; switch (RTE_MTR_COLOR_IN_PROTO_OUTER_IP) { case RTE_MTR_COLOR_IN_PROTO_OUTER_IP: diff -Nru dpdk-24.11.3/drivers/net/dpaa/dpaa_ethdev.c dpdk-24.11.4/drivers/net/dpaa/dpaa_ethdev.c --- dpdk-24.11.3/drivers/net/dpaa/dpaa_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/dpaa/dpaa_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -54,6 +54,7 @@ #define DRIVER_IEEE1588 "drv_ieee1588" #define CHECK_INTERVAL 100 /* 100ms */ #define MAX_REPEAT_TIME 90 /* 9s (90 * 100ms) in total */ +#define RTE_PRIORITY_103 103 /* Supported Rx offloads */ static uint64_t dev_rx_offloads_sup = @@ -306,11 +307,12 @@ } if (!(default_q || fmc_q)) { - if (dpaa_fm_config(dev, - eth_conf->rx_adv_conf.rss_conf.rss_hf)) { + ret = dpaa_fm_config(dev, + eth_conf->rx_adv_conf.rss_conf.rss_hf); + if (ret) { dpaa_write_fm_config_to_file(); - DPAA_PMD_ERR("FM port configuration: Failed"); - return -1; + DPAA_PMD_ERR("FM port configuration: Failed(%d)", ret); + return ret; } dpaa_write_fm_config_to_file(); } @@ -513,6 +515,7 @@ struct rte_intr_handle *intr_handle; struct rte_eth_link *link = &dev->data->dev_link; struct dpaa_if *dpaa_intf = dev->data->dev_private; + struct qman_fq *fq; int loop; int ret; @@ -522,14 +525,17 @@ return 0; if (!dpaa_intf) { - DPAA_PMD_WARN("Already closed or not started"); - return -1; + DPAA_PMD_DEBUG("Already closed or not started"); + return -ENOENT; } /* DPAA FM deconfig */ if (!(default_q || fmc_q)) { - if (dpaa_fm_deconfig(dpaa_intf, dev->process_private)) - DPAA_PMD_WARN("DPAA FM deconfig failed"); + ret = dpaa_fm_deconfig(dpaa_intf, dev->process_private); + if (ret) { + DPAA_PMD_WARN("%s: FM deconfig failed(%d)", + dev->data->name, ret); + } } dpaa_dev = container_of(rdev, struct rte_dpaa_device, device); @@ -537,21 +543,37 @@ __fif = 
container_of(fif, struct __fman_if, __if); ret = dpaa_eth_dev_stop(dev); + if (ret) { + DPAA_PMD_WARN("%s: stop device failed(%d)", + dev->data->name, ret); + } if (fif->mac_type == fman_offline_internal || fif->mac_type == fman_onic) return 0; /* Reset link to autoneg */ - if (link->link_status && !link->link_autoneg) - dpaa_restart_link_autoneg(__fif->node_name); + if (link->link_status && !link->link_autoneg) { + ret = dpaa_restart_link_autoneg(__fif->node_name); + if (ret) { + DPAA_PMD_WARN("%s: restart link failed(%d)", + dev->data->name, ret); + } + } if (intr_handle && rte_intr_fd_get(intr_handle) && dev->data->dev_conf.intr_conf.lsc != 0) { - dpaa_intr_disable(__fif->node_name); - rte_intr_callback_unregister(intr_handle, - dpaa_interrupt_handler, - (void *)dev); + ret = dpaa_intr_disable(__fif->node_name); + if (ret) { + DPAA_PMD_WARN("%s: disable interrupt failed(%d)", + dev->data->name, ret); + } + ret = rte_intr_callback_unregister(intr_handle, + dpaa_interrupt_handler, (void *)dev); + if (ret) { + DPAA_PMD_WARN("%s: unregister interrupt failed(%d)", + dev->data->name, ret); + } } /* release configuration memory */ @@ -559,33 +581,60 @@ /* Release RX congestion Groups */ if (dpaa_intf->cgr_rx) { - for (loop = 0; loop < dpaa_intf->nb_rx_queues; loop++) - qman_delete_cgr(&dpaa_intf->cgr_rx[loop]); + for (loop = 0; loop < dpaa_intf->nb_rx_queues; loop++) { + ret = qman_delete_cgr(&dpaa_intf->cgr_rx[loop]); + if (ret) { + DPAA_PMD_WARN("%s: delete rxq%d's cgr err(%d)", + dev->data->name, loop, ret); + } + } rte_free(dpaa_intf->cgr_rx); dpaa_intf->cgr_rx = NULL; } /* Release TX congestion Groups */ if (dpaa_intf->cgr_tx) { - for (loop = 0; loop < MAX_DPAA_CORES; loop++) - qman_delete_cgr(&dpaa_intf->cgr_tx[loop]); + for (loop = 0; loop < MAX_DPAA_CORES; loop++) { + ret = qman_delete_cgr(&dpaa_intf->cgr_tx[loop]); + if (ret) { + DPAA_PMD_WARN("%s: delete txq%d's cgr err(%d)", + dev->data->name, loop, ret); + } + } rte_free(dpaa_intf->cgr_tx); dpaa_intf->cgr_tx = NULL; } + /* Freeing queue specific portals */ + for (loop = 0; loop < dpaa_intf->nb_rx_queues; loop++) { + if (!dpaa_intf->rx_queues) + break; + + fq = &dpaa_intf->rx_queues[loop]; + if (fq->qp_initialized) { + rte_dpaa_portal_fq_close(fq); + fq->qp_initialized = 0; + } + } + rte_free(dpaa_intf->rx_queues); dpaa_intf->rx_queues = NULL; rte_free(dpaa_intf->tx_queues); dpaa_intf->tx_queues = NULL; if (dpaa_intf->port_handle) { - if (dpaa_fm_deconfig(dpaa_intf, fif)) - DPAA_PMD_WARN("DPAA FM " - "deconfig failed"); + ret = dpaa_fm_deconfig(dpaa_intf, fif); + if (ret) { + DPAA_PMD_WARN("%s: FM deconfig failed(%d)", + dev->data->name, ret); + } } if (fif->num_profiles) { - if (dpaa_port_vsp_cleanup(dpaa_intf, fif)) - DPAA_PMD_WARN("DPAA FM vsp cleanup failed"); + ret = dpaa_port_vsp_cleanup(dpaa_intf, fif); + if (ret) { + DPAA_PMD_WARN("%s: cleanup VSP failed(%d)", + dev->data->name, ret); + } } return ret; @@ -1478,6 +1527,8 @@ { struct dpaa_if *dpaa_intf = dev->data->dev_private; struct rte_eth_fc_conf *net_fc; + struct fman_if *fm_if = dev->process_private; + int ret; PMD_INIT_FUNC_TRACE(); @@ -1496,19 +1547,31 @@ return -EINVAL; } - if (fc_conf->mode == RTE_ETH_FC_NONE) { + if (fc_conf->mode == RTE_ETH_FC_NONE) return 0; - } else if (fc_conf->mode == RTE_ETH_FC_TX_PAUSE || - fc_conf->mode == RTE_ETH_FC_FULL) { - fman_if_set_fc_threshold(dev->process_private, + + if (fc_conf->mode != RTE_ETH_FC_TX_PAUSE && + fc_conf->mode != RTE_ETH_FC_FULL) + goto save_fc; + + ret = fman_if_set_fc_threshold(fm_if, fc_conf->high_water, 
fc_conf->low_water, dpaa_intf->bp_info->bpid); - if (fc_conf->pause_time) - fman_if_set_fc_quanta(dev->process_private, - fc_conf->pause_time); + if (ret) { + DPAA_PMD_ERR("Set %s's fc on bpid(%d) err(%d)", + dev->data->name, dpaa_intf->bp_info->bpid, + ret); + } + if (fc_conf->pause_time) { + ret = fman_if_set_fc_quanta(fm_if, fc_conf->pause_time); + if (ret) { + DPAA_PMD_ERR("Set %s's fc pause time err(%d)", + dev->data->name, ret); + } } +save_fc: /* Save the information in dpaa device */ net_fc->pause_time = fc_conf->pause_time; net_fc->high_water = fc_conf->high_water; @@ -1635,13 +1698,15 @@ { struct rte_eth_dev_data *data = dev->data; struct rte_eth_conf *eth_conf = &data->dev_conf; + int ret; PMD_INIT_FUNC_TRACE(); if (!(default_q || fmc_q)) { - if (dpaa_fm_config(dev, rss_conf->rss_hf)) { - DPAA_PMD_ERR("FM port configuration: Failed"); - return -1; + ret = dpaa_fm_config(dev, rss_conf->rss_hf); + if (ret) { + DPAA_PMD_ERR("FM port configuration: Failed(%d)", ret); + return ret; } eth_conf->rx_adv_conf.rss_conf.rss_hf = rss_conf->rss_hf; } else { @@ -2242,8 +2307,8 @@ /* Each device can not have more than DPAA_MAX_NUM_PCD_QUEUES RX * queues. */ - if (num_rx_fqs < 0 || num_rx_fqs > DPAA_MAX_NUM_PCD_QUEUES) { - DPAA_PMD_ERR("Invalid number of RX queues"); + if (num_rx_fqs > DPAA_MAX_NUM_PCD_QUEUES) { + DPAA_PMD_ERR("Invalid number of RX queues(%d)", num_rx_fqs); return -EINVAL; } @@ -2499,8 +2564,8 @@ eth_dev->dev_ops = &dpaa_devops; ret = dpaa_dev_init_secondary(eth_dev); - if (ret != 0) { - DPAA_PMD_ERR("secondary dev init failed"); + if (ret) { + DPAA_PMD_ERR("secondary dev init failed(%d)", ret); return ret; } @@ -2515,9 +2580,10 @@ } if (!(default_q || fmc_q)) { - if (dpaa_fm_init()) { - DPAA_PMD_ERR("FM init failed"); - return -1; + ret = dpaa_fm_init(); + if (ret) { + DPAA_PMD_ERR("FM init failed(%d)", ret); + return ret; } } @@ -2592,39 +2658,70 @@ return diag; } -static int -rte_dpaa_remove(struct rte_dpaa_device *dpaa_dev) +/* Adding destructor for double check in case non-gracefully + * exit. 
+ */ +RTE_FINI_PRIO(dpaa_finish, 103) { - struct rte_eth_dev *eth_dev; - int ret; + struct dpaa_if *dpaa_intf; + int loop; + struct qman_fq *fq; + uint16_t portid; + struct rte_eth_dev *dev; PMD_INIT_FUNC_TRACE(); - - eth_dev = dpaa_dev->eth_dev; - dpaa_eth_dev_close(eth_dev); - dpaa_valid_dev--; - if (!dpaa_valid_dev) - rte_mempool_free(dpaa_tx_sg_pool); - ret = rte_eth_dev_release_port(eth_dev); - - return ret; -} - -static void __attribute__((destructor(102))) dpaa_finish(void) -{ /* For secondary, primary will do all the cleanup */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) return; + if (!is_global_init) + return; + if (!(default_q || fmc_q)) { - if (is_global_init) if (dpaa_fm_term()) DPAA_PMD_WARN("DPAA FM term failed"); - is_global_init = 0; - DPAA_PMD_INFO("DPAA fman cleaned up"); } + + RTE_ETH_FOREACH_DEV(portid) { + dev = &rte_eth_devices[portid]; + if (strcmp(dev->device->driver->name, + rte_dpaa_pmd.driver.name)) + continue; + dpaa_intf = dev->data->dev_private; + /* Freeing queue specific portals */ + for (loop = 0; loop < dpaa_intf->nb_rx_queues; loop++) { + if (!dpaa_intf->rx_queues) + break; + + fq = &dpaa_intf->rx_queues[loop]; + if (fq->qp_initialized) { + rte_dpaa_portal_fq_close(fq); + fq->qp_initialized = 0; + } + } + } + is_global_init = 0; +} + +static int +rte_dpaa_remove(struct rte_dpaa_device *dpaa_dev) +{ + struct rte_eth_dev *eth_dev; + int ret; + + PMD_INIT_FUNC_TRACE(); + + eth_dev = dpaa_dev->eth_dev; + dpaa_eth_dev_close(eth_dev); + ret = rte_eth_dev_release_port(eth_dev); + dpaa_valid_dev--; + if (!dpaa_valid_dev) { + rte_mempool_free(dpaa_tx_sg_pool); + dpaa_finish(); + } + return ret; } static struct rte_dpaa_driver rte_dpaa_pmd = { diff -Nru dpdk-24.11.3/drivers/net/dpaa/dpaa_flow.c dpdk-24.11.4/drivers/net/dpaa/dpaa_flow.c --- dpdk-24.11.3/drivers/net/dpaa/dpaa_flow.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/dpaa/dpaa_flow.c 2025-12-19 12:05:33.000000000 +0000 @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2017-2019,2021-2024 NXP + * Copyright 2017-2019,2021-2025 NXP */ /* System headers */ @@ -889,9 +889,9 @@ /* FM PCD Enable */ ret = fm_pcd_enable(pcd_handle); if (ret) { - fm_close(fman_handle); - fm_pcd_close(pcd_handle); DPAA_PMD_ERR("fm_pcd_enable: Failed"); + fm_pcd_close(pcd_handle); + fm_close(fman_handle); return -1; } diff -Nru dpdk-24.11.3/drivers/net/dpaa2/base/dpaa2_hw_dpni.c dpdk-24.11.4/drivers/net/dpaa2/base/dpaa2_hw_dpni.c --- dpdk-24.11.3/drivers/net/dpaa2/base/dpaa2_hw_dpni.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/dpaa2/base/dpaa2_hw_dpni.c 2025-12-19 12:05:33.000000000 +0000 @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved. - * Copyright 2016-2021 NXP + * Copyright 2016-2021,2023-2025 NXP * */ @@ -58,6 +58,7 @@ return -ENOMEM; } + memset(&kg_cfg, 0, sizeof(struct dpkg_profile_cfg)); kg_cfg.extracts[0].type = DPKG_EXTRACT_FROM_DATA; kg_cfg.extracts[0].extract.from_data.offset = offset; kg_cfg.extracts[0].extract.from_data.size = size; diff -Nru dpdk-24.11.3/drivers/net/dpaa2/base/dpaa2_hw_dpni_annot.h dpdk-24.11.4/drivers/net/dpaa2/base/dpaa2_hw_dpni_annot.h --- dpdk-24.11.3/drivers/net/dpaa2/base/dpaa2_hw_dpni_annot.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/dpaa2/base/dpaa2_hw_dpni_annot.h 2025-12-19 12:05:33.000000000 +0000 @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2016 Freescale Semiconductor, Inc. 
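/*
 * Illustrative sketch, not part of the patch: the dpaa hunk above turns the
 * cleanup path into a late destructor with RTE_FINI_PRIO() and walks every
 * ethdev port, acting only on ports bound to its own driver. RTE_FINI_PRIO()
 * builds the priority from the token RTE_PRIORITY_<prio>, which is why the
 * patch also adds "#define RTE_PRIORITY_103 103" earlier. Generic shape of
 * the loop; the driver name and per-port cleanup are hypothetical.
 */
#include <string.h>
#include <rte_common.h>
#include <rte_ethdev.h>

#define RTE_PRIORITY_103 103 /* lets RTE_FINI_PRIO(..., 103) expand */

RTE_FINI_PRIO(example_finish, 103)
{
	struct rte_eth_dev_info info;
	uint16_t port_id;

	RTE_ETH_FOREACH_DEV(port_id) {
		if (rte_eth_dev_info_get(port_id, &info) != 0)
			continue;
		if (strcmp(info.driver_name, "net_example") != 0)
			continue;
		/* release per-port resources owned by this driver here */
	}
}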
All rights reserved. - * Copyright 2016,2019 NXP + * Copyright 2016,2019,2022,2024 NXP * */ @@ -298,13 +298,13 @@ #define DPAA2_ETH_FAS_PHE 0x00000020 #define DPAA2_ETH_FAS_BLE 0x00000010 /* L3 csum validation performed */ -#define DPAA2_ETH_FAS_L3CV 0x00000008 +#define DPAA2_ETH_FAS_L3CV 0x0000000800000000 /* L3 csum error */ -#define DPAA2_ETH_FAS_L3CE 0x00000004 +#define DPAA2_ETH_FAS_L3CE 0x0000000400000000 /* L4 csum validation performed */ -#define DPAA2_ETH_FAS_L4CV 0x00000002 +#define DPAA2_ETH_FAS_L4CV 0x0000000200000000 /* L4 csum error */ -#define DPAA2_ETH_FAS_L4CE 0x00000001 +#define DPAA2_ETH_FAS_L4CE 0x0000000100000000 #ifdef __cplusplus } diff -Nru dpdk-24.11.3/drivers/net/dpaa2/dpaa2_ethdev.c dpdk-24.11.4/drivers/net/dpaa2/dpaa2_ethdev.c --- dpdk-24.11.3/drivers/net/dpaa2/dpaa2_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/dpaa2/dpaa2_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -482,6 +482,27 @@ } static void +dpaa2_clear_queue_active_dps(struct dpaa2_queue *q, int num_lcores) +{ + int i; + + for (i = 0; i < num_lcores; i++) { + struct queue_storage_info_t *qs = q->q_storage[i]; + + if (!qs) + continue; + + if (qs->active_dqs) { + while (!qbman_check_command_complete(qs->active_dqs)) + continue; /* wait */ + + clear_swp_active_dqs(qs->active_dpio_id); + qs->active_dqs = NULL; + } + } +} + +static void dpaa2_free_rx_tx_queues(struct rte_eth_dev *dev) { struct dpaa2_dev_priv *priv = dev->data->dev_private; @@ -495,6 +516,8 @@ /* cleaning up queue storage */ for (i = 0; i < priv->nb_rx_queues; i++) { dpaa2_q = priv->rx_vq[i]; + dpaa2_clear_queue_active_dps(dpaa2_q, + RTE_MAX_LCORE); dpaa2_queue_storage_free(dpaa2_q, RTE_MAX_LCORE); } @@ -1253,7 +1276,7 @@ err_cfg.errors = DPNI_ERROR_L3CE | DPNI_ERROR_L4CE; /* if packet with parse error are not to be dropped */ - err_cfg.errors |= DPNI_ERROR_PHE; + err_cfg.errors |= DPNI_ERROR_PHE | DPNI_ERROR_BLE; err_cfg.error_action = DPNI_ERROR_ACTION_CONTINUE; } @@ -2798,7 +2821,7 @@ /* Init fields w.r.t. 
classification */ memset(&priv->extract.qos_key_extract, 0, sizeof(struct dpaa2_key_extract)); - priv->extract.qos_extract_param = rte_malloc(NULL, + priv->extract.qos_extract_param = rte_zmalloc(NULL, DPAA2_EXTRACT_PARAM_MAX_SIZE, RTE_CACHE_LINE_SIZE); if (!priv->extract.qos_extract_param) { @@ -2809,7 +2832,7 @@ for (i = 0; i < MAX_TCS; i++) { memset(&priv->extract.tc_key_extract[i], 0, sizeof(struct dpaa2_key_extract)); - priv->extract.tc_extract_param[i] = rte_malloc(NULL, + priv->extract.tc_extract_param[i] = rte_zmalloc(NULL, DPAA2_EXTRACT_PARAM_MAX_SIZE, RTE_CACHE_LINE_SIZE); if (!priv->extract.tc_extract_param[i]) { @@ -3028,7 +3051,6 @@ eth_dev->device = &dpaa2_dev->device; - dpaa2_dev->eth_dev = eth_dev; eth_dev->data->rx_mbuf_alloc_failed = 0; if (dpaa2_drv->drv_flags & RTE_DPAA2_DRV_INTR_LSC) @@ -3055,14 +3077,22 @@ rte_dpaa2_remove(struct rte_dpaa2_device *dpaa2_dev) { struct rte_eth_dev *eth_dev; - int ret; + int ret = 0; + + eth_dev = rte_eth_dev_allocated(dpaa2_dev->device.name); + if (eth_dev) { + ret = dpaa2_dev_close(eth_dev); + if (ret) + DPAA2_PMD_ERR("dpaa2_dev_close ret= %d", ret); + + ret = rte_eth_dev_release_port(eth_dev); + } - eth_dev = dpaa2_dev->eth_dev; - dpaa2_dev_close(eth_dev); dpaa2_valid_dev--; - if (!dpaa2_valid_dev) + if (!dpaa2_valid_dev) { rte_mempool_free(dpaa2_tx_sg_pool); - ret = rte_eth_dev_release_port(eth_dev); + dpaa2_tx_sg_pool = NULL; + } return ret; } diff -Nru dpdk-24.11.3/drivers/net/dpaa2/dpaa2_ethdev.h dpdk-24.11.4/drivers/net/dpaa2/dpaa2_ethdev.h --- dpdk-24.11.3/drivers/net/dpaa2/dpaa2_ethdev.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/dpaa2/dpaa2_ethdev.h 2025-12-19 12:05:33.000000000 +0000 @@ -31,7 +31,9 @@ #define MAX_DPNI 8 #define DPAA2_MAX_CHANNELS 16 -#define DPAA2_EXTRACT_PARAM_MAX_SIZE 256 +#define DPAA2_EXTRACT_PARAM_MAX_SIZE \ + RTE_ALIGN(sizeof(struct dpni_ext_set_rx_tc_dist), 256) + #define DPAA2_EXTRACT_ALLOC_KEY_MAX_SIZE 256 #define DPAA2_RX_DEFAULT_NBDESC 512 @@ -60,7 +62,7 @@ #define CONG_RX_OAL 128 /* Size of the input SMMU mapped memory required by MC */ -#define DIST_PARAM_IOVA_SIZE 256 +#define DIST_PARAM_IOVA_SIZE DPAA2_EXTRACT_PARAM_MAX_SIZE /* Enable TX Congestion control support * default is disable @@ -489,10 +491,4 @@ int dpaa2_dev_recycle_deconfig(struct rte_eth_dev *eth_dev); int dpaa2_soft_parser_loaded(void); -int dpaa2_dev_recycle_qp_setup(struct rte_dpaa2_device *dpaa2_dev, - uint16_t qidx, uint64_t cntx, - eth_rx_burst_t tx_lpbk, eth_tx_burst_t rx_lpbk, - struct dpaa2_queue **txq, - struct dpaa2_queue **rxq); - #endif /* _DPAA2_ETHDEV_H */ diff -Nru dpdk-24.11.3/drivers/net/dpaa2/dpaa2_flow.c dpdk-24.11.4/drivers/net/dpaa2/dpaa2_flow.c --- dpdk-24.11.3/drivers/net/dpaa2/dpaa2_flow.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/dpaa2/dpaa2_flow.c 2025-12-19 12:05:33.000000000 +0000 @@ -630,42 +630,42 @@ int offset, int size, enum dpaa2_flow_dist_type dist_type) { - int end; - if (dist_type & DPAA2_FLOW_QOS_TYPE) { - end = flow->qos_rule_size; - if (end > offset) { + if (offset < flow->qos_rule_size) { memmove(flow->qos_key_addr + offset + size, flow->qos_key_addr + offset, - end - offset); + flow->qos_rule_size - offset); memset(flow->qos_key_addr + offset, 0, size); memmove(flow->qos_mask_addr + offset + size, flow->qos_mask_addr + offset, - end - offset); + flow->qos_rule_size - offset); memset(flow->qos_mask_addr + offset, 0, size); + flow->qos_rule_size += size; + } else { + flow->qos_rule_size = offset + size; } - flow->qos_rule_size += size; } if 
(dist_type & DPAA2_FLOW_FS_TYPE) { - end = flow->fs_rule_size; - if (end > offset) { + if (offset < flow->fs_rule_size) { memmove(flow->fs_key_addr + offset + size, flow->fs_key_addr + offset, - end - offset); + flow->fs_rule_size - offset); memset(flow->fs_key_addr + offset, 0, size); memmove(flow->fs_mask_addr + offset + size, flow->fs_mask_addr + offset, - end - offset); + flow->fs_rule_size - offset); memset(flow->fs_mask_addr + offset, 0, size); + flow->fs_rule_size += size; + } else { + flow->fs_rule_size = offset + size; } - flow->fs_rule_size += size; } return 0; @@ -1488,8 +1488,9 @@ mask_addr = flow->qos_mask_addr + offset; if (!(*key_addr) && - key_profile->ip_addr_type == IP_NONE_ADDR_EXTRACT) - flow->qos_rule_size++; + key_profile->ip_addr_type == IP_NONE_ADDR_EXTRACT && + offset >= flow->qos_rule_size) + flow->qos_rule_size = offset + sizeof(uint8_t); *key_addr |= (1 << faf_bit_in_byte); *mask_addr |= (1 << faf_bit_in_byte); @@ -1510,8 +1511,9 @@ mask_addr = flow->fs_mask_addr + offset; if (!(*key_addr) && - key_profile->ip_addr_type == IP_NONE_ADDR_EXTRACT) - flow->fs_rule_size++; + key_profile->ip_addr_type == IP_NONE_ADDR_EXTRACT && + offset >= flow->fs_rule_size) + flow->fs_rule_size = offset + sizeof(uint8_t); *key_addr |= (1 << faf_bit_in_byte); *mask_addr |= (1 << faf_bit_in_byte); @@ -1529,6 +1531,7 @@ { int offset; uint32_t pr_field = pr_offset << 16 | pr_size; + char offset_info[64], size_info[64], rule_size_info[64]; offset = dpaa2_flow_extract_key_offset(key_profile, DPAA2_PR_KEY, NET_PROT_NONE, pr_field); @@ -1537,19 +1540,43 @@ pr_offset, pr_size); return -EINVAL; } + sprintf(offset_info, "offset(%d)", offset); + sprintf(size_info, "size(%d)", pr_size); if (dist_type & DPAA2_FLOW_QOS_TYPE) { + sprintf(rule_size_info, "qos rule size(%d)", + flow->qos_rule_size); memcpy((flow->qos_key_addr + offset), key, pr_size); memcpy((flow->qos_mask_addr + offset), mask, pr_size); - if (key_profile->ip_addr_type == IP_NONE_ADDR_EXTRACT) - flow->qos_rule_size = offset + pr_size; + if (key_profile->ip_addr_type == IP_NONE_ADDR_EXTRACT) { + if (offset >= flow->qos_rule_size) { + flow->qos_rule_size = offset + pr_size; + } else if ((offset + pr_size) > flow->qos_rule_size) { + DPAA2_PMD_ERR("%s < %s, but %s + %s > %s", + offset_info, rule_size_info, + offset_info, size_info, + rule_size_info); + return -EINVAL; + } + } } if (dist_type & DPAA2_FLOW_FS_TYPE) { + sprintf(rule_size_info, "fs rule size(%d)", + flow->fs_rule_size); memcpy((flow->fs_key_addr + offset), key, pr_size); memcpy((flow->fs_mask_addr + offset), mask, pr_size); - if (key_profile->ip_addr_type == IP_NONE_ADDR_EXTRACT) - flow->fs_rule_size = offset + pr_size; + if (key_profile->ip_addr_type == IP_NONE_ADDR_EXTRACT) { + if (offset >= flow->fs_rule_size) { + flow->fs_rule_size = offset + pr_size; + } else if ((offset + pr_size) > flow->fs_rule_size) { + DPAA2_PMD_ERR("%s < %s, but %s + %s > %s", + offset_info, rule_size_info, + offset_info, size_info, + rule_size_info); + return -EINVAL; + } + } } return 0; @@ -1563,6 +1590,7 @@ enum dpaa2_flow_dist_type dist_type) { int offset; + char offset_info[64], size_info[64], rule_size_info[64]; if (dpaa2_flow_ip_address_extract(prot, field)) { DPAA2_PMD_ERR("%s only for none IP address extract", @@ -1577,19 +1605,41 @@ prot, field); return -EINVAL; } + sprintf(offset_info, "offset(%d)", offset); + sprintf(size_info, "size(%d)", size); if (dist_type & DPAA2_FLOW_QOS_TYPE) { + sprintf(rule_size_info, "qos rule size(%d)", + flow->qos_rule_size); memcpy((flow->qos_key_addr + 
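/*
 * Illustrative sketch, not part of the patch: the dpaa2_flow hunks above
 * insert a new extract of `size` bytes at `offset` inside the key/mask blob.
 * If the offset falls inside the rule built so far, the tail is shifted up
 * and the rule grows by `size`; otherwise the rule is extended to
 * offset + size. Stand-alone version of that logic; names are hypothetical.
 */
#include <stdint.h>
#include <string.h>

static void
insert_key_slot(uint8_t *key, uint8_t *mask, uint16_t *rule_size,
		uint16_t offset, uint16_t size)
{
	if (offset < *rule_size) {
		/* Shift the existing tail up and zero the new slot. */
		memmove(key + offset + size, key + offset, *rule_size - offset);
		memset(key + offset, 0, size);
		memmove(mask + offset + size, mask + offset, *rule_size - offset);
		memset(mask + offset, 0, size);
		*rule_size += size;
	} else {
		*rule_size = offset + size;
	}
}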
offset), key, size); memcpy((flow->qos_mask_addr + offset), mask, size); - if (key_profile->ip_addr_type == IP_NONE_ADDR_EXTRACT) - flow->qos_rule_size = offset + size; + if (key_profile->ip_addr_type == IP_NONE_ADDR_EXTRACT) { + if (offset >= flow->qos_rule_size) { + flow->qos_rule_size = offset + size; + } else if ((offset + size) > flow->qos_rule_size) { + DPAA2_PMD_ERR("%s: %s < %s, but %s + %s > %s", + __func__, offset_info, rule_size_info, + offset_info, size_info, rule_size_info); + return -EINVAL; + } + } } if (dist_type & DPAA2_FLOW_FS_TYPE) { + sprintf(rule_size_info, "fs rule size(%d)", + flow->fs_rule_size); memcpy((flow->fs_key_addr + offset), key, size); memcpy((flow->fs_mask_addr + offset), mask, size); - if (key_profile->ip_addr_type == IP_NONE_ADDR_EXTRACT) - flow->fs_rule_size = offset + size; + if (key_profile->ip_addr_type == IP_NONE_ADDR_EXTRACT) { + if (offset >= flow->fs_rule_size) { + flow->fs_rule_size = offset + size; + } else if ((offset + size) > flow->fs_rule_size) { + DPAA2_PMD_ERR("%s: %s < %s, but %s + %s > %s", + __func__, offset_info, rule_size_info, + offset_info, size_info, rule_size_info); + return -EINVAL; + } + } } return 0; @@ -1605,6 +1655,7 @@ int extract_size = size > DPAA2_FLOW_MAX_KEY_SIZE ? DPAA2_FLOW_MAX_KEY_SIZE : size; int offset, field; + char offset_info[64], size_info[64], rule_size_info[64]; field = extract_offset << DPAA2_FLOW_RAW_OFFSET_FIELD_SHIFT; field |= extract_size; @@ -1615,17 +1666,37 @@ extract_offset, size); return -EINVAL; } + sprintf(offset_info, "offset(%d)", offset); + sprintf(size_info, "size(%d)", size); if (dist_type & DPAA2_FLOW_QOS_TYPE) { + sprintf(rule_size_info, "qos rule size(%d)", + flow->qos_rule_size); memcpy((flow->qos_key_addr + offset), key, size); memcpy((flow->qos_mask_addr + offset), mask, size); - flow->qos_rule_size = offset + size; + if (offset >= flow->qos_rule_size) { + flow->qos_rule_size = offset + size; + } else if ((offset + size) > flow->qos_rule_size) { + DPAA2_PMD_ERR("%s: %s < %s, but %s + %s > %s", + __func__, offset_info, rule_size_info, + offset_info, size_info, rule_size_info); + return -EINVAL; + } } if (dist_type & DPAA2_FLOW_FS_TYPE) { + sprintf(rule_size_info, "fs rule size(%d)", + flow->fs_rule_size); memcpy((flow->fs_key_addr + offset), key, size); memcpy((flow->fs_mask_addr + offset), mask, size); - flow->fs_rule_size = offset + size; + if (offset >= flow->fs_rule_size) { + flow->fs_rule_size = offset + size; + } else if ((offset + size) > flow->fs_rule_size) { + DPAA2_PMD_ERR("%s: %s < %s, but %s + %s > %s", + __func__, offset_info, rule_size_info, + offset_info, size_info, rule_size_info); + return -EINVAL; + } } return 0; diff -Nru dpdk-24.11.3/drivers/net/dpaa2/dpaa2_recycle.c dpdk-24.11.4/drivers/net/dpaa2/dpaa2_recycle.c --- dpdk-24.11.3/drivers/net/dpaa2/dpaa2_recycle.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/dpaa2/dpaa2_recycle.c 2025-12-19 12:05:33.000000000 +0000 @@ -730,53 +730,3 @@ return ret; } - -int -dpaa2_dev_recycle_qp_setup(struct rte_dpaa2_device *dpaa2_dev, - uint16_t qidx, uint64_t cntx, - eth_rx_burst_t tx_lpbk, eth_tx_burst_t rx_lpbk, - struct dpaa2_queue **txq, - struct dpaa2_queue **rxq) -{ - struct rte_eth_dev *dev; - struct rte_eth_dev_data *data; - struct dpaa2_queue *txq_tmp; - struct dpaa2_queue *rxq_tmp; - struct dpaa2_dev_priv *priv; - - dev = dpaa2_dev->eth_dev; - data = dev->data; - priv = data->dev_private; - - if (!(priv->flags & DPAA2_TX_LOOPBACK_MODE) && - (tx_lpbk || rx_lpbk)) { - DPAA2_PMD_ERR("%s is NOT recycle 
device!", data->name); - - return -EINVAL; - } - - if (qidx >= data->nb_rx_queues || qidx >= data->nb_tx_queues) - return -EINVAL; - - rte_spinlock_lock(&priv->lpbk_qp_lock); - - if (tx_lpbk) - dev->tx_pkt_burst = tx_lpbk; - - if (rx_lpbk) - dev->rx_pkt_burst = rx_lpbk; - - txq_tmp = data->tx_queues[qidx]; - txq_tmp->lpbk_cntx = cntx; - rxq_tmp = data->rx_queues[qidx]; - rxq_tmp->lpbk_cntx = cntx; - - if (txq) - *txq = txq_tmp; - if (rxq) - *rxq = rxq_tmp; - - rte_spinlock_unlock(&priv->lpbk_qp_lock); - - return 0; -} diff -Nru dpdk-24.11.3/drivers/net/dpaa2/dpaa2_rxtx.c dpdk-24.11.4/drivers/net/dpaa2/dpaa2_rxtx.c --- dpdk-24.11.3/drivers/net/dpaa2/dpaa2_rxtx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/dpaa2/dpaa2_rxtx.c 2025-12-19 12:05:33.000000000 +0000 @@ -200,14 +200,10 @@ goto parse_done; } - if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L3CE)) + if (BIT_ISSET_AT_POS(annotation->word1, DPAA2_ETH_FAS_L3CE)) mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; - else - mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; - if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L4CE)) + else if (BIT_ISSET_AT_POS(annotation->word1, DPAA2_ETH_FAS_L4CE)) mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; - else - mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; if (BIT_ISSET_AT_POS(annotation->word4, L3_IP_1_FIRST_FRAGMENT | L3_IP_1_MORE_FRAGMENT | @@ -247,14 +243,10 @@ DPAA2_PMD_DP_DEBUG("(fast parse) Annotation = 0x%" PRIx64 "\t", annotation->word4); - if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L3CE)) + if (BIT_ISSET_AT_POS(annotation->word1, DPAA2_ETH_FAS_L3CE)) mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; - else - mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; - if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L4CE)) + else if (BIT_ISSET_AT_POS(annotation->word1, DPAA2_ETH_FAS_L4CE)) mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; - else - mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; if (unlikely(dpaa2_print_parser_result)) dpaa2_print_parse_result(annotation); @@ -641,9 +633,11 @@ const struct qbman_fd *fd; struct qbman_pull_desc pulldesc; struct rte_eth_dev_data *eth_data = dpaa2_q->eth_data; - uint32_t lcore_id = rte_lcore_id(); + uint32_t lcore_id = rte_lcore_id(), i = 0; void *v_addr, *hw_annot_addr; struct dpaa2_fas *fas; + struct rte_mbuf *mbuf; + char title[32]; if (unlikely(!DPAA2_PER_LCORE_DPIO)) { ret = dpaa2_affine_qbman_swp(); @@ -699,14 +693,39 @@ hw_annot_addr = (void *)((size_t)v_addr + DPAA2_FD_PTA_SIZE); fas = hw_annot_addr; - DPAA2_PMD_ERR("[%d] error packet on port[%d]:" - " fd_off: %d, fd_err: %x, fas_status: %x", - rte_lcore_id(), eth_data->port_id, + if (DPAA2_FD_GET_FORMAT(fd) == qbman_fd_sg) + mbuf = eth_sg_fd_to_mbuf(fd, eth_data->port_id); + else + mbuf = eth_fd_to_mbuf(fd, eth_data->port_id); + + if (!dpaa2_print_parser_result) { + /** Don't print parse result twice.*/ + dpaa2_print_parse_result(hw_annot_addr); + } + + DPAA2_PMD_ERR("Err pkt on port[%d]:", eth_data->port_id); + DPAA2_PMD_ERR("FD offset: %d, FD err: %x, FAS status: %x", DPAA2_GET_FD_OFFSET(fd), DPAA2_GET_FD_ERR(fd), fas->status); - rte_hexdump(stderr, "Error packet", v_addr, - DPAA2_GET_FD_OFFSET(fd) + DPAA2_GET_FD_LEN(fd)); + if (mbuf) + __rte_mbuf_sanity_check(mbuf, 1); + if (mbuf->nb_segs > 1) { + while (mbuf) { + sprintf(title, "Payload seg[%d]", i); + rte_hexdump(stderr, title, + (char *)mbuf->buf_addr + mbuf->data_off, + mbuf->data_len); + mbuf = mbuf->next; + i++; + } + } else { + rte_hexdump(stderr, "Payload", + (char *)mbuf->buf_addr + mbuf->data_off, + 
mbuf->data_len); + } + + rte_pktmbuf_free(mbuf); dq_storage++; num_rx++; } while (pending); diff -Nru dpdk-24.11.3/drivers/net/dpaa2/dpaa2_tm.c dpdk-24.11.4/drivers/net/dpaa2/dpaa2_tm.c --- dpdk-24.11.3/drivers/net/dpaa2/dpaa2_tm.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/dpaa2/dpaa2_tm.c 2025-12-19 12:05:33.000000000 +0000 @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2020-2023 NXP + * Copyright 2020-2024 NXP */ #include @@ -733,12 +733,12 @@ tx_cr_shaper.max_burst_size = node->profile->params.committed.size; tx_cr_shaper.rate_limit = - node->profile->params.committed.rate / - (1024 * 1024); + (node->profile->params.committed.rate / + (1024 * 1024)) * 8; tx_er_shaper.max_burst_size = node->profile->params.peak.size; tx_er_shaper.rate_limit = - node->profile->params.peak.rate / (1024 * 1024); + (node->profile->params.peak.rate / (1024 * 1024)) * 8; /* root node */ if (node->parent == NULL) { DPAA2_PMD_DEBUG("LNI S.rate = %u, burst =%u", diff -Nru dpdk-24.11.3/drivers/net/e1000/base/e1000_mac.c dpdk-24.11.4/drivers/net/e1000/base/e1000_mac.c --- dpdk-24.11.3/drivers/net/e1000/base/e1000_mac.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/e1000/base/e1000_mac.c 2025-12-19 12:05:33.000000000 +0000 @@ -1826,6 +1826,7 @@ return ret_val; mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL); + rte_compiler_barrier(); mac->ledctl_mode1 = mac->ledctl_default; mac->ledctl_mode2 = mac->ledctl_default; diff -Nru dpdk-24.11.3/drivers/net/ena/base/ena_com.c dpdk-24.11.4/drivers/net/ena/base/ena_com.c --- dpdk-24.11.3/drivers/net/ena/base/ena_com.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ena/base/ena_com.c 2025-12-19 12:05:33.000000000 +0000 @@ -2329,13 +2329,13 @@ } else { rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_MAX_QUEUES_NUM, 0); + if (rc) + return rc; + memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue, sizeof(get_resp.u.max_queue)); ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size; - - if (rc) - return rc; } rc = ena_com_get_feature(ena_dev, &get_resp, diff -Nru dpdk-24.11.3/drivers/net/ena/ena_ethdev.c dpdk-24.11.4/drivers/net/ena/ena_ethdev.c --- dpdk-24.11.3/drivers/net/ena/ena_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ena/ena_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -9,6 +9,7 @@ #include #include #include +#include #include "ena_ethdev.h" #include "ena_logs.h" @@ -2266,6 +2267,24 @@ return 0; } +/* + * Returns PCI BAR virtual address. + * If the physical address is not page-aligned, + * adjusts the virtual address by the page offset. + * Assumes page size is a power of 2. 
+ */ +static void *pci_bar_addr(struct rte_pci_device *dev, uint32_t bar) +{ + const struct rte_mem_resource *res = &dev->mem_resource[bar]; + size_t offset = res->phys_addr % rte_mem_page_size(); + void *vaddr = RTE_PTR_ADD(res->addr, offset); + + PMD_INIT_LOG_LINE(INFO, "PCI BAR [%u]: phys_addr=0x%" PRIx64 ", addr=%p, offset=0x%zx, adjusted_addr=%p", + bar, res->phys_addr, res->addr, offset, vaddr); + + return vaddr; +} + static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) { struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; @@ -2310,16 +2329,17 @@ intr_handle = pci_dev->intr_handle; - adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr; - adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr; - + adapter->regs = pci_bar_addr(pci_dev, ENA_REGS_BAR); if (!adapter->regs) { PMD_INIT_LOG_LINE(CRIT, "Failed to access registers BAR(%d)", ENA_REGS_BAR); return -ENXIO; } - ena_dev->reg_bar = adapter->regs; + + /* Memory BAR may be NULL on non LLQ supported devices */ + adapter->dev_mem_base = pci_bar_addr(pci_dev, ENA_MEM_BAR); + /* Pass device data as a pointer which can be passed to the IO functions * by the ena_com (for example - the memory allocation). */ diff -Nru dpdk-24.11.3/drivers/net/enetfec/enet_ethdev.c dpdk-24.11.4/drivers/net/enetfec/enet_ethdev.c --- dpdk-24.11.3/drivers/net/enetfec/enet_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/enetfec/enet_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2020-2021 NXP + * Copyright 2020-2021,2023-2024 NXP */ #include @@ -172,8 +172,10 @@ bdp = rxq->bd.base; for (i = 0; i < rxq->bd.ring_size; i++) { mbuf = rxq->rx_mbuf[i]; - rxq->rx_mbuf[i] = NULL; - rte_pktmbuf_free(mbuf); + if (mbuf) { + rxq->rx_mbuf[i] = NULL; + rte_pktmbuf_free(mbuf); + } bdp = enet_get_nextdesc(bdp, &rxq->bd); } } @@ -350,7 +352,7 @@ for (i = 0; i < dev->data->nb_rx_queues; i++) rte_free(fep->rx_queues[i]); for (i = 0; i < dev->data->nb_tx_queues; i++) - rte_free(fep->rx_queues[i]); + rte_free(fep->tx_queues[i]); } static const unsigned short offset_des_active_rxq[] = { @@ -377,6 +379,17 @@ sizeof(struct bufdesc); unsigned int dsize_log2 = rte_fls_u64(dsize) - 1; + if (queue_idx > 0) { + ENETFEC_PMD_ERR("Multi queue not supported"); + return -EINVAL; + } + + /* Tx deferred start is not supported */ + if (tx_conf->tx_deferred_start) { + ENETFEC_PMD_ERR("Tx deferred start not supported"); + return -EINVAL; + } + /* Tx deferred start is not supported */ if (tx_conf->tx_deferred_start) { ENETFEC_PMD_ERR("Tx deferred start not supported"); @@ -390,7 +403,7 @@ return -ENOMEM; } - if (nb_desc > MAX_TX_BD_RING_SIZE) { + if (nb_desc != MAX_TX_BD_RING_SIZE) { nb_desc = MAX_TX_BD_RING_SIZE; ENETFEC_PMD_WARN("modified the nb_desc to MAX_TX_BD_RING_SIZE"); } @@ -474,7 +487,7 @@ return -ENOMEM; } - if (nb_rx_desc > MAX_RX_BD_RING_SIZE) { + if (nb_rx_desc != MAX_RX_BD_RING_SIZE) { nb_rx_desc = MAX_RX_BD_RING_SIZE; ENETFEC_PMD_WARN("modified the nb_desc to MAX_RX_BD_RING_SIZE"); } @@ -554,7 +567,7 @@ } } rte_free(rxq); - return errno; + return -ENOMEM; } static const struct eth_dev_ops enetfec_ops = { diff -Nru dpdk-24.11.3/drivers/net/enetfec/enet_rxtx.c dpdk-24.11.4/drivers/net/enetfec/enet_rxtx.c --- dpdk-24.11.3/drivers/net/enetfec/enet_rxtx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/enetfec/enet_rxtx.c 2025-12-19 12:05:33.000000000 +0000 @@ -121,10 +121,11 @@ (rxq->fep->flag_csum & RX_FLAG_CSUM_EN)) { if ((rte_read32(&ebdp->bd_esc) & 
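/*
 * Illustrative sketch, not part of the patch: pci_bar_addr() above keeps the
 * sub-page offset of a BAR whose physical address is not page aligned. With
 * an assumed 4 KiB page, a BAR at a physical address ending in 0x200 maps
 * its registers 0x200 bytes into the mapped page. Generic form of the
 * adjustment; names are hypothetical and page_size must be a power of two.
 */
#include <stddef.h>
#include <stdint.h>

static void *
adjust_mapping(void *map_base, uint64_t phys_addr, size_t page_size)
{
	size_t off = (size_t)(phys_addr & (page_size - 1));

	return (char *)map_base + off;
}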
rte_cpu_to_le_32(RX_FLAG_CSUM_ERR)) == 0) { - /* don't check it */ - mbuf->ol_flags = RTE_MBUF_F_RX_IP_CKSUM_BAD; - } else { + /* No checksum error - checksum is good */ mbuf->ol_flags = RTE_MBUF_F_RX_IP_CKSUM_GOOD; + } else { + /* Checksum error detected */ + mbuf->ol_flags = RTE_MBUF_F_RX_IP_CKSUM_BAD; } } @@ -238,7 +239,8 @@ if (txq->fep->bufdesc_ex) { struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; - if (mbuf->ol_flags == RTE_MBUF_F_RX_IP_CKSUM_GOOD) + if (mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM | + RTE_MBUF_F_TX_UDP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM)) estatus |= TX_BD_PINS | TX_BD_IINS; rte_write32(0, &ebdp->bd_bdu); diff -Nru dpdk-24.11.3/drivers/net/enetfec/enet_uio.c dpdk-24.11.4/drivers/net/enetfec/enet_uio.c --- dpdk-24.11.3/drivers/net/enetfec/enet_uio.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/enetfec/enet_uio.c 2025-12-19 12:05:33.000000000 +0000 @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2021 NXP + * Copyright 2021,2024-2025 NXP */ #include @@ -32,13 +32,7 @@ static bool file_name_match_extract(const char filename[], const char match[]) { - char *substr = NULL; - - substr = strstr(filename, match); - if (substr == NULL) - return false; - - return true; + return strstr(filename, match) != NULL; } /* @@ -66,13 +60,16 @@ "%s/%s/%s", root, subdir, filename); fd = open(absolute_file_name, O_RDONLY); - if (fd <= 0) + if (fd < 0) { ENETFEC_PMD_ERR("Error opening file %s", absolute_file_name); + return fd; + } /* read UIO device name from first line in file */ ret = read(fd, line, FEC_UIO_MAX_DEVICE_FILE_NAME_LENGTH); if (ret <= 0) { ENETFEC_PMD_ERR("Error reading file %s", absolute_file_name); + close(fd); return ret; } close(fd); @@ -139,6 +136,10 @@ } /* Read mapping size and physical address expressed in hexa(base 16) */ uio_map_size = strtol(uio_map_size_str, NULL, 16); + if (uio_map_size <= 0 || uio_map_size > INT_MAX) { + ENETFEC_PMD_ERR("Invalid mapping size: %u.", uio_map_size); + return NULL; + } uio_map_p_addr = strtol(uio_map_p_addr_str, NULL, 16); if (uio_map_id == 0) { diff -Nru dpdk-24.11.3/drivers/net/fm10k/base/fm10k_common.c dpdk-24.11.4/drivers/net/fm10k/base/fm10k_common.c --- dpdk-24.11.3/drivers/net/fm10k/base/fm10k_common.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/fm10k/base/fm10k_common.c 2025-12-19 12:05:33.000000000 +0000 @@ -477,11 +477,11 @@ * Function invalidates the index values for the queues so any updates that * may have happened are ignored and the base for the queue stats is reset. 
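/*
 * Illustrative sketch, not part of the patch: the enet_rxtx.c hunks above fix
 * two inverted checks - on Rx, a clear RX_FLAG_CSUM_ERR bit now yields
 * ..CKSUM_GOOD instead of ..CKSUM_BAD, and on Tx the driver now tests the Tx
 * checksum request flags with a bitwise AND instead of comparing ol_flags
 * for equality against an Rx flag. Minimal form of the Tx-side test,
 * assuming the standard mbuf offload flags.
 */
#include <stdbool.h>
#include <rte_mbuf.h>

static inline bool
wants_l3_l4_csum(const struct rte_mbuf *m)
{
	const uint64_t mask = RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM |
			RTE_MBUF_F_TX_UDP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM;

	return (m->ol_flags & mask) != 0;
}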
**/ -void fm10k_unbind_hw_stats_q(struct fm10k_hw_stats_q *q, u32 idx, u32 count) +void fm10k_unbind_hw_stats_q(struct fm10k_hw_stats_q *q, u32 idx __rte_unused, u32 count) { u32 i; - for (i = 0; i < count; i++, idx++, q++) { + for (i = 0; i < count; i++, q++) { q->rx_stats_idx = 0; q->tx_stats_idx = 0; } diff -Nru dpdk-24.11.3/drivers/net/gve/base/gve_adminq.c dpdk-24.11.4/drivers/net/gve/base/gve_adminq.c --- dpdk-24.11.3/drivers/net/gve/base/gve_adminq.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/gve/base/gve_adminq.c 2025-12-19 12:05:33.000000000 +0000 @@ -244,8 +244,6 @@ i++; msleep(GVE_ADMINQ_SLEEP_LEN); } - gve_clear_device_rings_ok(priv); - gve_clear_device_resources_ok(priv); gve_clear_admin_queue_ok(priv); } diff -Nru dpdk-24.11.3/drivers/net/gve/base/gve_desc_dqo.h dpdk-24.11.4/drivers/net/gve/base/gve_desc_dqo.h --- dpdk-24.11.3/drivers/net/gve/base/gve_desc_dqo.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/gve/base/gve_desc_dqo.h 2025-12-19 12:05:33.000000000 +0000 @@ -248,4 +248,8 @@ */ #define GVE_RX_BUF_THRESH_DQO 32 +/* GVE IRQ */ +#define GVE_NO_INT_MODE_DQO BIT(30) +#define GVE_ITR_NO_UPDATE_DQO (3 << 3) + #endif /* _GVE_DESC_DQO_H_ */ diff -Nru dpdk-24.11.3/drivers/net/gve/gve_ethdev.c dpdk-24.11.4/drivers/net/gve/gve_ethdev.c --- dpdk-24.11.3/drivers/net/gve/gve_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/gve/gve_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -383,6 +383,8 @@ } } + gve_set_device_rings_ok(priv); + return 0; err_rx: @@ -395,6 +397,8 @@ gve_stop_tx_queues(dev); else gve_stop_tx_queues_dqo(dev); + + gve_clear_device_rings_ok(priv); return ret; } @@ -440,8 +444,11 @@ gve_dev_stop(struct rte_eth_dev *dev) { struct gve_priv *priv = dev->data->dev_private; + + dev->data->dev_started = 0; dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN; + gve_clear_device_rings_ok(priv); if (gve_is_gqi(priv)) { gve_stop_tx_queues(dev); gve_stop_rx_queues(dev); @@ -450,27 +457,18 @@ gve_stop_rx_queues_dqo(dev); } - dev->data->dev_started = 0; - if (gve_is_gqi(dev->data->dev_private)) gve_free_stats_report(dev); return 0; } -static int -gve_dev_close(struct rte_eth_dev *dev) +static void +gve_free_queues(struct rte_eth_dev *dev) { struct gve_priv *priv = dev->data->dev_private; - int err = 0; uint16_t i; - if (dev->data->dev_started) { - err = gve_dev_stop(dev); - if (err != 0) - PMD_DRV_LOG(ERR, "Failed to stop dev."); - } - if (gve_is_gqi(priv)) { for (i = 0; i < dev->data->nb_tx_queues; i++) gve_tx_queue_release(dev, i); @@ -484,8 +482,67 @@ for (i = 0; i < dev->data->nb_rx_queues; i++) gve_rx_queue_release_dqo(dev, i); } +} + +static void +gve_free_counter_array(struct gve_priv *priv) +{ + rte_memzone_free(priv->cnt_array_mz); + priv->cnt_array = NULL; +} + +static void +gve_free_irq_db(struct gve_priv *priv) +{ + rte_memzone_free(priv->irq_dbs_mz); + priv->irq_dbs = NULL; +} + +static void +gve_free_ptype_lut_dqo(struct gve_priv *priv) +{ + if (!gve_is_gqi(priv)) { + rte_free(priv->ptype_lut_dqo); + priv->ptype_lut_dqo = NULL; + } +} - rte_free(priv->adminq); +static void +gve_teardown_device_resources(struct gve_priv *priv) +{ + int err; + + /* Tell device its resources are being freed */ + if (gve_get_device_resources_ok(priv)) { + err = gve_adminq_deconfigure_device_resources(priv); + if (err) + PMD_DRV_LOG(ERR, "Could not deconfigure device resources: err=%d", err); + } + + gve_free_ptype_lut_dqo(priv); + gve_free_counter_array(priv); + gve_free_irq_db(priv); + 
gve_clear_device_resources_ok(priv); +} + +static int +gve_dev_close(struct rte_eth_dev *dev) +{ + struct gve_priv *priv = dev->data->dev_private; + int err = 0; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + if (dev->data->dev_started) { + err = gve_dev_stop(dev); + if (err != 0) + PMD_DRV_LOG(ERR, "Failed to stop dev."); + } + + gve_free_queues(dev); + gve_teardown_device_resources(priv); + gve_adminq_free(priv); dev->data->mac_addrs = NULL; @@ -603,6 +660,7 @@ .nb_max = priv->max_tx_desc_cnt, .nb_min = priv->min_tx_desc_cnt, .nb_align = 1, + .nb_mtu_seg_max = GVE_TX_MAX_DATA_DESCS - 1, }; dev_info->flow_type_rss_offloads = GVE_RTE_RSS_OFFLOAD_ALL; @@ -1055,41 +1113,6 @@ .reta_query = gve_rss_reta_query, }; -static void -gve_free_counter_array(struct gve_priv *priv) -{ - rte_memzone_free(priv->cnt_array_mz); - priv->cnt_array = NULL; -} - -static void -gve_free_irq_db(struct gve_priv *priv) -{ - rte_memzone_free(priv->irq_dbs_mz); - priv->irq_dbs = NULL; -} - -static void -gve_teardown_device_resources(struct gve_priv *priv) -{ - int err; - - /* Tell device its resources are being freed */ - if (gve_get_device_resources_ok(priv)) { - err = gve_adminq_deconfigure_device_resources(priv); - if (err) - PMD_DRV_LOG(ERR, "Could not deconfigure device resources: err=%d", err); - } - - if (!gve_is_gqi(priv)) { - rte_free(priv->ptype_lut_dqo); - priv->ptype_lut_dqo = NULL; - } - gve_free_counter_array(priv); - gve_free_irq_db(priv); - gve_clear_device_resources_ok(priv); -} - static int pci_dev_msix_vec_count(struct rte_pci_device *pdev) { @@ -1159,6 +1182,8 @@ } } + gve_set_device_resources_ok(priv); + return 0; free_ptype_lut: rte_free(priv->ptype_lut_dqo); @@ -1251,13 +1276,6 @@ return err; } -static void -gve_teardown_priv_resources(struct gve_priv *priv) -{ - gve_teardown_device_resources(priv); - gve_adminq_free(priv); -} - static int gve_dev_init(struct rte_eth_dev *eth_dev) { @@ -1328,18 +1346,6 @@ } static int -gve_dev_uninit(struct rte_eth_dev *eth_dev) -{ - struct gve_priv *priv = eth_dev->data->dev_private; - - gve_teardown_priv_resources(priv); - - eth_dev->data->mac_addrs = NULL; - - return 0; -} - -static int gve_pci_probe(__rte_unused struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) { @@ -1349,7 +1355,7 @@ static int gve_pci_remove(struct rte_pci_device *pci_dev) { - return rte_eth_dev_pci_generic_remove(pci_dev, gve_dev_uninit); + return rte_eth_dev_pci_generic_remove(pci_dev, gve_dev_close); } static const struct rte_pci_id pci_id_gve_map[] = { diff -Nru dpdk-24.11.3/drivers/net/gve/gve_ethdev.h dpdk-24.11.4/drivers/net/gve/gve_ethdev.h --- dpdk-24.11.3/drivers/net/gve/gve_ethdev.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/gve/gve_ethdev.h 2025-12-19 12:05:33.000000000 +0000 @@ -102,6 +102,7 @@ uint64_t packets; uint64_t bytes; uint64_t errors; + uint64_t too_many_descs; }; struct gve_rx_stats { diff -Nru dpdk-24.11.3/drivers/net/gve/gve_rx_dqo.c dpdk-24.11.4/drivers/net/gve/gve_rx_dqo.c --- dpdk-24.11.3/drivers/net/gve/gve_rx_dqo.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/gve/gve_rx_dqo.c 2025-12-19 12:05:33.000000000 +0000 @@ -376,14 +376,13 @@ rxq->stats.no_mbufs_bulk++; for (i = 0; i < rx_mask; i++) { nmb = rte_pktmbuf_alloc(rxq->mpool); - if (!nmb) - break; + if (!nmb) { + rxq->stats.no_mbufs++; + gve_release_rxq_mbufs_dqo(rxq); + return -ENOMEM; + } rxq->sw_ring[i] = nmb; } - if (i < rxq->nb_rx_desc - 1) { - rxq->stats.no_mbufs += rx_mask - i; - return -ENOMEM; - } } for (i = 0; i < rx_mask; 
i++) { @@ -415,7 +414,9 @@ rxq->qrx_tail = &hw->db_bar2[rte_be_to_cpu_32(rxq->qres->db_index)]; - rte_write32(rte_cpu_to_be_32(GVE_IRQ_MASK), rxq->ntfy_addr); + rte_write32(rte_cpu_to_le_32(GVE_NO_INT_MODE_DQO | + GVE_ITR_NO_UPDATE_DQO), + rxq->ntfy_addr); ret = gve_rxq_mbufs_alloc_dqo(rxq); if (ret != 0) { diff -Nru dpdk-24.11.3/drivers/net/gve/gve_tx_dqo.c dpdk-24.11.4/drivers/net/gve/gve_tx_dqo.c --- dpdk-24.11.3/drivers/net/gve/gve_tx_dqo.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/gve/gve_tx_dqo.c 2025-12-19 12:05:33.000000000 +0000 @@ -75,9 +75,92 @@ } static inline void +gve_tx_clean_descs_dqo(struct gve_tx_queue *txq, uint16_t nb_descs) { + while (--nb_descs) + gve_tx_clean_dqo(txq); +} + +/* GVE expects at most 10 data descriptors per mtu-sized segment. Beyond this, + * the hardware will assume the driver is malicious and stop transmitting + * packets altogether. Validate that a packet can be sent to avoid sending + * posting descriptors for an invalid packet. + */ +static inline bool +gve_tx_validate_descs(struct rte_mbuf *tx_pkt, uint16_t nb_descs, bool is_tso) +{ + if (!is_tso) + return nb_descs <= GVE_TX_MAX_DATA_DESCS; + + int tso_segsz = tx_pkt->tso_segsz; + int num_descs, seg_offset, mbuf_len; + int headlen = tx_pkt->l2_len + tx_pkt->l3_len + tx_pkt->l4_len; + + /* Headers will be split into their own buffer. */ + num_descs = 1; + seg_offset = 0; + mbuf_len = tx_pkt->data_len - headlen; + + while (tx_pkt) { + if (!mbuf_len) + goto next_mbuf; + + int seg_remain = tso_segsz - seg_offset; + if (num_descs == GVE_TX_MAX_DATA_DESCS && seg_remain) + return false; + + if (seg_remain < mbuf_len) { + seg_offset = mbuf_len % tso_segsz; + /* The MSS is bound from above by 9728B, so a + * single TSO segment in the middle of an mbuf + * will be part of at most two descriptors, and + * is not at risk of defying this limitation. + * Thus, such segments are ignored. + */ + int mbuf_remain = tx_pkt->data_len % GVE_TX_MAX_BUF_SIZE_DQO; + + /* For each TSO segment, HW will prepend + * headers. The remaining bytes of this mbuf + * will be the start of the payload of the next + * TSO segment. In addition, if the final + * segment in this mbuf is divided between two + * descriptors, both must be counted. 
+ */ + num_descs = 1 + !!(seg_offset) + + (mbuf_remain < seg_offset && mbuf_remain); + } else { + seg_offset += mbuf_len; + num_descs++; + } + +next_mbuf: + tx_pkt = tx_pkt->next; + if (tx_pkt) + mbuf_len = tx_pkt->data_len; + } + + + return true; +} + +static uint16_t +gve_tx_pkt_nb_data_descs(struct rte_mbuf *tx_pkt) +{ + int nb_descs = 0; + + while (tx_pkt) { + nb_descs += (GVE_TX_MAX_BUF_SIZE_DQO - 1 + tx_pkt->data_len) / + GVE_TX_MAX_BUF_SIZE_DQO; + tx_pkt = tx_pkt->next; + } + return nb_descs; +} + +static inline void gve_tx_fill_seg_desc_dqo(volatile union gve_tx_desc_dqo *desc, struct rte_mbuf *tx_pkt) { uint32_t hlen = tx_pkt->l2_len + tx_pkt->l3_len + tx_pkt->l4_len; + + desc->tso_ctx = (struct gve_tx_tso_context_desc_dqo) {}; desc->tso_ctx.cmd_dtype.dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO; desc->tso_ctx.cmd_dtype.tso = 1; desc->tso_ctx.mss = (uint16_t)tx_pkt->tso_segsz; @@ -94,14 +177,14 @@ struct rte_mbuf **sw_ring; struct rte_mbuf *tx_pkt; uint16_t mask, sw_mask; - uint16_t nb_to_clean; + uint16_t first_sw_id; + const char *reason; uint16_t nb_tx = 0; uint64_t ol_flags; - uint16_t nb_used; + uint16_t nb_descs; uint16_t tx_id; uint16_t sw_id; uint64_t bytes; - uint16_t first_sw_id; uint8_t tso; uint8_t csum; @@ -117,22 +200,40 @@ for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { tx_pkt = tx_pkts[nb_tx]; - if (txq->nb_free <= txq->free_thresh) { - nb_to_clean = DQO_TX_MULTIPLIER * txq->rs_thresh; - while (nb_to_clean--) - gve_tx_clean_dqo(txq); + if (txq->nb_free <= txq->free_thresh) + gve_tx_clean_descs_dqo(txq, DQO_TX_MULTIPLIER * + txq->rs_thresh); + + + if (rte_mbuf_check(tx_pkt, true, &reason)) { + PMD_DRV_LOG(DEBUG, "Invalid mbuf: %s", reason); + break; } ol_flags = tx_pkt->ol_flags; - nb_used = tx_pkt->nb_segs; first_sw_id = sw_id; tso = !!(ol_flags & RTE_MBUF_F_TX_TCP_SEG); csum = !!(ol_flags & GVE_TX_CKSUM_OFFLOAD_MASK_DQO); - nb_used += tso; - if (txq->nb_free < nb_used) + nb_descs = gve_tx_pkt_nb_data_descs(tx_pkt); + nb_descs += tso; + + /* Clean if there aren't enough descriptors to send the packet. */ + if (unlikely(txq->nb_free < nb_descs)) { + int nb_to_clean = RTE_MAX(DQO_TX_MULTIPLIER * txq->rs_thresh, + nb_descs); + + gve_tx_clean_descs_dqo(txq, nb_to_clean); + if (txq->nb_free < nb_descs) + break; + } + + /* Drop packet if it doesn't adhere to hardware limits. */ + if (!gve_tx_validate_descs(tx_pkt, nb_descs, tso)) { + txq->stats.too_many_descs++; break; + } if (tso) { txd = &txr[tx_id]; @@ -144,30 +245,45 @@ if (sw_ring[sw_id] != NULL) PMD_DRV_LOG(DEBUG, "Overwriting an entry in sw_ring"); + /* Skip writing descriptor if mbuf has no data. 
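/*
 * Illustrative sketch, not part of the patch: gve_tx_pkt_nb_data_descs()
 * above charges one data descriptor per GVE_TX_MAX_BUF_SIZE_DQO-sized chunk
 * of every mbuf segment, i.e. a ceiling division per segment, and
 * gve_tx_validate_descs() then rejects packets that would exceed the
 * hardware's per-segment descriptor budget. The counting step in isolation;
 * names are hypothetical.
 */
#include <stdint.h>

static unsigned int
count_chunks(const uint32_t *seg_len, unsigned int nb_segs, uint32_t max_chunk)
{
	unsigned int n = 0;
	unsigned int i;

	for (i = 0; i < nb_segs; i++)
		n += (seg_len[i] + max_chunk - 1) / max_chunk; /* ceiling division */
	return n;
}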
*/ + if (!tx_pkt->data_len) + goto finish_mbuf; + txd = &txr[tx_id]; sw_ring[sw_id] = tx_pkt; - /* fill Tx descriptor */ - txd->pkt.buf_addr = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt)); - txd->pkt.dtype = GVE_TX_PKT_DESC_DTYPE_DQO; - txd->pkt.compl_tag = rte_cpu_to_le_16(first_sw_id); - txd->pkt.buf_size = RTE_MIN(tx_pkt->data_len, GVE_TX_MAX_BUF_SIZE_DQO); - txd->pkt.end_of_packet = 0; - txd->pkt.checksum_offload_enable = csum; + /* fill Tx descriptors */ + int mbuf_offset = 0; + while (mbuf_offset < tx_pkt->data_len) { + uint64_t buf_addr = rte_mbuf_data_iova(tx_pkt) + + mbuf_offset; + + txd = &txr[tx_id]; + txd->pkt = (struct gve_tx_pkt_desc_dqo) {}; + txd->pkt.buf_addr = rte_cpu_to_le_64(buf_addr); + txd->pkt.compl_tag = rte_cpu_to_le_16(first_sw_id); + txd->pkt.dtype = GVE_TX_PKT_DESC_DTYPE_DQO; + txd->pkt.buf_size = RTE_MIN(tx_pkt->data_len - mbuf_offset, + GVE_TX_MAX_BUF_SIZE_DQO); + txd->pkt.end_of_packet = 0; + txd->pkt.checksum_offload_enable = csum; + + mbuf_offset += txd->pkt.buf_size; + tx_id = (tx_id + 1) & mask; + } - /* size of desc_ring and sw_ring could be different */ - tx_id = (tx_id + 1) & mask; +finish_mbuf: sw_id = (sw_id + 1) & sw_mask; - bytes += tx_pkt->data_len; tx_pkt = tx_pkt->next; } while (tx_pkt); - /* fill the last descriptor with End of Packet (EOP) bit */ + /* fill the last written descriptor with End of Packet (EOP) bit */ + txd = &txr[(tx_id - 1) & mask]; txd->pkt.end_of_packet = 1; - txq->nb_free -= nb_used; - txq->nb_used += nb_used; + txq->nb_free -= nb_descs; + txq->nb_used += nb_descs; } /* update the tail pointer if any packets were processed */ @@ -415,7 +531,9 @@ txq->qtx_head = &hw->cnt_array[rte_be_to_cpu_32(txq->qres->counter_index)]; - rte_write32(rte_cpu_to_be_32(GVE_IRQ_MASK), txq->ntfy_addr); + rte_write32(rte_cpu_to_le_32(GVE_NO_INT_MODE_DQO | + GVE_ITR_NO_UPDATE_DQO), + txq->ntfy_addr); dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; diff -Nru dpdk-24.11.3/drivers/net/hns3/hns3_ethdev.c dpdk-24.11.4/drivers/net/hns3/hns3_ethdev.c --- dpdk-24.11.3/drivers/net/hns3/hns3_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/hns3/hns3_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -4432,25 +4432,25 @@ ret = hns3_dcb_init(hw); if (ret) { PMD_INIT_LOG(ERR, "Failed to init dcb: %d", ret); - goto err_mac_init; + goto rm_vlan_table; } ret = hns3_init_fd_config(hns); if (ret) { PMD_INIT_LOG(ERR, "Failed to init flow director: %d", ret); - goto err_mac_init; + goto rm_vlan_table; } ret = hns3_config_tso(hw, HNS3_TSO_MSS_MIN, HNS3_TSO_MSS_MAX); if (ret) { PMD_INIT_LOG(ERR, "Failed to config tso: %d", ret); - goto err_mac_init; + goto rm_vlan_table; } ret = hns3_config_gro(hw, false); if (ret) { PMD_INIT_LOG(ERR, "Failed to config gro: %d", ret); - goto err_mac_init; + goto rm_vlan_table; } /* @@ -4462,22 +4462,33 @@ ret = hns3_init_ring_with_vector(hw); if (ret) { PMD_INIT_LOG(ERR, "Failed to init ring intr vector: %d", ret); - goto err_mac_init; + goto rm_vlan_table; } ret = hns3_ptp_init(hw); if (ret) { PMD_INIT_LOG(ERR, "Failed to init PTP, ret = %d", ret); - goto err_mac_init; + goto rm_vlan_table; } return 0; - +rm_vlan_table: + hns3_rm_all_vlan_table(hns, true); err_mac_init: hns3_uninit_umv_space(hw); return ret; } +static void +hns3_uninit_hardware(struct hns3_hw *hw) +{ + struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); + + (void)hns3_uninit_umv_space(hw); + hns3_ptp_uninit(hw); + hns3_rm_all_vlan_table(hns, true); +} + static int hns3_clear_hw(struct hns3_hw *hw) { @@ -4689,8 +4700,7 @@ 
err_enable_intr: hns3_fdir_filter_uninit(hns); err_fdir: - hns3_uninit_umv_space(hw); - hns3_ptp_uninit(hw); + hns3_uninit_hardware(hw); err_init_hw: hns3_stats_uninit(hw); err_get_config: @@ -4725,8 +4735,7 @@ hns3_promisc_uninit(hw); hns3_flow_uninit(eth_dev); hns3_fdir_filter_uninit(hns); - hns3_uninit_umv_space(hw); - hns3_ptp_uninit(hw); + hns3_uninit_hardware(hw); hns3_stats_uninit(hw); hns3_config_mac_tnl_int(hw, false); hns3_pf_disable_irq0(hw); diff -Nru dpdk-24.11.3/drivers/net/hns3/hns3_ethdev.h dpdk-24.11.4/drivers/net/hns3/hns3_ethdev.h --- dpdk-24.11.3/drivers/net/hns3/hns3_ethdev.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/hns3/hns3_ethdev.h 2025-12-19 12:05:33.000000000 +0000 @@ -5,7 +5,6 @@ #ifndef HNS3_ETHDEV_H #define HNS3_ETHDEV_H -#include #include #include #include @@ -79,6 +78,7 @@ #define HNS3_DEFAULT_MTU 1500UL #define HNS3_DEFAULT_FRAME_LEN (HNS3_DEFAULT_MTU + HNS3_ETH_OVERHEAD) #define HNS3_HIP08_MIN_TX_PKT_LEN 33 +#define HNS3_MIN_TUN_PKT_LEN 65 #define HNS3_BITS_PER_BYTE 8 @@ -680,7 +680,6 @@ struct hns3_port_base_vlan_config port_base_vlan_cfg; - pthread_mutex_t flows_lock; /* rte_flow ops lock */ struct hns3_fdir_rule_list flow_fdir_list; /* flow fdir rule list */ struct hns3_rss_filter_list flow_rss_list; /* flow RSS rule list */ struct hns3_flow_mem_list flow_list; diff -Nru dpdk-24.11.3/drivers/net/hns3/hns3_fdir.c dpdk-24.11.4/drivers/net/hns3/hns3_fdir.c --- dpdk-24.11.3/drivers/net/hns3/hns3_fdir.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/hns3/hns3_fdir.c 2025-12-19 12:05:33.000000000 +0000 @@ -1145,17 +1145,6 @@ if (hns->is_vf) return 0; - /* - * This API is called in the reset recovery process, the parent function - * must hold hw->lock. - * There maybe deadlock if acquire hw->flows_lock directly because rte - * flow driver ops first acquire hw->flows_lock and then may acquire - * hw->lock. - * So here first release the hw->lock and then acquire the - * hw->flows_lock to avoid deadlock. 
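
The removed comment above documented an unlock/relock dance that only existed because two locks (hw->lock and hw->flows_lock) could be taken in different orders. With the dedicated flow mutex dropped, the restore path keeps running under hw->lock and the rte_flow entry points below serialize on that same spinlock, so no lock-ordering workaround is needed. A generic, hypothetical sketch of the resulting single-lock pattern:

    #include <rte_spinlock.h>

    /* Hypothetical stand-in for the per-device structure; only the lock matters. */
    struct example_dev {
        rte_spinlock_t lock;
    };

    static int
    example_flow_op(struct example_dev *dev)
    {
        int ret;

        rte_spinlock_lock(&dev->lock);
        ret = 0; /* flow table manipulation would happen here */
        rte_spinlock_unlock(&dev->lock);

        return ret;
    }
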
- */ - rte_spinlock_unlock(&hw->lock); - pthread_mutex_lock(&hw->flows_lock); TAILQ_FOREACH(fdir_filter, &fdir_info->fdir_list, entries) { ret = hns3_config_action(hw, &fdir_filter->fdir_conf); if (!ret) @@ -1166,8 +1155,6 @@ break; } } - pthread_mutex_unlock(&hw->flows_lock); - rte_spinlock_lock(&hw->lock); if (err) { hns3_err(hw, "Fail to restore FDIR filter, ret = %d", ret); diff -Nru dpdk-24.11.3/drivers/net/hns3/hns3_flow.c dpdk-24.11.4/drivers/net/hns3/hns3_flow.c --- dpdk-24.11.3/drivers/net/hns3/hns3_flow.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/hns3/hns3_flow.c 2025-12-19 12:05:33.000000000 +0000 @@ -2210,18 +2210,6 @@ return 0; } -static int -hns3_restore_rss_filter(struct hns3_hw *hw) -{ - int ret; - - pthread_mutex_lock(&hw->flows_lock); - ret = hns3_reconfig_all_rss_filter(hw); - pthread_mutex_unlock(&hw->flows_lock); - - return ret; -} - int hns3_restore_filter(struct hns3_adapter *hns) { @@ -2232,7 +2220,7 @@ if (ret != 0) return ret; - return hns3_restore_rss_filter(hw); + return hns3_reconfig_all_rss_filter(hw); } static int @@ -2624,10 +2612,10 @@ struct hns3_filter_info filter_info = {0}; int ret; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); ret = hns3_flow_validate(dev, attr, pattern, actions, error, &filter_info); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return ret; } @@ -2641,9 +2629,9 @@ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct rte_flow *flow; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); flow = hns3_flow_create(dev, attr, pattern, actions, error); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return flow; } @@ -2655,9 +2643,9 @@ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); int ret; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); ret = hns3_flow_destroy(dev, flow, error); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return ret; } @@ -2668,9 +2656,9 @@ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); int ret; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); ret = hns3_flow_flush(dev, error); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return ret; } @@ -2683,9 +2671,9 @@ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); int ret; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); ret = hns3_flow_query(dev, flow, actions, data, error); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return ret; } @@ -2733,7 +2721,7 @@ if (hns3_check_indir_action(conf, action, error)) return NULL; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); act_count = (const struct rte_flow_action_count *)action->conf; if (act_count->id >= pf->fdir.fd_cfg.cnt_num[HNS3_FD_STAGE_1]) { @@ -2758,11 +2746,11 @@ handle.indirect_type = HNS3_INDIRECT_ACTION_TYPE_COUNT; handle.counter_id = counter->id; - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return (struct rte_flow_action_handle *)handle.val64; err_exit: - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return NULL; } @@ -2775,11 +2763,11 @@ struct rte_flow_action_handle indir; struct hns3_flow_counter *counter; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); indir.val64 = (uint64_t)handle; if (indir.indirect_type != HNS3_INDIRECT_ACTION_TYPE_COUNT) { - 
pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, handle, "Invalid indirect type"); @@ -2787,14 +2775,14 @@ counter = hns3_counter_lookup(dev, indir.counter_id); if (counter == NULL) { - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, handle, "Counter id not exist"); } if (counter->ref_cnt > 1) { - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return rte_flow_error_set(error, EBUSY, RTE_FLOW_ERROR_TYPE_HANDLE, handle, "Counter id in use"); @@ -2802,7 +2790,7 @@ (void)hns3_counter_release(dev, indir.counter_id); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return 0; } @@ -2817,11 +2805,11 @@ struct rte_flow flow; int ret; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); indir.val64 = (uint64_t)handle; if (indir.indirect_type != HNS3_INDIRECT_ACTION_TYPE_COUNT) { - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, handle, "Invalid indirect type"); @@ -2831,7 +2819,7 @@ flow.counter_id = indir.counter_id; ret = hns3_counter_query(dev, &flow, (struct rte_flow_query_count *)data, error); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return ret; } @@ -2865,14 +2853,10 @@ hns3_flow_init(struct rte_eth_dev *dev) { struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); - pthread_mutexattr_t attr; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return; - pthread_mutexattr_init(&attr); - pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); - pthread_mutex_init(&hw->flows_lock, &attr); dev->data->dev_flags |= RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE; TAILQ_INIT(&hw->flow_fdir_list); diff -Nru dpdk-24.11.3/drivers/net/hns3/hns3_rxtx.c dpdk-24.11.4/drivers/net/hns3/hns3_rxtx.c --- dpdk-24.11.3/drivers/net/hns3/hns3_rxtx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/hns3/hns3_rxtx.c 2025-12-19 12:05:33.000000000 +0000 @@ -4205,6 +4205,37 @@ } } +static bool +hns3_tx_pktmbuf_append(struct hns3_tx_queue *txq, + struct rte_mbuf *tx_pkt) +{ + uint16_t add_len = 0; + uint32_t ptype; + char *appended; + + if (unlikely(tx_pkt->ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ) && + rte_pktmbuf_pkt_len(tx_pkt) < HNS3_MIN_TUN_PKT_LEN)) { + ptype = rte_net_get_ptype(tx_pkt, NULL, RTE_PTYPE_L2_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK | + RTE_PTYPE_TUNNEL_MASK); + if (ptype & RTE_PTYPE_TUNNEL_MASK) + add_len = HNS3_MIN_TUN_PKT_LEN - rte_pktmbuf_pkt_len(tx_pkt); + } else if (unlikely(rte_pktmbuf_pkt_len(tx_pkt) < txq->min_tx_pkt_len)) { + add_len = txq->min_tx_pkt_len - rte_pktmbuf_pkt_len(tx_pkt); + } + + if (unlikely(add_len > 0)) { + appended = rte_pktmbuf_append(tx_pkt, add_len); + if (appended == NULL) { + txq->dfx_stats.pkt_padding_fail_cnt++; + return false; + } + memset(appended, 0, add_len); + } + + return true; +} + uint16_t hns3_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, @@ -4282,21 +4313,8 @@ * by hardware in Tx direction, driver need to pad it to avoid * error. 
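
The padding helper above hinges on rte_pktmbuf_append(), which only succeeds when the last segment has enough tailroom and returns NULL otherwise, in which case the packet is counted in pkt_padding_fail_cnt and the burst stops. A short, illustrative sketch of the same padding idea in isolation:

    #include <string.h>
    #include <rte_mbuf.h>

    /* Illustrative only: pad a frame up to min_len with zero bytes,
     * mirroring the driver helper above. Returns 0 on success.
     */
    static int
    example_pad_runt(struct rte_mbuf *m, uint16_t min_len)
    {
        uint16_t add_len;
        char *tail;

        if (rte_pktmbuf_pkt_len(m) >= min_len)
            return 0;

        add_len = min_len - rte_pktmbuf_pkt_len(m);
        tail = rte_pktmbuf_append(m, add_len); /* NULL if no tailroom */
        if (tail == NULL)
            return -1;

        memset(tail, 0, add_len);
        return 0;
    }
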
*/ - if (unlikely(rte_pktmbuf_pkt_len(tx_pkt) < - txq->min_tx_pkt_len)) { - uint16_t add_len; - char *appended; - - add_len = txq->min_tx_pkt_len - - rte_pktmbuf_pkt_len(tx_pkt); - appended = rte_pktmbuf_append(tx_pkt, add_len); - if (appended == NULL) { - txq->dfx_stats.pkt_padding_fail_cnt++; - break; - } - - memset(appended, 0, add_len); - } + if (!hns3_tx_pktmbuf_append(txq, tx_pkt)) + break; m_seg = tx_pkt; diff -Nru dpdk-24.11.3/drivers/net/hns3/hns3_rxtx_vec.h dpdk-24.11.4/drivers/net/hns3/hns3_rxtx_vec.h --- dpdk-24.11.3/drivers/net/hns3/hns3_rxtx_vec.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/hns3/hns3_rxtx_vec.h 2025-12-19 12:05:33.000000000 +0000 @@ -109,8 +109,12 @@ /* * Clear VLD bit for the first descriptor rearmed in case * of going to receive packets later. + * And also point mbufs to fake_mbuf to prevent modification + * of the mbuf field during vector packet receiving. */ rxdp[0].rx.bd_base_info = 0; + for (i = 0; i < HNS3_VECTOR_RX_OFFSET_TABLE_LEN; i++) + rxep[i].mbuf = &rxq->fake_mbuf; rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++; return; } diff -Nru dpdk-24.11.3/drivers/net/i40e/i40e_hash.c dpdk-24.11.4/drivers/net/i40e/i40e_hash.c --- dpdk-24.11.3/drivers/net/i40e/i40e_hash.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/i40e/i40e_hash.c 2025-12-19 12:05:33.000000000 +0000 @@ -561,7 +561,7 @@ } static uint64_t -i40e_hash_get_inset(uint64_t rss_types) +i40e_hash_get_inset(uint64_t rss_types, bool symmetric_enable) { uint64_t mask, inset = 0; int i; @@ -608,6 +608,17 @@ I40E_INSET_IPV4_SRC | I40E_INSET_IPV6_SRC); } + /* SCTP Verification Tag is not required in hash computation for SYMMETRIC_TOEPLITZ */ + if (symmetric_enable) { + mask = rss_types & RTE_ETH_RSS_NONFRAG_IPV4_SCTP; + if (mask == RTE_ETH_RSS_NONFRAG_IPV4_SCTP) + inset &= ~I40E_INSET_SCTP_VT; + + mask = rss_types & RTE_ETH_RSS_NONFRAG_IPV6_SCTP; + if (mask == RTE_ETH_RSS_NONFRAG_IPV6_SCTP) + inset &= ~I40E_INSET_SCTP_VT; + } + return inset; } @@ -1113,6 +1124,7 @@ RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, "RSS Queues not supported when pattern specified"); + rss_conf->symmetric_enable = false; /* by default, symmetric is disabled */ switch (rss_act->func) { case RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ: @@ -1140,7 +1152,7 @@ rss_conf->conf.func = rss_act->func; rss_conf->conf.types = rss_act->types; - rss_conf->inset = i40e_hash_get_inset(rss_act->types); + rss_conf->inset = i40e_hash_get_inset(rss_act->types, rss_conf->symmetric_enable); return i40e_hash_get_pattern_pctypes(dev, pattern, rss_act, rss_conf, error); diff -Nru dpdk-24.11.3/drivers/net/i40e/i40e_rxtx.c dpdk-24.11.4/drivers/net/i40e/i40e_rxtx.c --- dpdk-24.11.3/drivers/net/i40e/i40e_rxtx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/i40e/i40e_rxtx.c 2025-12-19 12:05:33.000000000 +0000 @@ -128,9 +128,13 @@ #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC if (rte_le_to_cpu_16(rxdp->wb.qword2.ext_status) & (1 << I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) { - mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ | - RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_VLAN; - mb->vlan_tci_outer = mb->vlan_tci; + if ((mb->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED) == 0) { + mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED; + } else { + /* if two tags, move Tag1 to outer tag field */ + mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ; + mb->vlan_tci_outer = mb->vlan_tci; + } mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_2); PMD_RX_LOG(DEBUG, 
"Descriptor l2tag2_1: %u, l2tag2_2: %u", rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_1), diff -Nru dpdk-24.11.3/drivers/net/iavf/iavf_rxtx.c dpdk-24.11.4/drivers/net/iavf/iavf_rxtx.c --- dpdk-24.11.3/drivers/net/iavf/iavf_rxtx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/iavf/iavf_rxtx.c 2025-12-19 12:05:33.000000000 +0000 @@ -1200,11 +1200,13 @@ #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC if (rte_le_to_cpu_16(rxdp->wb.status_error1) & (1 << IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_S)) { - mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | - RTE_MBUF_F_RX_QINQ | - RTE_MBUF_F_RX_VLAN_STRIPPED | - RTE_MBUF_F_RX_VLAN; - mb->vlan_tci_outer = mb->vlan_tci; + if ((mb->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED) == 0) { + mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED; + } else { + /* if two tags, move Tag1 to outer tag field */ + mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ; + mb->vlan_tci_outer = mb->vlan_tci; + } mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag2_2nd); PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u", rte_le_to_cpu_16(rxdp->wb.l2tag2_1st), @@ -1620,7 +1622,8 @@ rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd); pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0); - if (iavf_timestamp_dynflag > 0) { + if (iavf_timestamp_dynflag > 0 && + rxd.wb.time_stamp_low & IAVF_RX_FLX_DESC_TS_VALID) { ts_ns = iavf_tstamp_convert_32b_64b(rxq->phc_time, rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high)); @@ -1789,7 +1792,8 @@ rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd); pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0); - if (iavf_timestamp_dynflag > 0) { + if (iavf_timestamp_dynflag > 0 && + rxd.wb.time_stamp_low & IAVF_RX_FLX_DESC_TS_VALID) { ts_ns = iavf_tstamp_convert_32b_64b(rxq->phc_time, rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high)); @@ -2074,7 +2078,8 @@ stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0); pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0); - if (iavf_timestamp_dynflag > 0) { + if (iavf_timestamp_dynflag > 0 && + rxdp[j].wb.time_stamp_low & IAVF_RX_FLX_DESC_TS_VALID) { ts_ns = iavf_tstamp_convert_32b_64b(rxq->phc_time, rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high)); @@ -4162,16 +4167,16 @@ if (check_ret == IAVF_VECTOR_PATH) { use_sse = true; } - if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 || - rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) && - rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256) - use_avx2 = true; #ifdef CC_AVX512_SUPPORT if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 && rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1 && rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512) use_avx512 = true; #endif + if (!use_avx512 && (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 || + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) && + rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256) + use_avx2 = true; if (!use_sse && !use_avx2 && !use_avx512) goto normal; diff -Nru dpdk-24.11.3/drivers/net/iavf/iavf_rxtx.h dpdk-24.11.4/drivers/net/iavf/iavf_rxtx.h --- dpdk-24.11.3/drivers/net/iavf/iavf_rxtx.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/iavf/iavf_rxtx.h 2025-12-19 12:05:33.000000000 +0000 @@ -643,6 +643,9 @@ /* for iavf_32b_rx_flex_desc.pkt_len member */ #define IAVF_RX_FLX_DESC_PKT_LEN_M (0x3FFF) /* 14-bits */ +/* Valid indicator bit for the time_stamp_low field */ +#define IAVF_RX_FLX_DESC_TS_VALID (0x1UL) + int iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, diff -Nru 
dpdk-24.11.3/drivers/net/iavf/iavf_vchnl.c dpdk-24.11.4/drivers/net/iavf/iavf_vchnl.c --- dpdk-24.11.3/drivers/net/iavf/iavf_vchnl.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/iavf/iavf_vchnl.c 2025-12-19 12:05:33.000000000 +0000 @@ -102,7 +102,7 @@ void *param, size_t param_alloc_size) { struct iavf_event_handler *handler = &event_handler; - char notify_byte; + char notify_byte = 0; struct iavf_event_element *elem = rte_malloc(NULL, sizeof(*elem) + param_alloc_size, 0); if (!elem) return; diff -Nru dpdk-24.11.3/drivers/net/ice/base/ice_flow.c dpdk-24.11.4/drivers/net/ice/base/ice_flow.c --- dpdk-24.11.3/drivers/net/ice/base/ice_flow.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ice/base/ice_flow.c 2025-12-19 12:05:33.000000000 +0000 @@ -2627,10 +2627,6 @@ status = ice_flow_assoc_hw_prof(hw, blk, dest_vsi_handle, fdir_vsi_handle, id); - if (status) - goto free_params; - - return 0; free_params: ice_free(hw, params); diff -Nru dpdk-24.11.3/drivers/net/ice/base/ice_switch.c dpdk-24.11.4/drivers/net/ice/base/ice_switch.c --- dpdk-24.11.3/drivers/net/ice/base/ice_switch.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ice/base/ice_switch.c 2025-12-19 12:05:33.000000000 +0000 @@ -2435,6 +2435,7 @@ bool *refresh_required) { ice_declare_bitmap(result_bm, ICE_MAX_FV_WORDS); + struct ice_recp_grp_entry *rg, *tmprg_entry; struct ice_aqc_recipe_data_elem *tmp; u16 num_recps = ICE_MAX_NUM_RECIPES; struct ice_prot_lkup_ext *lkup_exts; @@ -2481,6 +2482,15 @@ */ lkup_exts = &recps[rid].lkup_exts; + /* Remove duplicate entries */ + LIST_FOR_EACH_ENTRY_SAFE(rg, tmprg_entry, &recps[rid].rg_list, + ice_recp_grp_entry, l_entry) { + if (rg->rid == rid) { + LIST_DEL(&rg->l_entry); + ice_free(hw, rg); + } + } + for (sub_recps = 0; sub_recps < num_recps; sub_recps++) { struct ice_aqc_recipe_data_elem root_bufs = tmp[sub_recps]; struct ice_recp_grp_entry *rg_entry; @@ -8236,10 +8246,6 @@ */ ice_get_compat_fv_bitmap(hw, rinfo, fv_bitmap); - status = ice_get_sw_fv_list(hw, lkup_exts, fv_bitmap, &rm->fv_list); - if (status) - goto err_unroll; - /* Create any special protocol/offset pairs, such as looking at tunnel * bits by extracting metadata */ @@ -8247,6 +8253,10 @@ if (status) goto err_free_lkup_exts; + status = ice_get_sw_fv_list(hw, lkup_exts, fv_bitmap, &rm->fv_list); + if (status) + goto err_unroll; + /* Group match words into recipes using preferred recipe grouping * criteria. 
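
The duplicate-removal loop above relies on the driver's _SAFE iteration wrapper so that entries can be unlinked and freed while walking the list. Outside the driver macros, the same idiom looks like the following sketch built on <sys/queue.h>, with a hypothetical node type:

    #include <stdlib.h>
    #include <sys/queue.h>

    struct example_node {
        int rid;
        LIST_ENTRY(example_node) entry;
    };
    LIST_HEAD(example_list, example_node);

    /* Remove every node matching rid; the successor is fetched before the
     * current node is unlinked and freed, so iteration stays valid.
     */
    static void
    example_remove_rid(struct example_list *head, int rid)
    {
        struct example_node *n, *next;

        for (n = LIST_FIRST(head); n != NULL; n = next) {
            next = LIST_NEXT(n, entry);
            if (n->rid == rid) {
                LIST_REMOVE(n, entry);
                free(n);
            }
        }
    }
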
*/ diff -Nru dpdk-24.11.3/drivers/net/ice/base/ice_type.h dpdk-24.11.4/drivers/net/ice/base/ice_type.h --- dpdk-24.11.3/drivers/net/ice/base/ice_type.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ice/base/ice_type.h 2025-12-19 12:05:33.000000000 +0000 @@ -982,7 +982,7 @@ struct ice_orom_info orom; /* Option ROM version info */ struct ice_nvm_info nvm; /* NVM version information */ struct ice_bank_info banks; /* Flash Bank information */ - u16 sr_words; /* Shadow RAM size in words */ + u32 sr_words; /* Shadow RAM size in words */ u32 flash_size; /* Size of available flash in bytes */ u8 blank_nvm_mode; /* is NVM empty (no FW present) */ }; diff -Nru dpdk-24.11.3/drivers/net/ice/ice_acl_filter.c dpdk-24.11.4/drivers/net/ice/ice_acl_filter.c --- dpdk-24.11.3/drivers/net/ice/ice_acl_filter.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ice/ice_acl_filter.c 2025-12-19 12:05:33.000000000 +0000 @@ -113,7 +113,10 @@ else params.width = ICE_AQC_ACL_KEY_WIDTH_BYTES * 3; - params.depth = ICE_AQC_ACL_TCAM_DEPTH; + if (pf_num > 4) + params.depth = ICE_AQC_ACL_TCAM_DEPTH / 2; + else + params.depth = ICE_AQC_ACL_TCAM_DEPTH; params.entry_act_pairs = 1; params.concurr = false; diff -Nru dpdk-24.11.3/drivers/net/ice/ice_ethdev.c dpdk-24.11.4/drivers/net/ice/ice_ethdev.c --- dpdk-24.11.3/drivers/net/ice/ice_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ice/ice_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -68,14 +68,6 @@ /* Maximum number of VSI */ #define ICE_MAX_NUM_VSIS (768UL) -/* The 119 bit offset of the LAN Rx queue context is the L2TSEL control bit. */ -#define ICE_L2TSEL_QRX_CONTEXT_REG_IDX 3 -#define ICE_L2TSEL_BIT_OFFSET 23 -enum ice_l2tsel { - ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND, - ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG1, -}; - struct proto_xtr_ol_flag { const struct rte_mbuf_dynflag param; bool required; @@ -4973,49 +4965,12 @@ return ret; } -/** - * ice_vsi_update_l2tsel - update l2tsel field for all Rx rings on this VSI - * @vsi: VSI used to update l2tsel on - * @l2tsel: l2tsel setting requested - * - * Use the l2tsel setting to update all of the Rx queue context bits for l2tsel. - * This will modify which descriptor field the first offloaded VLAN will be - * stripped into. 
- */ -static void ice_vsi_update_l2tsel(struct ice_vsi *vsi, enum ice_l2tsel l2tsel) -{ - struct ice_hw *hw = ICE_VSI_TO_HW(vsi); - struct ice_pf *pf = ICE_VSI_TO_PF(vsi); - struct rte_eth_dev_data *dev_data = pf->dev_data; - u32 l2tsel_bit; - uint16_t i; - - if (l2tsel == ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND) - l2tsel_bit = 0; - else - l2tsel_bit = BIT(ICE_L2TSEL_BIT_OFFSET); - - for (i = 0; i < dev_data->nb_rx_queues; i++) { - u32 qrx_context_offset; - u32 regval; - - qrx_context_offset = - QRX_CONTEXT(ICE_L2TSEL_QRX_CONTEXT_REG_IDX, i); - - regval = rd32(hw, qrx_context_offset); - regval &= ~BIT(ICE_L2TSEL_BIT_OFFSET); - regval |= l2tsel_bit; - wr32(hw, qrx_context_offset, regval); - } -} - /* Configure outer vlan stripping on or off in QinQ mode */ static int ice_vsi_config_outer_vlan_stripping(struct ice_vsi *vsi, bool on) { uint16_t outer_ethertype = vsi->adapter->pf.outer_ethertype; struct ice_hw *hw = ICE_VSI_TO_HW(vsi); - int err = 0; if (vsi->vsi_id >= ICE_MAX_NUM_VSIS) { PMD_DRV_LOG(ERR, "VSI ID exceeds the maximum"); @@ -5027,41 +4982,9 @@ return -EOPNOTSUPP; } - if (on) { - err = ice_vsi_ena_outer_stripping(vsi, outer_ethertype); - if (!err) { - enum ice_l2tsel l2tsel = - ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND; - - /* PF tells the VF that the outer VLAN tag is always - * extracted to VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 and - * inner is always extracted to - * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1. This is needed to - * support outer stripping so the first tag always ends - * up in L2TAG2_2ND and the second/inner tag, if - * enabled, is extracted in L2TAG1. - */ - ice_vsi_update_l2tsel(vsi, l2tsel); - } - } else { - err = ice_vsi_dis_outer_stripping(vsi); - if (!err) { - enum ice_l2tsel l2tsel = - ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG1; - - /* PF tells the VF that the outer VLAN tag is always - * extracted to VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 and - * inner is always extracted to - * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1. This is needed to - * support inner stripping while outer stripping is - * disabled so that the first and only tag is extracted - * in L2TAG1. - */ - ice_vsi_update_l2tsel(vsi, l2tsel); - } - } - - return err; + return on ? 
+ ice_vsi_ena_outer_stripping(vsi, outer_ethertype) : + ice_vsi_dis_outer_stripping(vsi); } static int @@ -6044,10 +5967,16 @@ uint64_t *stat) { uint64_t new_data; + uint32_t lo_old, hi, lo; + + do { + lo_old = ICE_READ_REG(hw, loreg); + hi = ICE_READ_REG(hw, hireg); + lo = ICE_READ_REG(hw, loreg); + } while (lo_old > lo); - new_data = (uint64_t)ICE_READ_REG(hw, loreg); - new_data |= (uint64_t)(ICE_READ_REG(hw, hireg) & ICE_8_BIT_MASK) << - ICE_32_BIT_WIDTH; + new_data = (uint64_t)lo; + new_data |= (uint64_t)(hi & ICE_8_BIT_MASK) << ICE_32_BIT_WIDTH; if (!offset_loaded) *offset = new_data; diff -Nru dpdk-24.11.3/drivers/net/ice/ice_ethdev.h dpdk-24.11.4/drivers/net/ice/ice_ethdev.h --- dpdk-24.11.3/drivers/net/ice/ice_ethdev.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ice/ice_ethdev.h 2025-12-19 12:05:33.000000000 +0000 @@ -346,7 +346,7 @@ uint64_t input_set_i; /* only for tunnel inner fields */ uint32_t mark_flag; - struct ice_parser_profile *prof; + struct ice_parser_profile prof; bool parser_ena; u8 *pkt_buf; u8 pkt_len; diff -Nru dpdk-24.11.3/drivers/net/ice/ice_fdir_filter.c dpdk-24.11.4/drivers/net/ice/ice_fdir_filter.c --- dpdk-24.11.3/drivers/net/ice/ice_fdir_filter.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ice/ice_fdir_filter.c 2025-12-19 12:05:33.000000000 +0000 @@ -1312,7 +1312,7 @@ if (filter->parser_ena) { struct ice_hw *hw = ICE_PF_TO_HW(pf); - int id = ice_find_first_bit(filter->prof->ptypes, UINT16_MAX); + int id = ice_find_first_bit(filter->prof.ptypes, UINT16_MAX); int ptg = hw->blk[ICE_BLK_FD].xlt1.t[id]; u16 ctrl_vsi = pf->fdir.fdir_vsi->idx; u16 main_vsi = pf->main_vsi->idx; @@ -1322,11 +1322,11 @@ if (pi->fdir_actived_cnt != 0) { for (i = 0; i < ICE_MAX_FV_WORDS; i++) if (pi->prof.fv[i].proto_id != - filter->prof->fv[i].proto_id || + filter->prof.fv[i].proto_id || pi->prof.fv[i].offset != - filter->prof->fv[i].offset || + filter->prof.fv[i].offset || pi->prof.fv[i].msk != - filter->prof->fv[i].msk) + filter->prof.fv[i].msk) break; if (i == ICE_MAX_FV_WORDS) { fv_found = true; @@ -1336,7 +1336,7 @@ if (!fv_found) { ret = ice_flow_set_hw_prof(hw, main_vsi, ctrl_vsi, - filter->prof, ICE_BLK_FD); + &filter->prof, ICE_BLK_FD); if (ret) goto error; } @@ -1346,12 +1346,12 @@ goto error; if (!fv_found) { - for (i = 0; i < filter->prof->fv_num; i++) { + for (i = 0; i < filter->prof.fv_num; i++) { pi->prof.fv[i].proto_id = - filter->prof->fv[i].proto_id; + filter->prof.fv[i].proto_id; pi->prof.fv[i].offset = - filter->prof->fv[i].offset; - pi->prof.fv[i].msk = filter->prof->fv[i].msk; + filter->prof.fv[i].offset; + pi->prof.fv[i].msk = filter->prof.fv[i].msk; } pi->fdir_actived_cnt = 1; } @@ -1449,7 +1449,6 @@ return -rte_errno; error: - rte_free(filter->prof); rte_free(filter->pkt_buf); return -rte_errno; } @@ -1471,7 +1470,7 @@ if (filter->parser_ena) { struct ice_hw *hw = ICE_PF_TO_HW(pf); - int id = ice_find_first_bit(filter->prof->ptypes, UINT16_MAX); + int id = ice_find_first_bit(filter->prof.ptypes, UINT16_MAX); int ptg = hw->blk[ICE_BLK_FD].xlt1.t[id]; u16 ctrl_vsi = pf->fdir.fdir_vsi->idx; u16 main_vsi = pf->main_vsi->idx; @@ -1499,7 +1498,6 @@ flow->rule = NULL; - rte_free(filter->prof); rte_free(filter->pkt_buf); rte_free(filter); @@ -1865,7 +1863,7 @@ uint16_t tmp_val = 0; uint16_t pkt_len = 0; uint8_t tmp = 0; - int i, j; + int i, j, ret_val; pkt_len = strlen((char *)(uintptr_t)raw_spec->pattern); if (strlen((char *)(uintptr_t)raw_mask->pattern) != @@ -1920,24 +1918,22 @@ pkt_len /= 2; - if (ice_parser_run(ad->psr, 
tmp_spec, pkt_len, &rslt)) - return -rte_errno; - - if (!tmp_mask) - return -rte_errno; - - filter->prof = (struct ice_parser_profile *) - ice_malloc(&ad->hw, sizeof(*filter->prof)); - if (!filter->prof) - return -ENOMEM; + if (ice_parser_run(ad->psr, tmp_spec, pkt_len, &rslt)) { + ret_val = -rte_errno; + goto raw_error; + } if (ice_parser_profile_init(&rslt, tmp_spec, tmp_mask, - pkt_len, ICE_BLK_FD, true, filter->prof)) - return -rte_errno; + pkt_len, ICE_BLK_FD, true, &filter->prof)) { + ret_val = -rte_errno; + goto raw_error; + } u8 *pkt_buf = (u8 *)ice_malloc(&ad->hw, pkt_len + 1); - if (!pkt_buf) - return -ENOMEM; + if (!pkt_buf) { + ret_val = -ENOMEM; + goto raw_error; + } rte_memcpy(pkt_buf, tmp_spec, pkt_len); filter->pkt_buf = pkt_buf; @@ -1948,6 +1944,11 @@ rte_free(tmp_spec); rte_free(tmp_mask); break; + +raw_error: + rte_free(tmp_spec); + rte_free(tmp_mask); + return ret_val; } case RTE_FLOW_ITEM_TYPE_ETH: @@ -2488,7 +2489,6 @@ rte_free(item); return ret; error: - rte_free(filter->prof); rte_free(filter->pkt_buf); rte_free(item); return ret; diff -Nru dpdk-24.11.3/drivers/net/ice/ice_rxtx.c dpdk-24.11.4/drivers/net/ice/ice_rxtx.c --- dpdk-24.11.3/drivers/net/ice/ice_rxtx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ice/ice_rxtx.c 2025-12-19 12:05:33.000000000 +0000 @@ -1685,9 +1685,13 @@ #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC if (rte_le_to_cpu_16(rxdp->wb.status_error1) & (1 << ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S)) { - mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ | - RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_VLAN; - mb->vlan_tci_outer = mb->vlan_tci; + if ((mb->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED) == 0) { + mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED; + } else { + /* if two tags, move Tag1 to outer tag field */ + mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ; + mb->vlan_tci_outer = mb->vlan_tci; + } mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag2_2nd); PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u", rte_le_to_cpu_16(rxdp->wb.l2tag2_1st), diff -Nru dpdk-24.11.3/drivers/net/ice/ice_rxtx_vec_common.h dpdk-24.11.4/drivers/net/ice/ice_rxtx_vec_common.h --- dpdk-24.11.3/drivers/net/ice/ice_rxtx_vec_common.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ice/ice_rxtx_vec_common.h 2025-12-19 12:05:33.000000000 +0000 @@ -267,8 +267,8 @@ #define ICE_RX_VECTOR_OFFLOAD ( \ RTE_ETH_RX_OFFLOAD_CHECKSUM | \ - RTE_ETH_RX_OFFLOAD_SCTP_CKSUM | \ - RTE_ETH_RX_OFFLOAD_VLAN | \ + RTE_ETH_RX_OFFLOAD_VLAN_STRIP | \ + RTE_ETH_RX_OFFLOAD_VLAN_FILTER | \ RTE_ETH_RX_OFFLOAD_RSS_HASH) #define ICE_VECTOR_PATH 0 diff -Nru dpdk-24.11.3/drivers/net/idpf/idpf_rxtx.c dpdk-24.11.4/drivers/net/idpf/idpf_rxtx.c --- dpdk-24.11.3/drivers/net/idpf/idpf_rxtx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/idpf/idpf_rxtx.c 2025-12-19 12:05:33.000000000 +0000 @@ -42,6 +42,8 @@ ol |= IDPF_TX_OFFLOAD_TCP_CKSUM; if ((offload & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) != 0) ol |= IDPF_TX_OFFLOAD_SCTP_CKSUM; + if ((offload & RTE_ETH_TX_OFFLOAD_TCP_TSO) != 0) + ol |= IDPF_TX_OFFLOAD_TCP_TSO; if ((offload & RTE_ETH_TX_OFFLOAD_MULTI_SEGS) != 0) ol |= IDPF_TX_OFFLOAD_MULTI_SEGS; if ((offload & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) != 0) diff -Nru dpdk-24.11.3/drivers/net/intel/ice/ice_rxtx_vec_common.h dpdk-24.11.4/drivers/net/intel/ice/ice_rxtx_vec_common.h --- dpdk-24.11.3/drivers/net/intel/ice/ice_rxtx_vec_common.h 1970-01-01 00:00:00.000000000 +0000 +++ 
dpdk-24.11.4/drivers/net/intel/ice/ice_rxtx_vec_common.h 2025-12-19 12:05:33.000000000 +0000 @@ -0,0 +1,207 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#ifndef _ICE_RXTX_VEC_COMMON_H_ +#define _ICE_RXTX_VEC_COMMON_H_ + +#include "../common/rx.h" +#include "ice_rxtx.h" + +static inline int +ice_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx) +{ + return (txq->ice_tx_ring[idx].cmd_type_offset_bsz & + rte_cpu_to_le_64(ICE_TXD_QW1_DTYPE_M)) == + rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE); +} + +static inline void +_ice_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq) +{ + const unsigned int mask = rxq->nb_rx_desc - 1; + unsigned int i; + + if (unlikely(!rxq->sw_ring)) { + PMD_DRV_LOG(DEBUG, "sw_ring is NULL"); + return; + } + + if (rxq->rxrearm_nb >= rxq->nb_rx_desc) + return; + + /* free all mbufs that are valid in the ring */ + if (rxq->rxrearm_nb == 0) { + for (i = 0; i < rxq->nb_rx_desc; i++) { + if (rxq->sw_ring[i].mbuf) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + } + } else { + for (i = rxq->rx_tail; + i != rxq->rxrearm_start; + i = (i + 1) & mask) { + if (rxq->sw_ring[i].mbuf) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + } + } + + rxq->rxrearm_nb = rxq->nb_rx_desc; + + /* set all entries to NULL */ + memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc); +} + +#define ICE_TX_NO_VECTOR_FLAGS ( \ + RTE_ETH_TX_OFFLOAD_MULTI_SEGS | \ + RTE_ETH_TX_OFFLOAD_QINQ_INSERT | \ + RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM | \ + RTE_ETH_TX_OFFLOAD_TCP_TSO | \ + RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO | \ + RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO | \ + RTE_ETH_TX_OFFLOAD_IPIP_TNL_TSO | \ + RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO | \ + RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM | \ + RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP) + +#define ICE_TX_VECTOR_OFFLOAD ( \ + RTE_ETH_TX_OFFLOAD_VLAN_INSERT | \ + RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \ + RTE_ETH_TX_OFFLOAD_SCTP_CKSUM | \ + RTE_ETH_TX_OFFLOAD_UDP_CKSUM | \ + RTE_ETH_TX_OFFLOAD_TCP_CKSUM) + +#define ICE_VECTOR_PATH 0 +#define ICE_VECTOR_OFFLOAD_PATH 1 + +static inline int +ice_rx_vec_queue_default(struct ci_rx_queue *rxq) +{ + if (!rxq) + return -1; + + if (!ci_rxq_vec_capable(rxq->nb_rx_desc, rxq->rx_free_thresh)) + return -1; + + if (rxq->proto_xtr != PROTO_XTR_NONE) + return -1; + + return 0; +} + +static inline int +ice_tx_vec_queue_default(struct ci_tx_queue *txq) +{ + if (!txq) + return -1; + + if (txq->tx_rs_thresh < ICE_VPMD_TX_BURST || + txq->tx_rs_thresh > ICE_TX_MAX_FREE_BUF_SZ) + return -1; + + if (txq->offloads & ICE_TX_NO_VECTOR_FLAGS) + return -1; + + if (txq->offloads & ICE_TX_VECTOR_OFFLOAD) + return ICE_VECTOR_OFFLOAD_PATH; + + return ICE_VECTOR_PATH; +} + +static inline int +ice_rx_vec_dev_check_default(struct rte_eth_dev *dev) +{ + int i; + struct ci_rx_queue *rxq; + int ret = 0; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + ret = (ice_rx_vec_queue_default(rxq)); + if (ret < 0) + break; + } + + return ret; +} + +static inline int +ice_tx_vec_dev_check_default(struct rte_eth_dev *dev) +{ + int i; + struct ci_tx_queue *txq; + int ret = 0; + int result = 0; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + txq = dev->data->tx_queues[i]; + ret = ice_tx_vec_queue_default(txq); + if (ret < 0) + return -1; + if (ret == ICE_VECTOR_OFFLOAD_PATH) + result = ret; + } + + return result; +} + +static inline void +ice_txd_enable_offload(struct rte_mbuf *tx_pkt, + uint64_t *txd_hi) +{ + uint64_t ol_flags = tx_pkt->ol_flags; + uint32_t td_cmd = 0; + uint32_t td_offset = 
0; + + /* Tx Checksum Offload */ + /* SET MACLEN */ + td_offset |= (tx_pkt->l2_len >> 1) << + ICE_TX_DESC_LEN_MACLEN_S; + + /* Enable L3 checksum offload */ + if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) { + td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM; + td_offset |= (tx_pkt->l3_len >> 2) << + ICE_TX_DESC_LEN_IPLEN_S; + } else if (ol_flags & RTE_MBUF_F_TX_IPV4) { + td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV4; + td_offset |= (tx_pkt->l3_len >> 2) << + ICE_TX_DESC_LEN_IPLEN_S; + } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { + td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV6; + td_offset |= (tx_pkt->l3_len >> 2) << + ICE_TX_DESC_LEN_IPLEN_S; + } + + /* Enable L4 checksum offloads */ + switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) { + case RTE_MBUF_F_TX_TCP_CKSUM: + td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP; + td_offset |= (sizeof(struct rte_tcp_hdr) >> 2) << + ICE_TX_DESC_LEN_L4_LEN_S; + break; + case RTE_MBUF_F_TX_SCTP_CKSUM: + td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP; + td_offset |= (sizeof(struct rte_sctp_hdr) >> 2) << + ICE_TX_DESC_LEN_L4_LEN_S; + break; + case RTE_MBUF_F_TX_UDP_CKSUM: + td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP; + td_offset |= (sizeof(struct rte_udp_hdr) >> 2) << + ICE_TX_DESC_LEN_L4_LEN_S; + break; + default: + break; + } + + *txd_hi |= ((uint64_t)td_offset) << ICE_TXD_QW1_OFFSET_S; + + /* Tx VLAN insertion Offload */ + if (ol_flags & RTE_MBUF_F_TX_VLAN) { + td_cmd |= ICE_TX_DESC_CMD_IL2TAG1; + *txd_hi |= ((uint64_t)tx_pkt->vlan_tci << + ICE_TXD_QW1_L2TAG1_S); + } + + *txd_hi |= ((uint64_t)td_cmd) << ICE_TXD_QW1_CMD_S; +} +#endif diff -Nru dpdk-24.11.3/drivers/net/ixgbe/base/ixgbe_vf.c dpdk-24.11.4/drivers/net/ixgbe/base/ixgbe_vf.c --- dpdk-24.11.3/drivers/net/ixgbe/base/ixgbe_vf.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ixgbe/base/ixgbe_vf.c 2025-12-19 12:05:33.000000000 +0000 @@ -482,7 +482,7 @@ msgbuf[0] = IXGBE_VF_GET_PF_LINK_STATE; - err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 6); + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 3); if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) { err = IXGBE_ERR_MBX; *speed = IXGBE_LINK_SPEED_UNKNOWN; diff -Nru dpdk-24.11.3/drivers/net/ixgbe/ixgbe_flow.c dpdk-24.11.4/drivers/net/ixgbe/ixgbe_flow.c --- dpdk-24.11.3/drivers/net/ixgbe/ixgbe_flow.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ixgbe/ixgbe_flow.c 2025-12-19 12:05:33.000000000 +0000 @@ -2111,10 +2111,11 @@ return -rte_errno; } - /* only x550 family only support sctp port */ + /* only some mac types support sctp port */ if (hw->mac.type == ixgbe_mac_X550 || hw->mac.type == ixgbe_mac_X550EM_x || - hw->mac.type == ixgbe_mac_X550EM_a) { + hw->mac.type == ixgbe_mac_X550EM_a || + hw->mac.type == ixgbe_mac_E610) { /** * Only care about src & dst ports, * others should be masked. 
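
Relatedly, the i40e, iavf and ice Rx paths above now report RTE_MBUF_F_RX_QINQ_STRIPPED only when a second tag was actually stripped; a singly tagged packet carries just RTE_MBUF_F_RX_VLAN_STRIPPED with its tag in vlan_tci. Under the standard mbuf convention the outer tag then sits in vlan_tci_outer and the inner one in vlan_tci. A small illustrative consumer-side sketch:

    #include <stdio.h>
    #include <rte_mbuf.h>

    /* Illustrative only: report whichever stripped VLAN tags an Rx mbuf carries. */
    static void
    example_print_stripped_vlans(const struct rte_mbuf *m)
    {
        if (m->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)
            printf("outer tci %u, inner tci %u\n",
                   m->vlan_tci_outer, m->vlan_tci);
        else if (m->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)
            printf("single tci %u\n", m->vlan_tci);
        else
            printf("no stripped tag\n");
    }
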
diff -Nru dpdk-24.11.3/drivers/net/memif/rte_eth_memif.c dpdk-24.11.4/drivers/net/memif/rte_eth_memif.c --- dpdk-24.11.3/drivers/net/memif/rte_eth_memif.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/memif/rte_eth_memif.c 2025-12-19 12:05:33.000000000 +0000 @@ -1829,7 +1829,8 @@ static int memif_check_socket_filename(const char *filename) { - char *dir = NULL, *tmp; + char *dir = NULL; + const char *tmp; uint32_t idx; int ret = 0; diff -Nru dpdk-24.11.3/drivers/net/mlx4/mlx4_rxtx.c dpdk-24.11.4/drivers/net/mlx4/mlx4_rxtx.c --- dpdk-24.11.3/drivers/net/mlx4/mlx4_rxtx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx4/mlx4_rxtx.c 2025-12-19 12:05:33.000000000 +0000 @@ -638,7 +638,7 @@ thdr.vto = sq->buf; /* New TXBB, stash the first 32bits for later use. */ pv[*pv_counter].dst = (volatile uint32_t *)thdr.to; - pv[(*pv_counter)++].val = *(uint32_t *)from, + pv[(*pv_counter)++].val = *(uint32_t *)from; from += sizeof(uint32_t); thdr.to += sizeof(uint32_t); remain_size -= txbb_avail_space + sizeof(uint32_t); diff -Nru dpdk-24.11.3/drivers/net/mlx5/hws/mlx5dr_action.c dpdk-24.11.4/drivers/net/mlx5/hws/mlx5dr_action.c --- dpdk-24.11.3/drivers/net/mlx5/hws/mlx5dr_action.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/hws/mlx5dr_action.c 2025-12-19 12:05:33.000000000 +0000 @@ -87,6 +87,7 @@ BIT(MLX5DR_ACTION_TYP_REFORMAT_L2_TO_TNL_L3), BIT(MLX5DR_ACTION_TYP_TBL) | BIT(MLX5DR_ACTION_TYP_MISS) | + BIT(MLX5DR_ACTION_TYP_TIR) | BIT(MLX5DR_ACTION_TYP_VPORT) | BIT(MLX5DR_ACTION_TYP_DROP) | BIT(MLX5DR_ACTION_TYP_DEST_ROOT) | diff -Nru dpdk-24.11.3/drivers/net/mlx5/hws/mlx5dr_buddy.c dpdk-24.11.4/drivers/net/mlx5/hws/mlx5dr_buddy.c --- dpdk-24.11.3/drivers/net/mlx5/hws/mlx5dr_buddy.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/hws/mlx5dr_buddy.c 2025-12-19 12:05:33.000000000 +0000 @@ -147,6 +147,7 @@ simple_free(buddy->num_free); simple_free(buddy->bits); + simple_free(buddy); } int mlx5dr_buddy_alloc_mem(struct mlx5dr_buddy_mem *buddy, int order) diff -Nru dpdk-24.11.3/drivers/net/mlx5/hws/mlx5dr_definer.c dpdk-24.11.4/drivers/net/mlx5/hws/mlx5dr_definer.c --- dpdk-24.11.3/drivers/net/mlx5/hws/mlx5dr_definer.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/hws/mlx5dr_definer.c 2025-12-19 12:05:33.000000000 +0000 @@ -9,11 +9,10 @@ #define GTP_PDU_SC 0x85 #define BAD_PORT 0xBAD #define BAD_SQN 0xBAD -#define ETH_TYPE_IPV4_VXLAN 0x0800 -#define ETH_TYPE_IPV6_VXLAN 0x86DD #define UDP_VXLAN_PORT 4789 #define UDP_VXLAN_GPE_PORT 4790 #define UDP_GTPU_PORT 2152 +#define UDP_ESP_PORT 4500 #define UDP_PORT_MPLS 6635 #define UDP_GENEVE_PORT 6081 #define UDP_ROCEV2_PORT 4791 @@ -230,6 +229,8 @@ X(SET_BE16, nvgre_protocol, v->protocol, rte_flow_item_nvgre) \ X(SET_BE32P, nvgre_dw1, &v->tni[0], rte_flow_item_nvgre) \ X(SET, meter_color, rte_col_2_mlx5_col(v->color), rte_flow_item_meter_color) \ + X(SET, ipsec_protocol, IPPROTO_ESP, rte_flow_item_esp) \ + X(SET, ipsec_udp_port, UDP_ESP_PORT, rte_flow_item_esp) \ X(SET_BE32, ipsec_spi, v->hdr.spi, rte_flow_item_esp) \ X(SET_BE32, ipsec_sequence_number, v->hdr.seq, rte_flow_item_esp) \ X(SET, ib_l4_udp_port, UDP_ROCEV2_PORT, rte_flow_item_ib_bth) \ @@ -2804,6 +2805,32 @@ const struct rte_flow_item_esp *m = item->mask; struct mlx5dr_definer_fc *fc; + /* To match on ESP we must match on ip_protocol and optionally on l4_dport */ + if (!cd->relaxed) { + bool over_udp; + + fc = &cd->fc[DR_CALC_FNAME(IP_PROTOCOL, false)]; + over_udp = fc->tag_set == 
&mlx5dr_definer_udp_protocol_set; + + if (over_udp) { + fc = &cd->fc[DR_CALC_FNAME(L4_DPORT, false)]; + if (!fc->tag_set) { + fc->item_idx = item_idx; + fc->tag_mask_set = &mlx5dr_definer_ones_set; + fc->tag_set = &mlx5dr_definer_ipsec_udp_port_set; + DR_CALC_SET(fc, eth_l4, destination_port, false); + } + } else { + fc = &cd->fc[DR_CALC_FNAME(IP_PROTOCOL, false)]; + if (!fc->tag_set) { + fc->item_idx = item_idx; + fc->tag_set = &mlx5dr_definer_ipsec_protocol_set; + fc->tag_mask_set = &mlx5dr_definer_ones_set; + DR_CALC_SET(fc, eth_l3, protocol_next_header, false); + } + } + } + if (!m) return 0; if (m->hdr.spi) { diff -Nru dpdk-24.11.3/drivers/net/mlx5/hws/mlx5dr_pool.c dpdk-24.11.4/drivers/net/mlx5/hws/mlx5dr_pool.c --- dpdk-24.11.3/drivers/net/mlx5/hws/mlx5dr_pool.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/hws/mlx5dr_pool.c 2025-12-19 12:05:33.000000000 +0000 @@ -167,7 +167,7 @@ mlx5dr_pool_buddy_get_next_buddy(struct mlx5dr_pool *pool, int idx, uint32_t order, bool *is_new_buddy) { - static struct mlx5dr_buddy_mem *buddy; + struct mlx5dr_buddy_mem *buddy; uint32_t new_buddy_size; buddy = pool->db.buddy_manager->buddies[idx]; @@ -271,7 +271,6 @@ buddy = pool->db.buddy_manager->buddies[i]; if (buddy) { mlx5dr_buddy_cleanup(buddy); - simple_free(buddy); pool->db.buddy_manager->buddies[i] = NULL; } } diff -Nru dpdk-24.11.3/drivers/net/mlx5/hws/mlx5dr_rule.c dpdk-24.11.4/drivers/net/mlx5/hws/mlx5dr_rule.c --- dpdk-24.11.3/drivers/net/mlx5/hws/mlx5dr_rule.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/hws/mlx5dr_rule.c 2025-12-19 12:05:33.000000000 +0000 @@ -1071,7 +1071,7 @@ if (mlx5dr_matcher_req_fw_wqe(matcher) || mlx5dr_table_is_root(matcher->tbl) || - matcher->tbl->ctx->caps->access_index_mode == MLX5DR_MATCHER_INSERT_BY_HASH || + matcher->attr.distribute_mode != MLX5DR_MATCHER_DISTRIBUTE_BY_HASH || matcher->tbl->ctx->caps->flow_table_hash_type != MLX5_FLOW_TABLE_HASH_TYPE_CRC32) { DR_LOG(DEBUG, "Matcher is not supported"); rte_errno = ENOTSUP; diff -Nru dpdk-24.11.3/drivers/net/mlx5/linux/mlx5_ethdev_os.c dpdk-24.11.4/drivers/net/mlx5/linux/mlx5_ethdev_os.c --- dpdk-24.11.3/drivers/net/mlx5/linux/mlx5_ethdev_os.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/linux/mlx5_ethdev_os.c 2025-12-19 12:05:33.000000000 +0000 @@ -72,7 +72,7 @@ * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[MLX5_NAMESIZE]) +mlx5_get_ifname(const struct rte_eth_dev *dev, char ifname[MLX5_NAMESIZE]) { struct mlx5_priv *priv = dev->data->dev_private; unsigned int ifindex; @@ -86,12 +86,11 @@ ifindex = mlx5_ifindex(dev); if (!ifindex) { if (!priv->representor) - return mlx5_get_ifname_sysfs(priv->sh->ibdev_path, - *ifname); + return mlx5_get_ifname_sysfs(priv->sh->ibdev_path, ifname); rte_errno = ENXIO; return -rte_errno; } - if (if_indextoname(ifindex, &(*ifname)[0])) + if (if_indextoname(ifindex, ifname)) return 0; rte_errno = errno; return -rte_errno; @@ -149,16 +148,46 @@ static int mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) { - char ifname[sizeof(ifr->ifr_name)]; + char ifname[MLX5_NAMESIZE]; int ret; - ret = mlx5_get_ifname(dev, &ifname); + ret = mlx5_get_ifname(dev, ifname); if (ret) return -rte_errno; return mlx5_ifreq_by_ifname(ifname, req, ifr); } /** + * Get device minimum and maximum allowed MTU values. + * + * @param dev + * Pointer to Ethernet device. 
+ * @param[out] min_mtu + * Minimum MTU value output buffer. + * @param[out] max_mtu + * Maximum MTU value output buffer. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_os_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu) +{ + struct mlx5_priv *priv = dev->data->dev_private; + int nl_route; + int ret; + + nl_route = mlx5_nl_init(NETLINK_ROUTE, 0); + if (nl_route < 0) + return nl_route; + + ret = mlx5_nl_get_mtu_bounds(nl_route, priv->if_index, min_mtu, max_mtu); + + close(nl_route); + return ret; +} + +/** * Get device MTU. * * @param dev @@ -482,7 +511,7 @@ mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) { int ret; - struct rte_eth_link dev_link; + struct rte_eth_link dev_link = { 0 }; time_t start_time = time(NULL); int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT; @@ -1936,4 +1965,3 @@ rte_mem_unmap(base, MLX5_ST_SZ_BYTES(initial_seg)); return 0; } - diff -Nru dpdk-24.11.3/drivers/net/mlx5/linux/mlx5_flow_os.c dpdk-24.11.4/drivers/net/mlx5/linux/mlx5_flow_os.c --- dpdk-24.11.3/drivers/net/mlx5/linux/mlx5_flow_os.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/linux/mlx5_flow_os.c 2025-12-19 12:05:33.000000000 +0000 @@ -18,14 +18,19 @@ const struct rte_flow_item *item, uint64_t item_flags, uint8_t target_protocol, + bool allow_seq, struct rte_flow_error *error) { const struct rte_flow_item_esp *mask = item->mask; const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : MLX5_FLOW_LAYER_OUTER_L3; - const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : - MLX5_FLOW_LAYER_OUTER_L4; + static const struct rte_flow_item_esp mlx5_flow_item_esp_mask = { + .hdr = { + .spi = RTE_BE32(0xffffffff), + .seq = RTE_BE32(0xffffffff), + }, + }; int ret; if (!mlx5_hws_active(dev)) { @@ -34,10 +39,6 @@ RTE_FLOW_ERROR_TYPE_ITEM, item, "L3 is mandatory to filter on L4"); } - if (item_flags & l4m) - return rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ITEM, item, - "multiple L4 layers not supported"); if (target_protocol != 0xff && target_protocol != IPPROTO_ESP) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, @@ -47,7 +48,8 @@ mask = &rte_flow_item_esp_mask; ret = mlx5_flow_item_acceptable (dev, item, (const uint8_t *)mask, - (const uint8_t *)&rte_flow_item_esp_mask, + allow_seq ? (const uint8_t *)&mlx5_flow_item_esp_mask : + (const uint8_t *)&rte_flow_item_esp_mask, sizeof(struct rte_flow_item_esp), MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) diff -Nru dpdk-24.11.3/drivers/net/mlx5/linux/mlx5_flow_os.h dpdk-24.11.4/drivers/net/mlx5/linux/mlx5_flow_os.h --- dpdk-24.11.3/drivers/net/mlx5/linux/mlx5_flow_os.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/linux/mlx5_flow_os.h 2025-12-19 12:05:33.000000000 +0000 @@ -514,6 +514,8 @@ * Bit-fields that holds the items detected until now. * @param[in] target_protocol * The next protocol in the previous item. + * @param[in] allow_seq + * The match on sequence number is supported. * @param[out] error * Pointer to error structure. 
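
The mlx5 changes above make plain ESP (IP protocol 50) and ESP over UDP (NAT-T, destination port 4500) implicitly matchable, and gate sequence-number matching on allow_seq. For orientation, a hedged sketch of an rte_flow pattern matching ESP by SPI (attributes and actions omitted, values hypothetical):

    #include <rte_byteorder.h>
    #include <rte_flow.h>

    /* Illustrative pattern only: eth / ipv4 / esp, matching a specific SPI. */
    static const struct rte_flow_item_esp esp_spec = {
        .hdr.spi = RTE_BE32(0x1234),
    };
    static const struct rte_flow_item_esp esp_mask = {
        .hdr.spi = RTE_BE32(0xffffffff),
    };
    static const struct rte_flow_item esp_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
        { .type = RTE_FLOW_ITEM_TYPE_ESP, .spec = &esp_spec, .mask = &esp_mask },
        { .type = RTE_FLOW_ITEM_TYPE_END },
    };
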
* @@ -525,6 +527,7 @@ const struct rte_flow_item *item, uint64_t item_flags, uint8_t target_protocol, + bool allow_seq, struct rte_flow_error *error); /** diff -Nru dpdk-24.11.3/drivers/net/mlx5/linux/mlx5_os.c dpdk-24.11.4/drivers/net/mlx5/linux/mlx5_os.c --- dpdk-24.11.3/drivers/net/mlx5/linux/mlx5_os.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/linux/mlx5_os.c 2025-12-19 12:05:33.000000000 +0000 @@ -431,8 +431,8 @@ DRV_LOG(INFO, "No SW steering support"); return; } - dv_attr.type = IBV_FLOW_ATTR_NORMAL, - dv_attr.match_mask = (void *)&matcher_mask, + dv_attr.type = IBV_FLOW_ATTR_NORMAL; + dv_attr.match_mask = (void *)&matcher_mask; dv_attr.match_criteria_enable = (1 << MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT) | (1 << MLX5_MATCH_CRITERIA_ENABLE_MISC5_BIT); @@ -737,6 +737,30 @@ return err; } +#ifdef HAVE_MLX5DV_DR +static void +mlx5_destroy_send_to_kernel_action(struct mlx5_dev_ctx_shared *sh) +{ + int i; + + for (i = 0; i < MLX5DR_TABLE_TYPE_MAX; i++) { + if (sh->send_to_kernel_action[i].action) { + void *action = sh->send_to_kernel_action[i].action; + + mlx5_glue->destroy_flow_action(action); + sh->send_to_kernel_action[i].action = NULL; + } + if (sh->send_to_kernel_action[i].tbl) { + struct mlx5_flow_tbl_resource *tbl = + sh->send_to_kernel_action[i].tbl; + + flow_dv_tbl_resource_release(sh, tbl); + sh->send_to_kernel_action[i].tbl = NULL; + } + } +} +#endif /* HAVE_MLX5DV_DR */ + /** * Destroy DR related data within private structure. * @@ -747,15 +771,23 @@ mlx5_os_free_shared_dr(struct mlx5_priv *priv) { struct mlx5_dev_ctx_shared *sh = priv->sh; -#ifdef HAVE_MLX5DV_DR - int i; -#endif + struct mlx5_rxq_ctrl *rxq_ctrl; + int i = 0; MLX5_ASSERT(sh && sh->refcnt); if (sh->refcnt > 1) return; + LIST_FOREACH(rxq_ctrl, &sh->shared_rxqs, next) { + DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced", + priv->dev_data->port_id, rxq_ctrl->rxq.idx); + ++i; + } + if (i > 0) + DRV_LOG(WARNING, "port %u some Rx queues still remain %d", + priv->dev_data->port_id, i); MLX5_ASSERT(LIST_EMPTY(&sh->shared_rxqs)); #ifdef HAVE_MLX5DV_DR + mlx5_destroy_send_to_kernel_action(sh); if (sh->rx_domain) { mlx5_glue->dr_destroy_domain(sh->rx_domain); sh->rx_domain = NULL; @@ -778,21 +810,6 @@ mlx5_glue->destroy_flow_action(sh->pop_vlan_action); sh->pop_vlan_action = NULL; } - for (i = 0; i < MLX5DR_TABLE_TYPE_MAX; i++) { - if (sh->send_to_kernel_action[i].action) { - void *action = sh->send_to_kernel_action[i].action; - - mlx5_glue->destroy_flow_action(action); - sh->send_to_kernel_action[i].action = NULL; - } - if (sh->send_to_kernel_action[i].tbl) { - struct mlx5_flow_tbl_resource *tbl = - sh->send_to_kernel_action[i].tbl; - - flow_dv_tbl_resource_release(sh, tbl); - sh->send_to_kernel_action[i].tbl = NULL; - } - } #endif /* HAVE_MLX5DV_DR */ if (sh->default_miss_action) mlx5_glue->destroy_flow_action @@ -1563,6 +1580,8 @@ eth_dev->data->mac_addrs = priv->mac; eth_dev->device = dpdk_dev; eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + /* Fetch minimum and maximum allowed MTU from the device. */ + mlx5_get_mtu_bounds(eth_dev, &priv->min_mtu, &priv->max_mtu); /* Configure the first MAC address by default. 
*/ if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) { DRV_LOG(ERR, @@ -1579,7 +1598,7 @@ { char ifname[MLX5_NAMESIZE]; - if (mlx5_get_ifname(eth_dev, &ifname) == 0) + if (mlx5_get_ifname(eth_dev, ifname) == 0) DRV_LOG(DEBUG, "port %u ifname is \"%s\"", eth_dev->data->port_id, ifname); else @@ -1593,6 +1612,7 @@ err = rte_errno; goto error; } + eth_dev->data->mtu = priv->mtu; DRV_LOG(DEBUG, "port %u MTU is %u", eth_dev->data->port_id, priv->mtu); /* Initialize burst functions to prevent crashes before link-up. */ @@ -1624,16 +1644,17 @@ /* Read link status in case it is up and there will be no event. */ mlx5_link_update(eth_dev, 0); /* Watch LSC interrupts between port probe and port start. */ - priv->sh->port[priv->dev_port - 1].nl_ih_port_id = - eth_dev->data->port_id; + priv->sh->port[priv->dev_port - 1].nl_ih_port_id = eth_dev->data->port_id; mlx5_set_link_up(eth_dev); for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) { icfg[i].release_mem_en = !!sh->config.reclaim_mode; if (sh->config.reclaim_mode) icfg[i].per_core_cache = 0; #ifdef HAVE_MLX5_HWS_SUPPORT - if (priv->sh->config.dv_flow_en == 2) + if (priv->sh->config.dv_flow_en == 2) { icfg[i].size = sizeof(struct rte_flow_hw) + sizeof(struct rte_flow_nt2hws); + icfg[i].size += sizeof(struct rte_flow_hw_aux); + } #endif priv->flows[i] = mlx5_ipool_create(&icfg[i]); if (!priv->flows[i]) diff -Nru dpdk-24.11.3/drivers/net/mlx5/linux/mlx5_verbs.c dpdk-24.11.4/drivers/net/mlx5/linux/mlx5_verbs.c --- dpdk-24.11.3/drivers/net/mlx5/linux/mlx5_verbs.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/linux/mlx5_verbs.c 2025-12-19 12:05:33.000000000 +0000 @@ -883,7 +883,7 @@ * dev_cap.max_sge limit and will still work properly. */ qp_attr.cap.max_send_sge = 1; - qp_attr.qp_type = IBV_QPT_RAW_PACKET, + qp_attr.qp_type = IBV_QPT_RAW_PACKET; /* Do *NOT* enable this, completions events are managed per Tx burst. */ qp_attr.sq_sig_all = 0; qp_attr.pd = priv->sh->cdev->pd; diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5.c dpdk-24.11.4/drivers/net/mlx5/mlx5.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5.c 2025-12-19 12:05:33.000000000 +0000 @@ -385,9 +385,6 @@ }, }; -#define MLX5_FLOW_MIN_ID_POOL_SIZE 512 -#define MLX5_ID_GENERATION_ARRAY_FACTOR 16 - #define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 1024 #define MLX5_RXQ_ENH_CQE_COMP_MASK 0x80 @@ -1095,9 +1092,15 @@ /* The unit is uint64_t. */ node.header_length_field_shift = 0x3; /* Header length is the 2nd byte. */ - node.header_length_field_offset = 0x8; - if (attr->header_length_mask_width < 8) - node.header_length_field_offset += 8 - attr->header_length_mask_width; + if (attr->header_length_field_mode_wa) { + /* Legacy firmware before ConnectX-8, we should provide offset WA. */ + node.header_length_field_offset = 8; + if (attr->header_length_mask_width < 8) + node.header_length_field_offset += 8 - attr->header_length_mask_width; + } else { + /* The new firmware, we can specify the correct offset directly. */ + node.header_length_field_offset = 12; + } node.header_length_field_mask = 0xF; /* One byte next header protocol. */ node.next_header_field_size = 0x8; @@ -2306,6 +2309,18 @@ dev->process_private = NULL; } +static void +mlx5_flow_pools_destroy(struct mlx5_priv *priv) +{ + int i; + + for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) { + if (!priv->flows[i]) + continue; + mlx5_ipool_destroy(priv->flows[i]); + } +} + /** * DPDK callback to close the device. * @@ -2366,6 +2381,11 @@ /* Free the eCPRI flex parser resource. 
*/ mlx5_flex_parser_ecpri_release(dev); mlx5_flex_item_port_cleanup(dev); + if (priv->representor) { + /* Each representor has a dedicated interrupts handler */ + rte_intr_instance_free(dev->intr_handle); + dev->intr_handle = NULL; + } mlx5_indirect_list_handles_release(dev); #ifdef HAVE_MLX5_HWS_SUPPORT flow_hw_destroy_vport_action(dev); @@ -2501,6 +2521,7 @@ if (!c) claim_zero(rte_eth_switch_domain_free(priv->domain_id)); } + mlx5_flow_pools_destroy(priv); memset(priv, 0, sizeof(*priv)); priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; /* diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5.h dpdk-24.11.4/drivers/net/mlx5/mlx5.h --- dpdk-24.11.3/drivers/net/mlx5/mlx5.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5.h 2025-12-19 12:05:33.000000000 +0000 @@ -74,6 +74,15 @@ /* Maximal number of field/field parts to map into sample registers .*/ #define MLX5_FLEX_ITEM_MAPPING_NUM 32 +/* Number of bytes not included in MTU. */ +#define MLX5_ETH_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_VLAN_HLEN + RTE_ETHER_CRC_LEN) + +/* Minimum allowed MTU to be reported whenever PMD cannot query it from OS. */ +#define MLX5_ETH_MIN_MTU (RTE_ETHER_MIN_MTU) + +/* Maximum allowed MTU to be reported whenever PMD cannot query it from OS. */ +#define MLX5_ETH_MAX_MTU (9978) + enum mlx5_ipool_index { #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H) MLX5_IPOOL_DECAP_ENCAP = 0, /* Pool for encap/decap resource. */ @@ -485,8 +494,6 @@ #define MLX5_MAX_PENDING_QUERIES 4 #define MLX5_CNT_MR_ALLOC_BULK 64 #define MLX5_CNT_SHARED_OFFSET 0x80000000 -#define IS_BATCH_CNT(cnt) (((cnt) & (MLX5_CNT_SHARED_OFFSET - 1)) >= \ - MLX5_CNT_BATCH_OFFSET) #define MLX5_CNT_SIZE (sizeof(struct mlx5_flow_counter)) #define MLX5_AGE_SIZE (sizeof(struct mlx5_age_param)) @@ -1235,7 +1242,6 @@ #define MLX5_FLOW_TABLE_LEVEL_METER (MLX5_MAX_TABLES - 3) #define MLX5_FLOW_TABLE_LEVEL_POLICY (MLX5_MAX_TABLES - 4) #define MLX5_MAX_TABLES_EXTERNAL MLX5_FLOW_TABLE_LEVEL_POLICY -#define MLX5_FLOW_TABLE_HWS_POLICY (MLX5_MAX_TABLES - 10) #define MLX5_MAX_TABLES_FDB UINT16_MAX #define MLX5_FLOW_TABLE_PTYPE_RSS_NUM 1024 #define MLX5_FLOW_TABLE_PTYPE_RSS_LAST (MLX5_MAX_TABLES - 11) @@ -1813,8 +1819,6 @@ void (*lb_dummy_queue_release)(struct rte_eth_dev *dev); }; -#define MLX5_RSS_HASH_FIELDS_LEN RTE_DIM(mlx5_rss_hash_fields) - enum mlx5_ctrl_flow_type { MLX5_CTRL_FLOW_TYPE_GENERAL, MLX5_CTRL_FLOW_TYPE_SQ_MISS_ROOT, @@ -1957,6 +1961,8 @@ unsigned int vlan_filter_n; /* Number of configured VLAN filters. */ /* Device properties. */ uint16_t mtu; /* Configured MTU. */ + uint16_t min_mtu; /* Minimum MTU allowed on the NIC. */ + uint16_t max_mtu; /* Maximum MTU allowed on the NIC. */ unsigned int isolated:1; /* Whether isolated mode is enabled. */ unsigned int representor:1; /* Device is a port representor. */ unsigned int master:1; /* Device is a E-Switch master. 
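
With min_mtu and max_mtu now populated from the kernel (falling back to MLX5_ETH_MIN_MTU/MLX5_ETH_MAX_MTU when the query is unsupported), applications see real bounds through rte_eth_dev_info_get() and can validate an MTU before applying it. A brief, illustrative sketch:

    #include <rte_ethdev.h>

    /* Illustrative only: clamp a requested MTU to the device-reported bounds. */
    static int
    example_set_mtu_checked(uint16_t port_id, uint16_t mtu)
    {
        struct rte_eth_dev_info info;
        int ret;

        ret = rte_eth_dev_info_get(port_id, &info);
        if (ret != 0)
            return ret;

        if (mtu < info.min_mtu)
            mtu = info.min_mtu;
        else if (mtu > info.max_mtu)
            mtu = info.max_mtu;

        return rte_eth_dev_set_mtu(port_id, mtu);
    }
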
*/ @@ -2286,11 +2292,11 @@ int mlx5_dev_configure_rss_reta(struct rte_eth_dev *dev); uint64_t mlx5_get_restore_flags(struct rte_eth_dev *dev, enum rte_eth_dev_operation op); +void mlx5_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu); /* mlx5_ethdev_os.c */ -int mlx5_get_ifname(const struct rte_eth_dev *dev, - char (*ifname)[MLX5_NAMESIZE]); +int mlx5_get_ifname(const struct rte_eth_dev *dev, char ifname[MLX5_NAMESIZE]); unsigned int mlx5_ifindex(const struct rte_eth_dev *dev); int mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[RTE_ETHER_ADDR_LEN]); int mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu); @@ -2324,6 +2330,7 @@ uint16_t *n_stats, uint16_t *n_stats_sec); void mlx5_os_stats_init(struct rte_eth_dev *dev); int mlx5_get_flag_dropless_rq(struct rte_eth_dev *dev); +int mlx5_os_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu); /* mlx5_mac.c */ diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_defs.h dpdk-24.11.4/drivers/net/mlx5/mlx5_defs.h --- dpdk-24.11.3/drivers/net/mlx5/mlx5_defs.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_defs.h 2025-12-19 12:05:33.000000000 +0000 @@ -46,9 +46,6 @@ /* Maximum number of DCS created per port. */ #define MLX5_HWS_CNT_DCS_NUM 4 -/* Alarm timeout. */ -#define MLX5_ALARM_TIMEOUT_US 100000 - /* Maximum number of extended statistics counters. */ #define MLX5_MAX_XSTATS 64 @@ -170,9 +167,6 @@ /* Size of the hash table for tag table. */ #define MLX5_TAGS_HLIST_ARRAY_SIZE (1 << 15) -/* Size fo the hash table for SFT table. */ -#define MLX5_FLOW_SFT_HLIST_ARRAY_SIZE 4096 - /* Hairpin TX/RX queue configuration parameters. */ #define MLX5_HAIRPIN_QUEUE_STRIDE 6 #define MLX5_HAIRPIN_JUMBO_LOG_SIZE (14 + 2) diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_devx.c dpdk-24.11.4/drivers/net/mlx5/mlx5_devx.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_devx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_devx.c 2025-12-19 12:05:33.000000000 +0000 @@ -776,6 +776,11 @@ struct mlx5_external_q *ext_rxq = mlx5_ext_rxq_get(dev, queues[i]); + if (ext_rxq == NULL) { + rte_errno = EINVAL; + mlx5_free(rqt_attr); + return NULL; + } rqt_attr->rq_list[i] = ext_rxq->hw_id; } else { struct mlx5_rxq_priv *rxq = diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_ethdev.c dpdk-24.11.4/drivers/net/mlx5/mlx5_ethdev.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -360,9 +360,11 @@ unsigned int max; uint16_t max_wqe; + info->min_mtu = priv->min_mtu; + info->max_mtu = priv->max_mtu; + info->max_rx_pktlen = info->max_mtu + MLX5_ETH_OVERHEAD; /* FIXME: we should ask the device for these values. */ info->min_rx_bufsize = 32; - info->max_rx_pktlen = 65536; info->max_lro_pkt_size = MLX5_MAX_LRO_SIZE; /* * Since we need one CQ per QP, the limit is the minimum number @@ -863,3 +865,41 @@ /* mlx5 PMD does not require any configuration restore. */ return 0; } + +/** + * Query minimum and maximum allowed MTU value on the device. + * + * This functions will always return valid MTU bounds. + * In case platform-specific implementation fails or current platform does not support it, + * the fallback default values will be used. + * + * @param[in] dev + * Pointer to Ethernet device + * @param[out] min_mtu + * Minimum MTU value output buffer. + * @param[out] max_mtu + * Maximum MTU value output buffer. 
+ */ +void +mlx5_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu) +{ + int ret; + + MLX5_ASSERT(min_mtu != NULL); + MLX5_ASSERT(max_mtu != NULL); + + ret = mlx5_os_get_mtu_bounds(dev, min_mtu, max_mtu); + if (ret < 0) { + if (ret != -ENOTSUP) + DRV_LOG(INFO, "port %u failed to query MTU bounds, using fallback values", + dev->data->port_id); + *min_mtu = MLX5_ETH_MIN_MTU; + *max_mtu = MLX5_ETH_MAX_MTU; + + /* This function does not fail. Clear rte_errno. */ + rte_errno = 0; + } + + DRV_LOG(INFO, "port %u minimum MTU is %u", dev->data->port_id, *min_mtu); + DRV_LOG(INFO, "port %u maximum MTU is %u", dev->data->port_id, *max_mtu); +} diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_flow.c dpdk-24.11.4/drivers/net/mlx5/mlx5_flow.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_flow.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_flow.c 2025-12-19 12:05:33.000000000 +0000 @@ -33,6 +33,21 @@ #include "mlx5_common_os.h" #include "rte_pmd_mlx5.h" +const uint64_t mlx5_rss_hash_fields[] = { + [MLX5_RSS_HASH_IDX_IPV4] = MLX5_RSS_HASH_IPV4, + [MLX5_RSS_HASH_IDX_IPV4_TCP] = MLX5_RSS_HASH_IPV4_TCP, + [MLX5_RSS_HASH_IDX_IPV4_UDP] = MLX5_RSS_HASH_IPV4_UDP, + [MLX5_RSS_HASH_IDX_IPV4_ESP] = MLX5_RSS_HASH_IPV4_ESP, + [MLX5_RSS_HASH_IDX_IPV6] = MLX5_RSS_HASH_IPV6, + [MLX5_RSS_HASH_IDX_IPV6_TCP] = MLX5_RSS_HASH_IPV6_TCP, + [MLX5_RSS_HASH_IDX_IPV6_UDP] = MLX5_RSS_HASH_IPV6_UDP, + [MLX5_RSS_HASH_IDX_IPV6_ESP] = MLX5_RSS_HASH_IPV6_ESP, + [MLX5_RSS_HASH_IDX_TCP] = MLX5_TCP_IBV_RX_HASH, + [MLX5_RSS_HASH_IDX_UDP] = MLX5_UDP_IBV_RX_HASH, + [MLX5_RSS_HASH_IDX_ESP_SPI] = MLX5_RSS_HASH_ESP_SPI, + [MLX5_RSS_HASH_IDX_NONE] = MLX5_RSS_HASH_NONE, +}; + /* * Shared array for quick translation between port_id and vport mask/values * used for HWS rules. @@ -1651,12 +1666,18 @@ LIST_FOREACH(rxq_ctrl, &opriv->rxqsctrl, next) { rxq_ctrl->rxq.mark = 1; } + LIST_FOREACH(rxq_ctrl, &opriv->sh->shared_rxqs, next) { + rxq_ctrl->rxq.mark = 1; + } opriv->mark_enabled = 1; } } else { LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) { rxq_ctrl->rxq.mark = 1; } + LIST_FOREACH(rxq_ctrl, &priv->sh->shared_rxqs, next) { + rxq_ctrl->rxq.mark = 1; + } priv->mark_enabled = 1; } priv->sh->shared_mark_enabled = 1; @@ -8230,8 +8251,18 @@ #ifdef HAVE_MLX5_HWS_SUPPORT struct mlx5_priv *priv = dev->data->dev_private; - if (priv->sh->config.dv_flow_en == 2) - return mlx5_flow_nta_add_default_copy_action(dev, &error); + if (priv->sh->config.dv_flow_en == 2) { + /* + * Ignore this failure, if the proxy port is not started, other + * default jump actions are not created and this rule will not + * be hit. + */ + if (mlx5_flow_nta_add_default_copy_action(dev, &error)) { + DRV_LOG(DEBUG, "port %u failed to start default copy action: %s", + dev->data->port_id, strerror(rte_errno)); + return 0; + } + } #endif /* Make sure default copy action (reg_c[0] -> reg_b) is created. 
*/ return flow_mreg_add_default_copy_action(dev, &error); @@ -11114,12 +11145,12 @@ (error, ENOMEM, RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, "invalid default miss RSS"); - ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT, - ctx->action_rss.level = 0, - ctx->action_rss.types = priv->rss_conf.rss_hf, - ctx->action_rss.key_len = priv->rss_conf.rss_key_len, - ctx->action_rss.queue_num = priv->reta_idx_n, - ctx->action_rss.key = priv->rss_conf.rss_key, + ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT; + ctx->action_rss.level = 0; + ctx->action_rss.types = priv->rss_conf.rss_hf; + ctx->action_rss.key_len = priv->rss_conf.rss_key_len; + ctx->action_rss.queue_num = priv->reta_idx_n; + ctx->action_rss.key = priv->rss_conf.rss_key; ctx->action_rss.queue = ctx->queue; if (!priv->reta_idx_n || !priv->rxqs_n) return rte_flow_error_set diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_flow.h dpdk-24.11.4/drivers/net/mlx5/mlx5_flow.h --- dpdk-24.11.3/drivers/net/mlx5/mlx5_flow.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_flow.h 2025-12-19 12:05:33.000000000 +0000 @@ -99,6 +99,11 @@ #define MLX5_INDIRECT_ACT_CT_GET_IDX(index) \ ((index) & ((1 << MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) - 1)) +#define MLX5_FLOW_CONNTRACK_PKT_STATE_ALL \ + (RTE_FLOW_CONNTRACK_PKT_STATE_VALID | RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED | \ + RTE_FLOW_CONNTRACK_PKT_STATE_INVALID | RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED | \ + RTE_FLOW_CONNTRACK_PKT_STATE_BAD) + /* * When HW steering flow engine is used, the CT action handles are encoded in a following way: * - bits 31:29 - type @@ -201,9 +206,6 @@ MLX5_SAMPLE_ID, }; -/* Default queue number. */ -#define MLX5_RSSQ_DEFAULT_NUM 16 - #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0) #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1) #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2) @@ -448,10 +450,6 @@ #define MLX5_FLOW_XCAP_ACTIONS (MLX5_FLOW_ACTION_ENCAP | MLX5_FLOW_ACTION_DECAP) -#ifndef IPPROTO_MPLS -#define IPPROTO_MPLS 137 -#endif - #define MLX5_IPV6_HDR_ECN_MASK 0x3 #define MLX5_IPV6_HDR_DSCP_SHIFT 2 @@ -468,6 +466,9 @@ /* UDP port numbers for GENEVE. */ #define MLX5_UDP_PORT_GENEVE 6081 +/* UDP port numbers for ESP. */ +#define MLX5_UDP_PORT_ESP 4500 + /* Lowest priority indicator. */ #define MLX5_FLOW_LOWEST_PRIO_INDICATOR ((uint32_t)-1) @@ -495,9 +496,6 @@ RTE_ETH_RSS_NONFRAG_IPV4_TCP | RTE_ETH_RSS_NONFRAG_IPV4_UDP | \ RTE_ETH_RSS_NONFRAG_IPV4_OTHER) -/* Valid L4 RSS types */ -#define MLX5_L4_RSS_TYPES (RTE_ETH_RSS_L4_SRC_ONLY | RTE_ETH_RSS_L4_DST_ONLY) - /* IBV hash source bits for IPV4. */ #define MLX5_IPV4_IBV_RX_HASH (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4) @@ -1703,7 +1701,7 @@ struct rte_flow_pattern_template *its[MLX5_HW_TBL_MAX_ITEM_TEMPLATE]; /* Action templates bind to the table. */ struct mlx5_hw_action_template ats[MLX5_HW_TBL_MAX_ACTION_TEMPLATE]; - struct mlx5_indexed_pool *flow; /* The table's flow ipool. */ + struct mlx5_indexed_pool *flow_pool; /* The table's flow ipool. */ struct rte_flow_hw_aux *flow_aux; /**< Auxiliary data stored per flow. */ struct mlx5_indexed_pool *resource; /* The table's resource ipool. 
*/ struct mlx5_flow_template_table_cfg cfg; @@ -1786,7 +1784,8 @@ case RTE_FLOW_ITEM_TYPE_TAG: if (id == RTE_PMD_MLX5_LINEAR_HASH_TAG_INDEX) return REG_C_3; - MLX5_ASSERT(id < MLX5_FLOW_HW_TAGS_MAX); + if (id >= MLX5_FLOW_HW_TAGS_MAX) + return REG_NON; return reg->hw_avl_tags[id]; default: return REG_NON; @@ -1866,27 +1865,31 @@ (((func) == RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ) || \ ((func) == RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ_SORT)) -/* extract next protocol type from Ethernet & VLAN headers */ -#define MLX5_ETHER_TYPE_FROM_HEADER(_s, _m, _itm, _prt) do { \ - (_prt) = ((const struct _s *)(_itm)->mask)->_m; \ - (_prt) &= ((const struct _s *)(_itm)->spec)->_m; \ - (_prt) = rte_be_to_cpu_16((_prt)); \ -} while (0) - -/* array of valid combinations of RX Hash fields for RSS */ -static const uint64_t mlx5_rss_hash_fields[] = { - MLX5_RSS_HASH_IPV4, - MLX5_RSS_HASH_IPV4_TCP, - MLX5_RSS_HASH_IPV4_UDP, - MLX5_RSS_HASH_IPV4_ESP, - MLX5_RSS_HASH_IPV6, - MLX5_RSS_HASH_IPV6_TCP, - MLX5_RSS_HASH_IPV6_UDP, - MLX5_RSS_HASH_IPV6_ESP, - MLX5_RSS_HASH_ESP_SPI, - MLX5_RSS_HASH_NONE, + +/** + * Each enum variant corresponds to a single valid protocols combination for hrxq configuration + * Each variant serves as an index into #mlx5_rss_hash_fields array containing default + * bitmaps of ibv_rx_hash_fields flags for given protocols combination. + */ +enum { + MLX5_RSS_HASH_IDX_IPV4, + MLX5_RSS_HASH_IDX_IPV4_TCP, + MLX5_RSS_HASH_IDX_IPV4_UDP, + MLX5_RSS_HASH_IDX_IPV4_ESP, + MLX5_RSS_HASH_IDX_IPV6, + MLX5_RSS_HASH_IDX_IPV6_TCP, + MLX5_RSS_HASH_IDX_IPV6_UDP, + MLX5_RSS_HASH_IDX_IPV6_ESP, + MLX5_RSS_HASH_IDX_TCP, + MLX5_RSS_HASH_IDX_UDP, + MLX5_RSS_HASH_IDX_ESP_SPI, + MLX5_RSS_HASH_IDX_NONE, + MLX5_RSS_HASH_IDX_MAX, }; +/** Array of valid combinations of RX Hash fields for RSS. */ +extern const uint64_t mlx5_rss_hash_fields[]; + /* Shared RSS action structure */ struct mlx5_shared_action_rss { ILIST_ENTRY(uint32_t)next; /**< Index to the next RSS structure. */ @@ -1895,7 +1898,7 @@ uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */ struct mlx5_ind_table_obj *ind_tbl; /**< Hash RX queues (hrxq, hrxq_tunnel fields) indirection table. */ - uint32_t hrxq[MLX5_RSS_HASH_FIELDS_LEN]; + uint32_t hrxq[MLX5_RSS_HASH_IDX_MAX]; /**< Hash RX queue indexes mapped to mlx5_rss_hash_fields */ rte_spinlock_t action_rss_sl; /**< Shared RSS action spinlock. */ }; @@ -2007,6 +2010,8 @@ } if (mlx5_is_external_txq(dev, tx_queue)) { ext_txq = mlx5_ext_txq_get(dev, tx_queue); + if (ext_txq == NULL) + return -EINVAL; *sqn = ext_txq->hw_id; return 0; } @@ -2992,6 +2997,8 @@ #define MLX5_CTRL_VLAN_FILTER (RTE_BIT32(6)) int mlx5_flow_hw_ctrl_flows(struct rte_eth_dev *dev, uint32_t flags); +int mlx5_flow_hw_create_ctrl_rx_tables(struct rte_eth_dev *dev); +void mlx5_flow_hw_cleanup_ctrl_rx_tables(struct rte_eth_dev *dev); /** Create a control flow rule for matching unicast DMAC with VLAN (Verbs and DV). 
*/ int mlx5_legacy_dmac_flow_create(struct rte_eth_dev *dev, const struct rte_ether_addr *addr); @@ -3550,7 +3557,9 @@ int mlx5_flow_hw_esw_destroy_sq_miss_flow(struct rte_eth_dev *dev, uint32_t sqn); int mlx5_flow_hw_esw_create_default_jump_flow(struct rte_eth_dev *dev); -int mlx5_flow_hw_create_tx_default_mreg_copy_flow(struct rte_eth_dev *dev); +int mlx5_flow_hw_create_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, + uint32_t sqn, + bool external); int mlx5_flow_hw_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external); int mlx5_flow_hw_lacp_rx_flow(struct rte_eth_dev *dev); int mlx5_flow_actions_validate(struct rte_eth_dev *dev, @@ -3642,6 +3651,13 @@ #endif return 0; } + +static inline bool +mlx5_dv_modify_ipv6_traffic_class_supported(struct mlx5_priv *priv) +{ + return priv->sh->phdev->config.ipv6_tc_fallback == MLX5_IPV6_TC_OK; +} + void mlx5_indirect_list_handles_release(struct rte_eth_dev *dev); #ifdef HAVE_MLX5_HWS_SUPPORT diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_flow_dv.c dpdk-24.11.4/drivers/net/mlx5/mlx5_flow_dv.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_flow_dv.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_flow_dv.c 2025-12-19 12:05:33.000000000 +0000 @@ -1638,12 +1638,6 @@ } } -static inline bool -mlx5_dv_modify_ipv6_traffic_class_supported(struct mlx5_priv *priv) -{ - return priv->sh->phdev->config.ipv6_tc_fallback == MLX5_IPV6_TC_OK; -} - void mlx5_flow_field_id_to_modify_info (const struct rte_flow_field_data *data, @@ -3289,6 +3283,11 @@ RTE_FLOW_ERROR_TYPE_ITEM, NULL, "Conflict status bits"); + if (spec->flags & ~MLX5_FLOW_CONNTRACK_PKT_STATE_ALL) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, + "Invalid CT item flags"); } /* State change also needs to be considered. */ *item_flags |= MLX5_FLOW_LAYER_ASO_CT; @@ -5637,6 +5636,13 @@ } if (src_data->field != RTE_FLOW_FIELD_VALUE && src_data->field != RTE_FLOW_FIELD_POINTER) { + if (conf->operation != RTE_FLOW_MODIFY_SET) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + &conf->operation, + "modify field action type add is not" + " supported when src field type is" + " not value/pointer"); if (root) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, action, @@ -7854,10 +7860,8 @@ case RTE_FLOW_ITEM_TYPE_VOID: break; case RTE_FLOW_ITEM_TYPE_ESP: - ret = mlx5_flow_os_validate_item_esp(dev, items, - item_flags, - next_protocol, - error); + ret = mlx5_flow_os_validate_item_esp(dev, items, item_flags, + next_protocol, false, error); if (ret < 0) return ret; last_item = MLX5_FLOW_ITEM_ESP; @@ -9708,29 +9712,35 @@ */ static void flow_dv_translate_item_esp(void *key, const struct rte_flow_item *item, - int inner, uint32_t key_type) + int inner, uint32_t key_type, uint64_t item_flags) { const struct rte_flow_item_esp *esp_m; const struct rte_flow_item_esp *esp_v; void *headers_v; char *spi_v; + bool over_udp = item_flags & (inner ? MLX5_FLOW_LAYER_INNER_L4_UDP : + MLX5_FLOW_LAYER_OUTER_L4_UDP); headers_v = inner ? 
MLX5_ADDR_OF(fte_match_param, key, inner_headers) : - MLX5_ADDR_OF(fte_match_param, key, outer_headers); - if (key_type & MLX5_SET_MATCHER_M) - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ip_protocol, 0xff); - else - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ip_protocol, IPPROTO_ESP); + MLX5_ADDR_OF(fte_match_param, key, outer_headers); + if (key_type & MLX5_SET_MATCHER_M) { + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 0xff); + if (over_udp && !MLX5_GET16(fte_match_set_lyr_2_4, headers_v, udp_dport)) + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, 0xFFFF); + } else { + if (!over_udp) + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_ESP); + else + if (!MLX5_GET16(fte_match_set_lyr_2_4, headers_v, udp_dport)) + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, + MLX5_UDP_PORT_ESP); + } if (MLX5_ITEM_VALID(item, key_type)) return; - MLX5_ITEM_UPDATE(item, key_type, esp_v, esp_m, - &rte_flow_item_esp_mask); + MLX5_ITEM_UPDATE(item, key_type, esp_v, esp_m, &rte_flow_item_esp_mask); headers_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters); - spi_v = inner ? MLX5_ADDR_OF(fte_match_set_misc, headers_v, - inner_esp_spi) : MLX5_ADDR_OF(fte_match_set_misc - , headers_v, outer_esp_spi); + spi_v = inner ? MLX5_ADDR_OF(fte_match_set_misc, headers_v, inner_esp_spi) : + MLX5_ADDR_OF(fte_match_set_misc, headers_v, outer_esp_spi); *(uint32_t *)spi_v = esp_m->hdr.spi & esp_v->hdr.spi; } @@ -10542,8 +10552,8 @@ flow_dv_match_meta_reg(void *key, enum modify_reg reg_type, uint32_t data, uint32_t mask) { - void *misc2_v = - MLX5_ADDR_OF(fte_match_param, key, misc_parameters_2); + void *misc2_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters_2); + void *misc5_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters_5); uint32_t temp; if (!key) @@ -10589,6 +10599,18 @@ case REG_C_7: MLX5_SET(fte_match_set_misc2, misc2_v, metadata_reg_c_7, data); break; + case REG_C_8: + MLX5_SET(fte_match_set_misc5, misc5_v, metadata_reg_c_8, data); + break; + case REG_C_9: + MLX5_SET(fte_match_set_misc5, misc5_v, metadata_reg_c_9, data); + break; + case REG_C_10: + MLX5_SET(fte_match_set_misc5, misc5_v, metadata_reg_c_10, data); + break; + case REG_C_11: + MLX5_SET(fte_match_set_misc5, misc5_v, metadata_reg_c_11, data); + break; default: MLX5_ASSERT(false); break; @@ -10807,8 +10829,11 @@ * Flow pattern to translate. * @param[in] key_type * Set flow matcher mask or value. + * + * @return + * 0 on success. Negative errno value otherwise. */ -static void +static int flow_dv_translate_item_tag(struct rte_eth_dev *dev, void *key, const struct rte_flow_item *item, uint32_t key_type) @@ -10820,7 +10845,7 @@ uint32_t index; if (MLX5_ITEM_VALID(item, key_type)) - return; + return 0; MLX5_ITEM_UPDATE(item, key_type, tag_v, tag_m, &rte_flow_item_tag_mask); /* When set mask, the index should be from spec. 
*/ @@ -10830,8 +10855,18 @@ reg = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, index, NULL); else reg = flow_hw_get_reg_id(dev, RTE_FLOW_ITEM_TYPE_TAG, index); - MLX5_ASSERT(reg > 0); + if (reg < 0) { + DRV_LOG(ERR, "port %u tag index %u does not map to correct register", + dev->data->port_id, index); + return -EINVAL; + } + if (reg == REG_NON) { + DRV_LOG(ERR, "port %u tag index %u maps to unsupported register", + dev->data->port_id, index); + return -ENOTSUP; + } flow_dv_match_meta_reg(key, (enum modify_reg)reg, tag_v->data, tag_m->data); + return 0; } /** @@ -14207,7 +14242,7 @@ switch (item_type) { case RTE_FLOW_ITEM_TYPE_ESP: - flow_dv_translate_item_esp(key, items, tunnel, key_type); + flow_dv_translate_item_esp(key, items, tunnel, key_type, wks->item_flags); wks->priority = MLX5_PRIORITY_MAP_L4; last_item = MLX5_FLOW_ITEM_ESP; break; @@ -14378,7 +14413,10 @@ last_item = MLX5_FLOW_LAYER_ICMP6; break; case RTE_FLOW_ITEM_TYPE_TAG: - flow_dv_translate_item_tag(dev, key, items, key_type); + ret = flow_dv_translate_item_tag(dev, key, items, key_type); + if (ret < 0) + return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "invalid flow tag item"); last_item = MLX5_FLOW_ITEM_TAG; break; case MLX5_RTE_FLOW_ITEM_TYPE_TAG: @@ -15765,6 +15803,145 @@ return 0; } +/* + * Protocol selector bitmap + * Each flag is used as an indicator that given protocol is specified in given RSS hash fields. + */ +#define RX_HASH_SELECTOR_IPV4 RTE_BIT32(0) +#define RX_HASH_SELECTOR_IPV6 RTE_BIT32(1) +#define RX_HASH_SELECTOR_UDP RTE_BIT32(2) +#define RX_HASH_SELECTOR_TCP RTE_BIT32(3) +#define RX_HASH_SELECTOR_ESP_SPI RTE_BIT32(4) +#define RX_HASH_SELECTOR_NONE (0) + +#define RX_HASH_SELECTOR_IPV4_TCP (RX_HASH_SELECTOR_IPV4 | RX_HASH_SELECTOR_TCP) +#define RX_HASH_SELECTOR_IPV4_UDP (RX_HASH_SELECTOR_IPV4 | RX_HASH_SELECTOR_UDP) +#define RX_HASH_SELECTOR_IPV4_ESP (RX_HASH_SELECTOR_IPV4 | RX_HASH_SELECTOR_ESP_SPI) + +#define RX_HASH_SELECTOR_IPV6_TCP (RX_HASH_SELECTOR_IPV6 | RX_HASH_SELECTOR_TCP) +#define RX_HASH_SELECTOR_IPV6_UDP (RX_HASH_SELECTOR_IPV6 | RX_HASH_SELECTOR_UDP) +#define RX_HASH_SELECTOR_IPV6_ESP (RX_HASH_SELECTOR_IPV6 | RX_HASH_SELECTOR_ESP_SPI) + +static bool +rx_hash_selector_has_valid_l3(const uint32_t selectors) +{ + /* In TIR configuration, RSS hashing on both IPv4 and IPv6 is mutually exclusive. */ + return !((selectors & RX_HASH_SELECTOR_IPV4) && (selectors & RX_HASH_SELECTOR_IPV6)); +} + +static bool +rx_hash_selector_has_valid_l4(const uint32_t selectors) +{ + /* In TIR configuration, RSS hashing on both UDP and TCP is mutually exclusive. */ + return !((selectors & RX_HASH_SELECTOR_UDP) && (selectors & RX_HASH_SELECTOR_TCP)); +} + +static bool +rx_hash_selector_has_valid_esp(const uint32_t selectors) +{ + /* In TIR configuration, RSS hashing on ESP and other L4 protocol is mutually exclusive. */ + if (selectors & RX_HASH_SELECTOR_ESP_SPI) + return !((selectors & RX_HASH_SELECTOR_UDP) || (selectors & RX_HASH_SELECTOR_TCP)); + + return true; +} + +/** + * Calculate protocol combination based on provided RSS hashing fields. + * + * @param[in] hash_fields + * Requested RSS hashing fields specified as a flags bitmap, based on ibv_rx_hash_fields. + * @param[out] selectors_out + * Calculated protocol combination will be written here. + * Result will be a bitmap of RX_HASH_SELECTOR_* flags. + * + * @return + * 0 if conversion is successful and protocol combination written to @p selectors_out. + * (-EINVAL) otherwise. 
+ */ +static int +rx_hash_calc_selector(const uint64_t hash_fields, uint32_t *selectors_out) +{ + const uint64_t filtered_hf = hash_fields & ~IBV_RX_HASH_INNER; + uint32_t selectors = 0; + + if (filtered_hf & MLX5_RSS_HASH_IPV4) + selectors |= RX_HASH_SELECTOR_IPV4; + if (filtered_hf & MLX5_RSS_HASH_IPV6) + selectors |= RX_HASH_SELECTOR_IPV6; + if (!rx_hash_selector_has_valid_l3(selectors)) { + DRV_LOG(NOTICE, "hrxq hashing on both IPv4 and IPv6 is invalid: " + "selectors=0x%" PRIx32, selectors); + return -EINVAL; + } + + if (filtered_hf & MLX5_UDP_IBV_RX_HASH) + selectors |= RX_HASH_SELECTOR_UDP; + if (filtered_hf & MLX5_TCP_IBV_RX_HASH) + selectors |= RX_HASH_SELECTOR_TCP; + if (!rx_hash_selector_has_valid_l4(selectors)) { + DRV_LOG(NOTICE, "hrxq hashing on both UDP and TCP is invalid: " + "selectors=0x%" PRIx32, selectors); + return -EINVAL; + } + + if (filtered_hf & MLX5_RSS_HASH_ESP_SPI) + selectors |= RX_HASH_SELECTOR_ESP_SPI; + if (!rx_hash_selector_has_valid_esp(selectors)) { + DRV_LOG(NOTICE, "hrxq hashing on ESP SPI and UDP or TCP is mutually exclusive: " + "selectors=0x%" PRIx32, selectors); + return -EINVAL; + } + + *selectors_out = selectors; + return 0; +} + +/** + * Calculate the hrxq object index based on protocol combination. + * + * @param[in] selectors + * Protocol combination specified as bitmap of RX_HASH_SELECTOR_* flags. + * + * @return + * Index into hrxq array in #mlx5_shared_action_rss based on ginve protocol combination. + * (-EINVAL) if given protocol combination is not supported or is invalid. + */ +static int +get_rss_hash_idx(const uint32_t selectors) +{ + switch (selectors) { + case RX_HASH_SELECTOR_IPV4: + return MLX5_RSS_HASH_IDX_IPV4; + case RX_HASH_SELECTOR_IPV4_TCP: + return MLX5_RSS_HASH_IDX_IPV4_TCP; + case RX_HASH_SELECTOR_IPV4_UDP: + return MLX5_RSS_HASH_IDX_IPV4_UDP; + case RX_HASH_SELECTOR_IPV4_ESP: + return MLX5_RSS_HASH_IDX_IPV4_ESP; + case RX_HASH_SELECTOR_IPV6: + return MLX5_RSS_HASH_IDX_IPV6; + case RX_HASH_SELECTOR_IPV6_TCP: + return MLX5_RSS_HASH_IDX_IPV6_TCP; + case RX_HASH_SELECTOR_IPV6_UDP: + return MLX5_RSS_HASH_IDX_IPV6_UDP; + case RX_HASH_SELECTOR_IPV6_ESP: + return MLX5_RSS_HASH_IDX_IPV6_ESP; + case RX_HASH_SELECTOR_TCP: + return MLX5_RSS_HASH_IDX_TCP; + case RX_HASH_SELECTOR_UDP: + return MLX5_RSS_HASH_IDX_UDP; + case RX_HASH_SELECTOR_ESP_SPI: + return MLX5_RSS_HASH_IDX_ESP_SPI; + case RX_HASH_SELECTOR_NONE: + return MLX5_RSS_HASH_IDX_NONE; + default: + DRV_LOG(ERR, "invalid hrxq hash fields combination: " + "selectors=0x%" PRIx32, selectors); + return -EINVAL; + } +} + /** * Set hash RX queue by hash fields (see enum ibv_rx_hash_fields) * and tunnel. @@ -15772,7 +15949,8 @@ * @param[in, out] action * Shred RSS action holding hash RX queue objects. * @param[in] hash_fields - * Defines combination of packet fields to participate in RX hash. + * Defines combination of packet fields to participate in RX hash, + * specified as a bitmap of #ibv_rx_hash_fields flags. * @param[in] tunnel * Tunnel type * @param[in] hrxq_idx @@ -15787,65 +15965,26 @@ uint32_t hrxq_idx) { uint32_t *hrxqs = action->hrxq; + uint32_t selectors = 0; + int ret; - switch (hash_fields & ~IBV_RX_HASH_INNER) { - case MLX5_RSS_HASH_IPV4: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_SRC_ONLY: - hrxqs[0] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV4_TCP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_TCP_DST_ONLY: - /* fall-through. 
*/ - case MLX5_RSS_HASH_IPV4_TCP_SRC_ONLY: - hrxqs[1] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV4_UDP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_UDP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_UDP_SRC_ONLY: - hrxqs[2] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV6: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_SRC_ONLY: - hrxqs[3] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV6_TCP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_TCP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_TCP_SRC_ONLY: - hrxqs[4] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV6_UDP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_UDP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_UDP_SRC_ONLY: - hrxqs[5] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_NONE: - hrxqs[6] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV4_ESP: - hrxqs[7] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV6_ESP: - hrxqs[8] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_ESP_SPI: - hrxqs[9] = hrxq_idx; - return 0; - default: - return -1; - } + ret = rx_hash_calc_selector(hash_fields, &selectors); + /* + * Hash fields passed to this function are constructed internally. + * If this fails, then this is a PMD bug. + */ + MLX5_ASSERT(ret == 0); + + ret = get_rss_hash_idx(selectors); + /* + * Based on above assert, selectors should always yield correct index + * in mlx5_rss_hash_fields array. + * If this fails, then this is a PMD bug. + */ + MLX5_ASSERT(ret >= 0 && ret < MLX5_RSS_HASH_IDX_MAX); + hrxqs[ret] = hrxq_idx; + + return 0; } /** @@ -15857,7 +15996,8 @@ * @param[in] idx * Shared RSS action ID holding hash RX queue objects. * @param[in] hash_fields - * Defines combination of packet fields to participate in RX hash. + * Defines combination of packet fields to participate in RX hash, + * specified as a bitmap of #ibv_rx_hash_fields flags. * @param[in] tunnel * Tunnel type * @@ -15872,56 +16012,26 @@ struct mlx5_shared_action_rss *shared_rss = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx); const uint32_t *hrxqs = shared_rss->hrxq; + uint32_t selectors = 0; + int ret; - switch (hash_fields & ~IBV_RX_HASH_INNER) { - case MLX5_RSS_HASH_IPV4: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_SRC_ONLY: - return hrxqs[0]; - case MLX5_RSS_HASH_IPV4_TCP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_TCP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_TCP_SRC_ONLY: - return hrxqs[1]; - case MLX5_RSS_HASH_IPV4_UDP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_UDP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_UDP_SRC_ONLY: - return hrxqs[2]; - case MLX5_RSS_HASH_IPV6: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_SRC_ONLY: - return hrxqs[3]; - case MLX5_RSS_HASH_IPV6_TCP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_TCP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_TCP_SRC_ONLY: - return hrxqs[4]; - case MLX5_RSS_HASH_IPV6_UDP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_UDP_DST_ONLY: - /* fall-through. 
*/ - case MLX5_RSS_HASH_IPV6_UDP_SRC_ONLY: - return hrxqs[5]; - case MLX5_RSS_HASH_NONE: - return hrxqs[6]; - case MLX5_RSS_HASH_IPV4_ESP: - return hrxqs[7]; - case MLX5_RSS_HASH_IPV6_ESP: - return hrxqs[8]; - case MLX5_RSS_HASH_ESP_SPI: - return hrxqs[9]; - default: + ret = rx_hash_calc_selector(hash_fields, &selectors); + if (ret < 0) { + DRV_LOG(ERR, "port %u Rx hash selector calculation failed: " + "rss_act_idx=%u hash_fields=0x%" PRIx64 " selectors=0x%" PRIx32, + dev->data->port_id, idx, hash_fields, selectors); return 0; } + ret = get_rss_hash_idx(selectors); + if (ret < 0) { + DRV_LOG(ERR, "port %u failed hrxq index lookup: " + "rss_act_idx=%u hash_fields=0x%" PRIx64 " selectors=0x%" PRIx32, + dev->data->port_id, idx, hash_fields, selectors); + return 0; + } + + return hrxqs[ret]; } /** @@ -16611,7 +16721,7 @@ */ static int __flow_dv_hrxqs_release(struct rte_eth_dev *dev, - uint32_t (*hrxqs)[MLX5_RSS_HASH_FIELDS_LEN]) + uint32_t (*hrxqs)[MLX5_RSS_HASH_IDX_MAX]) { size_t i; int remaining = 0; @@ -16646,6 +16756,62 @@ return __flow_dv_hrxqs_release(dev, &shared_rss->hrxq); } +static inline void +filter_ipv4_types(uint64_t rss_types, uint64_t *hash_fields) +{ + if (rss_types & MLX5_IPV4_LAYER_TYPES) { + *hash_fields &= ~MLX5_RSS_HASH_IPV4; + if (rss_types & RTE_ETH_RSS_L3_DST_ONLY) + *hash_fields |= IBV_RX_HASH_DST_IPV4; + else if (rss_types & RTE_ETH_RSS_L3_SRC_ONLY) + *hash_fields |= IBV_RX_HASH_SRC_IPV4; + else + *hash_fields |= MLX5_RSS_HASH_IPV4; + } +} + +static inline void +filter_ipv6_types(uint64_t rss_types, uint64_t *hash_fields) +{ + if (rss_types & MLX5_IPV6_LAYER_TYPES) { + *hash_fields &= ~MLX5_RSS_HASH_IPV6; + if (rss_types & RTE_ETH_RSS_L3_DST_ONLY) + *hash_fields |= IBV_RX_HASH_DST_IPV6; + else if (rss_types & RTE_ETH_RSS_L3_SRC_ONLY) + *hash_fields |= IBV_RX_HASH_SRC_IPV6; + else + *hash_fields |= MLX5_RSS_HASH_IPV6; + } +} + +static inline void +filter_udp_types(uint64_t rss_types, uint64_t *hash_fields) +{ + if (rss_types & RTE_ETH_RSS_UDP) { + *hash_fields &= ~MLX5_UDP_IBV_RX_HASH; + if (rss_types & RTE_ETH_RSS_L4_DST_ONLY) + *hash_fields |= IBV_RX_HASH_DST_PORT_UDP; + else if (rss_types & RTE_ETH_RSS_L4_SRC_ONLY) + *hash_fields |= IBV_RX_HASH_SRC_PORT_UDP; + else + *hash_fields |= MLX5_UDP_IBV_RX_HASH; + } +} + +static inline void +filter_tcp_types(uint64_t rss_types, uint64_t *hash_fields) +{ + if (rss_types & RTE_ETH_RSS_TCP) { + *hash_fields &= ~MLX5_TCP_IBV_RX_HASH; + if (rss_types & RTE_ETH_RSS_L4_DST_ONLY) + *hash_fields |= IBV_RX_HASH_DST_PORT_TCP; + else if (rss_types & RTE_ETH_RSS_L4_SRC_ONLY) + *hash_fields |= IBV_RX_HASH_SRC_PORT_TCP; + else + *hash_fields |= MLX5_TCP_IBV_RX_HASH; + } +} + /** * Adjust L3/L4 hash value of pre-created shared RSS hrxq according to * user input. @@ -16657,9 +16823,9 @@ * same slot in mlx5_rss_hash_fields. * * @param[in] orig_rss_types - * RSS type as provided in shared RSS action. + * RSS type as provided in shared RSS action, specified as a bitmap of RTE_ETH_RSS_* flags. * @param[in, out] hash_field - * hash_field variable needed to be adjusted. + * hash_field variable needed to be adjusted, specified as a bitmap of #ibv_rx_hash_fields flags. 
* * @return * void @@ -16668,60 +16834,18 @@ flow_dv_action_rss_l34_hash_adjust(uint64_t orig_rss_types, uint64_t *hash_field) { + uint64_t hash_field_protos = *hash_field & ~IBV_RX_HASH_INNER; uint64_t rss_types = rte_eth_rss_hf_refine(orig_rss_types); - switch (*hash_field & ~IBV_RX_HASH_INNER) { - case MLX5_RSS_HASH_IPV4: - if (rss_types & MLX5_IPV4_LAYER_TYPES) { - *hash_field &= ~MLX5_RSS_HASH_IPV4; - if (rss_types & RTE_ETH_RSS_L3_DST_ONLY) - *hash_field |= IBV_RX_HASH_DST_IPV4; - else if (rss_types & RTE_ETH_RSS_L3_SRC_ONLY) - *hash_field |= IBV_RX_HASH_SRC_IPV4; - else - *hash_field |= MLX5_RSS_HASH_IPV4; - } - return; - case MLX5_RSS_HASH_IPV6: - if (rss_types & MLX5_IPV6_LAYER_TYPES) { - *hash_field &= ~MLX5_RSS_HASH_IPV6; - if (rss_types & RTE_ETH_RSS_L3_DST_ONLY) - *hash_field |= IBV_RX_HASH_DST_IPV6; - else if (rss_types & RTE_ETH_RSS_L3_SRC_ONLY) - *hash_field |= IBV_RX_HASH_SRC_IPV6; - else - *hash_field |= MLX5_RSS_HASH_IPV6; - } - return; - case MLX5_RSS_HASH_IPV4_UDP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_UDP: - if (rss_types & RTE_ETH_RSS_UDP) { - *hash_field &= ~MLX5_UDP_IBV_RX_HASH; - if (rss_types & RTE_ETH_RSS_L4_DST_ONLY) - *hash_field |= IBV_RX_HASH_DST_PORT_UDP; - else if (rss_types & RTE_ETH_RSS_L4_SRC_ONLY) - *hash_field |= IBV_RX_HASH_SRC_PORT_UDP; - else - *hash_field |= MLX5_UDP_IBV_RX_HASH; - } - return; - case MLX5_RSS_HASH_IPV4_TCP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_TCP: - if (rss_types & RTE_ETH_RSS_TCP) { - *hash_field &= ~MLX5_TCP_IBV_RX_HASH; - if (rss_types & RTE_ETH_RSS_L4_DST_ONLY) - *hash_field |= IBV_RX_HASH_DST_PORT_TCP; - else if (rss_types & RTE_ETH_RSS_L4_SRC_ONLY) - *hash_field |= IBV_RX_HASH_SRC_PORT_TCP; - else - *hash_field |= MLX5_TCP_IBV_RX_HASH; - } - return; - default: - return; - } + if (hash_field_protos & MLX5_RSS_HASH_IPV4) + filter_ipv4_types(rss_types, hash_field); + else if (hash_field_protos & MLX5_RSS_HASH_IPV6) + filter_ipv6_types(rss_types, hash_field); + + if (hash_field_protos & MLX5_UDP_IBV_RX_HASH) + filter_udp_types(rss_types, hash_field); + else if (hash_field_protos & MLX5_TCP_IBV_RX_HASH) + filter_tcp_types(rss_types, hash_field); } /** @@ -16773,7 +16897,7 @@ rss_desc.ind_tbl = shared_rss->ind_tbl; if (priv->sh->config.dv_flow_en == 2) rss_desc.hws_flags = MLX5DR_ACTION_FLAG_HWS_RX; - for (i = 0; i < MLX5_RSS_HASH_FIELDS_LEN; i++) { + for (i = 0; i < MLX5_RSS_HASH_IDX_MAX; i++) { struct mlx5_hrxq *hrxq; uint64_t hash_fields = mlx5_rss_hash_fields[i]; int tunnel = 0; @@ -18880,7 +19004,7 @@ } } /* Create default matcher in drop table. 
*/ - matcher.tbl = mtrmng->drop_tbl[domain], + matcher.tbl = mtrmng->drop_tbl[domain]; tbl_data = container_of(mtrmng->drop_tbl[domain], struct mlx5_flow_tbl_data_entry, tbl); if (!mtrmng->def_matcher[domain]) { @@ -19638,7 +19762,8 @@ .size = sizeof(value.buf), }; struct mlx5dv_flow_matcher_attr dv_attr = { - .type = IBV_FLOW_ATTR_NORMAL | IBV_FLOW_ATTR_FLAGS_EGRESS, + .type = IBV_FLOW_ATTR_NORMAL, + .flags = IBV_FLOW_ATTR_FLAGS_EGRESS, .priority = 0, .match_criteria_enable = 0, .match_mask = (void *)&mask, diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_flow_flex.c dpdk-24.11.4/drivers/net/mlx5/mlx5_flow_flex.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_flow_flex.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_flow_flex.c 2025-12-19 12:05:33.000000000 +0000 @@ -554,7 +554,7 @@ "mask and shift combination not supported (OFFSET)"); msb++; offset += field->field_size - msb; - if (msb < attr->header_length_mask_width) { + if (attr->header_length_field_mode_wa && msb < attr->header_length_mask_width) { if (attr->header_length_mask_width - msb > offset) return rte_flow_error_set (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, NULL, @@ -572,6 +572,7 @@ node->header_length_field_mask = mask; node->header_length_field_shift = shift; node->header_length_field_offset = offset; + node->header_length_field_offset_mode = !attr->header_length_field_mode_wa; break; } case FIELD_MODE_BITMASK: diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_flow_hw.c dpdk-24.11.4/drivers/net/mlx5/mlx5_flow_hw.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_flow_hw.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_flow_hw.c 2025-12-19 12:05:33.000000000 +0000 @@ -1287,7 +1287,8 @@ const struct rte_flow_action *action, struct mlx5_hw_actions *acts, uint16_t action_src, - uint16_t action_dst) + uint16_t action_dst, + struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_shared_action_rss *shared_rss; @@ -1304,8 +1305,10 @@ (priv, acts, (enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_RSS, action_src, action_dst, idx, shared_rss)) { - DRV_LOG(WARNING, "Indirect RSS action index %d translate failed", act_idx); - return -1; + DRV_LOG(ERR, "port %u Indirect RSS action (handle %p) translate failed", + dev->data->port_id, action->conf); + return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + action, "Indirect RSS action translate failed"); } break; case MLX5_INDIRECT_ACTION_TYPE_COUNT: @@ -1313,15 +1316,22 @@ (enum rte_flow_action_type) MLX5_RTE_FLOW_ACTION_TYPE_COUNT, action_src, action_dst, act_idx)) { - DRV_LOG(WARNING, "Indirect count action translate failed"); - return -1; + DRV_LOG(ERR, + "port %u Indirect count action (handle %p) " + "translate failed", + dev->data->port_id, action->conf); + return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + action, + "Indirect count action translate failed"); } break; case MLX5_INDIRECT_ACTION_TYPE_CT: if (flow_hw_ct_compile(dev, MLX5_HW_INV_QUEUE, idx, &acts->rule_acts[action_dst])) { - DRV_LOG(WARNING, "Indirect CT action translate failed"); - return -1; + DRV_LOG(ERR, "port %u Indirect CT action (handle %p) translate failed", + dev->data->port_id, action->conf); + return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + action, "Indirect CT action translate failed"); } break; case MLX5_INDIRECT_ACTION_TYPE_METER_MARK: @@ -1329,16 +1339,22 @@ (enum rte_flow_action_type) MLX5_RTE_FLOW_ACTION_TYPE_METER_MARK, action_src, action_dst, idx)) { - DRV_LOG(WARNING, 
"Indirect meter mark action translate failed"); - return -1; + DRV_LOG(ERR, + "port %u Indirect meter mark action (handle %p) " + "translate failed", + dev->data->port_id, action->conf); + return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + action, + "Indirect meter mark action translate failed"); } break; case MLX5_INDIRECT_ACTION_TYPE_QUOTA: flow_hw_construct_quota(priv, &acts->rule_acts[action_dst], idx); break; default: - DRV_LOG(WARNING, "Unsupported shared action type:%d", type); - break; + DRV_LOG(ERR, "Unsupported shared action type: %d", type); + return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, action, + "Unsupported shared action type"); } return 0; } @@ -1579,6 +1595,11 @@ value = *(const uint8_t *)item.spec << 24; value = rte_cpu_to_be_32(value); item.spec = &value; + } else if (conf->dst.field == RTE_FLOW_FIELD_IPV6_DSCP && + !(mask[0] & MLX5_IPV6_HDR_ECN_MASK) && + mlx5_dv_modify_ipv6_traffic_class_supported(dev->data->dev_private)) { + value = *(const unaligned_uint32_t *)item.spec << MLX5_IPV6_HDR_DSCP_SHIFT; + item.spec = &value; } } else { type = conf->operation == RTE_FLOW_MODIFY_SET ? @@ -1900,6 +1921,8 @@ aso_mtr = flow_hw_meter_mark_alloc(dev, queue, action, job, true, error); if (!aso_mtr) { + if (queue == MLX5_HW_INV_QUEUE) + queue = CTRL_QUEUE_ID(priv); flow_hw_job_put(priv, job, queue); return -1; } @@ -2279,9 +2302,10 @@ .sz = sizeof(struct mlx5_modification_cmd) * mhdr->mhdr_cmds_num }; - if (flow_hw_validate_compiled_modify_field(dev, cfg, mhdr, error)) { + int ret = flow_hw_validate_compiled_modify_field(dev, cfg, mhdr, error); + if (ret) { __flow_hw_action_template_destroy(dev, acts); - return -rte_errno; + return ret; } acts->mhdr = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*acts->mhdr), 0, SOCKET_ID_ANY); @@ -2312,9 +2336,14 @@ const struct rte_flow_template_table_attr *table_attr = &cfg->attr; const struct rte_flow_attr *attr = &table_attr->flow_attr; enum mlx5dr_table_type tbl_type = get_mlx5dr_table_type(attr); - struct mlx5dr_action_mh_pattern pattern = { - .sz = sizeof(struct mlx5_modification_cmd) * acts->mhdr->mhdr_cmds_num - }; + struct mlx5dr_action_mh_pattern pattern; + + if (!acts->mhdr) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "translate modify_header: mhdr is NULL"); + + pattern.sz = sizeof(struct mlx5_modification_cmd) * acts->mhdr->mhdr_cmds_num; uint16_t mhdr_ix = acts->mhdr->pos; uint32_t flags = mlx5_hw_act_flag[!!attr->group][tbl_type] | MLX5DR_ACTION_FLAG_SHARED; @@ -2498,8 +2527,8 @@ goto err; } if (actions->conf && masks->conf) { - if (flow_hw_shared_action_translate - (dev, actions, acts, src_pos, dr_pos)) + if (flow_hw_shared_action_translate(dev, actions, acts, + src_pos, dr_pos, &sub_error)) goto err; } else if (__flow_hw_act_data_indirect_append (priv, acts, RTE_FLOW_ACTION_TYPE_INDIRECT, @@ -2872,6 +2901,10 @@ case RTE_FLOW_ACTION_TYPE_END: actions_end = true; break; + case RTE_FLOW_ACTION_TYPE_PORT_ID: + DRV_LOG(ERR, "RTE_FLOW_ACTION_TYPE_PORT_ID action is not supported. 
" + "Use RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT instead."); + goto err; default: break; } @@ -3135,6 +3168,7 @@ uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET; uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1); + uint32_t *cnt_queue; cnt_id_t age_cnt; memset(&act_data, 0, sizeof(act_data)); @@ -3185,9 +3219,8 @@ if (param == NULL) return -1; if (action_flags & MLX5_FLOW_ACTION_COUNT) { - if (mlx5_hws_cnt_pool_get(priv->hws_cpool, - ¶m->queue_id, &age_cnt, - idx) < 0) + cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue); + if (mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &age_cnt, idx, 0) < 0) return -1; flow->flags |= MLX5_FLOW_HW_FLOW_FLAG_CNT_ID; flow->cnt_id = age_cnt; @@ -3623,7 +3656,8 @@ /* Fall-through. */ case RTE_FLOW_ACTION_TYPE_COUNT: cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue); - ret = mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &cnt_id, age_idx); + ret = mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &cnt_id, + age_idx, 0); if (ret != 0) { rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ACTION, action, "Failed to allocate flow counter"); @@ -3883,7 +3917,7 @@ items, pattern_template_index, actions, action_template_index, error)) return NULL; } - flow = mlx5_ipool_malloc(table->flow, &flow_idx); + flow = mlx5_ipool_malloc(table->flow_pool, &flow_idx); if (!flow) { rte_errno = ENOMEM; goto error; @@ -3934,10 +3968,14 @@ flow->table, actions, rule_acts, queue, &sub_error)) goto error; - rule_items = flow_hw_get_rule_items(dev, table, items, - pattern_template_index, &priv->hw_q[queue].pp); - if (!rule_items) - goto error; + if (insertion_type == RTE_FLOW_TABLE_INSERTION_TYPE_INDEX) { + rule_items = items; + } else { + rule_items = flow_hw_get_rule_items(dev, table, items, + pattern_template_index, &priv->hw_q[queue].pp); + if (!rule_items) + goto error; + } if (likely(!rte_flow_template_table_resizable(dev->data->port_id, &table->cfg.attr))) { ret = mlx5dr_rule_create(table->matcher_info[0].matcher, pattern_template_index, rule_items, @@ -3968,7 +4006,7 @@ if (table->resource && res_idx) mlx5_ipool_free(table->resource, res_idx); if (flow_idx) - mlx5_ipool_free(table->flow, flow_idx); + mlx5_ipool_free(table->flow_pool, flow_idx); if (sub_error.cause != RTE_FLOW_ERROR_TYPE_NONE && error != NULL) *error = sub_error; else @@ -4418,7 +4456,8 @@ if (!flow->nt_rule) { if (table->resource) mlx5_ipool_free(table->resource, res_idx); - mlx5_ipool_free(table->flow, flow->idx); + if (table->flow_pool) + mlx5_ipool_free(table->flow_pool, flow->idx); } } } @@ -4706,7 +4745,7 @@ LIST_FOREACH(tbl, &priv->flow_hw_tbl, next) { if (!tbl->cfg.external) continue; - MLX5_IPOOL_FOREACH(tbl->flow, fidx, flow) { + MLX5_IPOOL_FOREACH(tbl->flow_pool, fidx, flow) { if (flow_hw_async_flow_destroy(dev, MLX5_DEFAULT_FLUSH_QUEUE, &attr, @@ -5012,8 +5051,8 @@ goto error; tbl->cfg = *table_cfg; /* Allocate flow indexed pool. */ - tbl->flow = mlx5_ipool_create(&cfg); - if (!tbl->flow) + tbl->flow_pool = mlx5_ipool_create(&cfg); + if (!tbl->flow_pool) goto error; /* Allocate table of auxiliary flow rule structs. */ tbl->flow_aux = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct rte_flow_hw_aux) * nb_flows, @@ -5161,8 +5200,8 @@ &tbl->grp->entry); if (tbl->flow_aux) mlx5_free(tbl->flow_aux); - if (tbl->flow) - mlx5_ipool_destroy(tbl->flow); + if (tbl->flow_pool) + mlx5_ipool_destroy(tbl->flow_pool); mlx5_free(tbl); } if (error != NULL) { @@ -5383,10 +5422,10 @@ /* Build ipool allocated object bitmap. 
*/ if (table->resource) mlx5_ipool_flush_cache(table->resource); - mlx5_ipool_flush_cache(table->flow); + mlx5_ipool_flush_cache(table->flow_pool); /* Check if ipool has allocated objects. */ if (table->refcnt || - mlx5_ipool_get_next(table->flow, &fidx) || + mlx5_ipool_get_next(table->flow_pool, &fidx) || (table->resource && mlx5_ipool_get_next(table->resource, &ridx))) { DRV_LOG(WARNING, "Table %p is still in use.", (void *)table); return rte_flow_error_set(error, EBUSY, @@ -5416,7 +5455,7 @@ if (table->resource) mlx5_ipool_destroy(table->resource); mlx5_free(table->flow_aux); - mlx5_ipool_destroy(table->flow); + mlx5_ipool_destroy(table->flow_pool); mlx5_free(table); return 0; } @@ -7525,6 +7564,10 @@ at->dr_off[i] = curr_off; action_types[curr_off++] = MLX5DR_ACTION_TYP_JUMP_TO_MATCHER; break; + case RTE_FLOW_ACTION_TYPE_PORT_ID: + DRV_LOG(ERR, "RTE_FLOW_ACTION_TYPE_PORT_ID action is not supported. " + "Use RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT instead."); + return -EINVAL; default: type = mlx5_hw_dr_action_types[at->actions[i].type]; at->dr_off[i] = curr_off; @@ -8668,9 +8711,8 @@ last_item = MLX5_FLOW_ITEM_QUOTA; break; case RTE_FLOW_ITEM_TYPE_ESP: - ret = mlx5_flow_os_validate_item_esp(dev, item, - *item_flags, 0xff, - error); + ret = mlx5_flow_os_validate_item_esp(dev, item, *item_flags, + 0xff, true, error); if (ret < 0) return ret; last_item = MLX5_FLOW_ITEM_ESP; @@ -9948,46 +9990,6 @@ } /* - * Creating a flow pattern template with all ETH packets matching. - * This template is used to set up a table for default Tx copy (Tx metadata - * to REG_C_1) flow rule usage. - * - * @param dev - * Pointer to Ethernet device. - * @param error - * Pointer to error structure. - * - * @return - * Pointer to flow pattern template on success, NULL otherwise. - */ -static struct rte_flow_pattern_template * -flow_hw_create_tx_default_mreg_copy_pattern_template(struct rte_eth_dev *dev, - struct rte_flow_error *error) -{ - struct rte_flow_pattern_template_attr tx_pa_attr = { - .relaxed_matching = 0, - .egress = 1, - }; - struct rte_flow_item_eth promisc = { - .hdr.dst_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .hdr.src_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .hdr.ether_type = 0, - }; - struct rte_flow_item eth_all[] = { - [0] = { - .type = RTE_FLOW_ITEM_TYPE_ETH, - .spec = &promisc, - .mask = &promisc, - }, - [1] = { - .type = RTE_FLOW_ITEM_TYPE_END, - }, - }; - - return flow_hw_pattern_template_create(dev, &tx_pa_attr, eth_all, error); -} - -/* * Creating a flow pattern template with all LACP packets matching, only for NIC * ingress domain. * @@ -10432,7 +10434,7 @@ .priority = MLX5_HW_LOWEST_PRIO_ROOT, .egress = 1, }, - .nb_flows = 1, /* One default flow rule for all. */ + .nb_flows = MLX5_HW_CTRL_FLOW_NB_RULES, }; struct mlx5_flow_template_table_cfg tx_tbl_cfg = { .attr = tx_tbl_attr, @@ -10692,7 +10694,7 @@ /* Create templates and table for default Tx metadata copy flow rule. 
*/ if (!repr_matching && xmeta == MLX5_XMETA_MODE_META32_HWS) { hw_ctrl_fdb->tx_meta_items_tmpl = - flow_hw_create_tx_default_mreg_copy_pattern_template(dev, error); + flow_hw_create_tx_repr_sq_pattern_tmpl(dev, error); if (!hw_ctrl_fdb->tx_meta_items_tmpl) { DRV_LOG(ERR, "port %u failed to Tx metadata copy pattern" " template for control flows", dev->data->port_id); @@ -10948,8 +10950,8 @@ return 0; } -static void -flow_hw_cleanup_ctrl_rx_tables(struct rte_eth_dev *dev) +void +mlx5_flow_hw_cleanup_ctrl_rx_tables(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; unsigned int i; @@ -11234,8 +11236,8 @@ return flow_hw_pattern_template_create(dev, &attr, items, NULL); } -static int -flow_hw_create_ctrl_rx_tables(struct rte_eth_dev *dev) +int +mlx5_flow_hw_create_ctrl_rx_tables(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; unsigned int i; @@ -11271,8 +11273,6 @@ return 0; err: ret = rte_errno; - flow_hw_cleanup_ctrl_rx_tables(dev); - rte_errno = ret; return -ret; } @@ -11466,7 +11466,6 @@ flow_hw_flush_all_ctrl_flows(dev); flow_hw_cleanup_ctrl_fdb_tables(dev); flow_hw_cleanup_tx_repr_tagging(dev); - flow_hw_cleanup_ctrl_rx_tables(dev); flow_hw_action_template_drop_release(dev); grp = LIST_FIRST(&priv->flow_hw_grp); while (grp) { @@ -11823,12 +11822,6 @@ ret = flow_hw_action_template_drop_init(dev, error); if (ret) goto err; - ret = flow_hw_create_ctrl_rx_tables(dev); - if (ret) { - rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, - "Failed to set up Rx control flow templates"); - goto err; - } /* Initialize quotas */ if (port_attr->nb_quotas || (host_priv && host_priv->quota_ctx.devx_obj)) { ret = mlx5_flow_quota_init(dev, port_attr->nb_quotas); @@ -12680,13 +12673,16 @@ break; } /* Wait for ASO object completion. */ - if (queue == MLX5_HW_INV_QUEUE && - mlx5_aso_mtr_wait(priv, aso_mtr, true)) { - ret = -EINVAL; - rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, "Unable to wait for ASO meter CQE"); - break; + if (queue == MLX5_HW_INV_QUEUE) { + if (mlx5_aso_mtr_wait(priv, aso_mtr, true)) { + ret = -EINVAL; + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "Unable to wait for ASO meter CQE"); + } + mlx5_ipool_free(pool->idx_pool, idx); + if (ret < 0) + break; } aso = true; break; @@ -12884,6 +12880,14 @@ const struct rte_flow_action *action, struct rte_flow_error *err) { + struct mlx5_priv *priv = dev->data->dev_private; + + if (action->type == RTE_FLOW_ACTION_TYPE_AGE && priv->hws_strict_queue) { + rte_flow_error_set(err, EINVAL, RTE_FLOW_ERROR_TYPE_STATE, NULL, + "Cannot create age action synchronously with strict queueing"); + return NULL; + } + return flow_hw_action_handle_create(dev, MLX5_HW_INV_QUEUE, NULL, conf, action, NULL, err); } @@ -13103,6 +13107,8 @@ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "No aging initialized"); if (priv->hws_strict_queue) { + /* Queue is invalid in sync query. Sync query and strict queueing is disallowed. 
*/ + MLX5_ASSERT(queue_id != MLX5_HW_INV_QUEUE); if (queue_id >= age_info->hw_q_age->nb_rings) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, @@ -13156,10 +13162,10 @@ struct mlx5_priv *priv = dev->data->dev_private; if (priv->hws_strict_queue) - DRV_LOG(WARNING, - "port %u get aged flows called in strict queue mode.", - dev->data->port_id); - return flow_hw_get_q_aged_flows(dev, 0, contexts, nb_contexts, error); + return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_STATE, NULL, + "Cannot get aged flows synchronously with strict queueing"); + + return flow_hw_get_q_aged_flows(dev, MLX5_HW_INV_QUEUE, contexts, nb_contexts, error); } /** * Initialization function for non template API which calls @@ -13222,8 +13228,9 @@ (*flow)->nt2hws = (struct rte_flow_nt2hws *) ((uintptr_t)(*flow) + sizeof(struct rte_flow_hw)); (*flow)->idx = idx; - (*flow)->nt2hws->flow_aux = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct rte_flow_hw_aux), - RTE_CACHE_LINE_SIZE, rte_dev_numa_node(dev->device)); + (*flow)->nt2hws->flow_aux = (struct rte_flow_hw_aux *) + ((uintptr_t)((*flow)->nt2hws) + sizeof(struct rte_flow_nt2hws)); + if (!(*flow)->nt2hws->flow_aux) return rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -13821,11 +13828,6 @@ mlx5_free(hw_act.push_remove); if (hw_act.mhdr) mlx5_free(hw_act.mhdr); - if (ret) { - /* release after actual error */ - if ((*flow)->nt2hws && (*flow)->nt2hws->matcher) - flow_hw_unregister_matcher(dev, (*flow)->nt2hws->matcher); - } return ret; } #endif @@ -13843,6 +13845,7 @@ ret = mlx5dr_bwc_rule_destroy(flow->nt2hws->nt_rule); if (ret) DRV_LOG(ERR, "bwc rule destroy failed"); + flow->nt2hws->nt_rule = NULL; } flow->operation_type = MLX5_FLOW_HW_FLOW_OP_TYPE_DESTROY; /* Notice this function does not handle shared/static actions. */ @@ -13858,17 +13861,21 @@ * , same as for DV. 
*/ if (flow->nt2hws->flow_aux) - mlx5_free(flow->nt2hws->flow_aux); - - if (flow->nt2hws->rix_encap_decap) + flow->nt2hws->flow_aux = NULL; + if (flow->nt2hws->rix_encap_decap) { flow_encap_decap_resource_release(dev, flow->nt2hws->rix_encap_decap); + flow->nt2hws->rix_encap_decap = 0; + } if (flow->nt2hws->modify_hdr) { MLX5_ASSERT(flow->nt2hws->modify_hdr->action); mlx5_hlist_unregister(priv->sh->modify_cmds, &flow->nt2hws->modify_hdr->entry); + flow->nt2hws->modify_hdr = NULL; } - if (flow->nt2hws->matcher) + if (flow->nt2hws->matcher) { flow_hw_unregister_matcher(dev, flow->nt2hws->matcher); + flow->nt2hws->matcher = NULL; + } } #ifdef HAVE_MLX5_HWS_SUPPORT @@ -14634,6 +14641,7 @@ legacy->handle, user_data, error); mlx5_indirect_list_remove_entry(&legacy->indirect); + mlx5_free(legacy); goto end; } if (attr) { @@ -14771,6 +14779,8 @@ ((const struct rte_flow_item_ipv4 *)(pattern->spec))->hdr.dst_addr; data.src.ipv4_addr = ((const struct rte_flow_item_ipv4 *)(pattern->spec))->hdr.src_addr; + data.next_protocol = ((const struct rte_flow_item_ipv4 *) + (pattern->spec))->hdr.next_proto_id; break; case RTE_FLOW_ITEM_TYPE_IPV6: memcpy(data.dst.ipv6_addr, @@ -14779,6 +14789,8 @@ memcpy(data.src.ipv6_addr, &((const struct rte_flow_item_ipv6 *)(pattern->spec))->hdr.src_addr, sizeof(data.src.ipv6_addr)); + data.next_protocol = ((const struct rte_flow_item_ipv6 *) + (pattern->spec))->hdr.proto; break; case RTE_FLOW_ITEM_TYPE_UDP: data.next_protocol = IPPROTO_UDP; @@ -14884,7 +14896,7 @@ return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, table, "shrinking table is not supported"); - ret = mlx5_ipool_resize(table->flow, nb_flows, error); + ret = mlx5_ipool_resize(table->flow_pool, nb_flows, error); if (ret) return ret; /* @@ -15670,21 +15682,18 @@ } int -mlx5_flow_hw_create_tx_default_mreg_copy_flow(struct rte_eth_dev *dev) +mlx5_flow_hw_create_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external) { struct mlx5_priv *priv = dev->data->dev_private; - struct rte_flow_item_eth promisc = { - .hdr.dst_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .hdr.src_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .hdr.ether_type = 0, + struct mlx5_rte_flow_item_sq sq_spec = { + .queue = sqn, }; - struct rte_flow_item eth_all[] = { - [0] = { - .type = RTE_FLOW_ITEM_TYPE_ETH, - .spec = &promisc, - .mask = &promisc, + struct rte_flow_item items[] = { + { + .type = (enum rte_flow_item_type)MLX5_RTE_FLOW_ITEM_TYPE_SQ, + .spec = &sq_spec, }, - [1] = { + { .type = RTE_FLOW_ITEM_TYPE_END, }, }; @@ -15714,6 +15723,7 @@ }; struct mlx5_ctrl_flow_info flow_info = { .type = MLX5_CTRL_FLOW_TYPE_TX_META_COPY, + .tx_repr_sq = sqn, }; MLX5_ASSERT(priv->master); @@ -15723,7 +15733,7 @@ return 0; return flow_hw_create_ctrl_flow(dev, dev, priv->hw_ctrl_fdb->hw_tx_meta_cpy_tbl, - eth_all, 0, copy_reg_action, 0, &flow_info, false); + items, 0, copy_reg_action, 0, &flow_info, external); } int @@ -16669,6 +16679,7 @@ switch (items->type) { const struct rte_flow_item_ethdev *ethdev; const struct rte_flow_item_tx_queue *tx_queue; + const struct rte_flow_item_conntrack *spec; struct mlx5_txq_ctrl *txq; case RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT: @@ -16689,6 +16700,15 @@ RTE_FLOW_ERROR_TYPE_ITEM_SPEC, items, "Invalid Tx queue"); mlx5_txq_release(dev, tx_queue->tx_queue); + break; + case RTE_FLOW_ITEM_TYPE_CONNTRACK: + spec = items->spec; + if (spec->flags & ~MLX5_FLOW_CONNTRACK_PKT_STATE_ALL) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, 
+ "Invalid CT item flags"); + break; default: break; } diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_flow_verbs.c dpdk-24.11.4/drivers/net/mlx5/mlx5_flow_verbs.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_flow_verbs.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_flow_verbs.c 2025-12-19 12:05:33.000000000 +0000 @@ -1332,10 +1332,8 @@ switch (items->type) { #ifdef HAVE_IBV_FLOW_SPEC_ESP case RTE_FLOW_ITEM_TYPE_ESP: - ret = mlx5_flow_os_validate_item_esp(dev, items, - item_flags, - next_protocol, - error); + ret = mlx5_flow_os_validate_item_esp(dev, items, item_flags, + next_protocol, false, error); if (ret < 0) return ret; last_item = MLX5_FLOW_ITEM_ESP; diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_hws_cnt.c dpdk-24.11.4/drivers/net/mlx5/mlx5_hws_cnt.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_hws_cnt.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_hws_cnt.c 2025-12-19 12:05:33.000000000 +0000 @@ -56,8 +56,8 @@ uint32_t ret __rte_unused; reset_cnt_num = rte_ring_count(reset_list); - cpool->query_gen++; mlx5_aso_cnt_query(sh, cpool); + rte_atomic_fetch_add_explicit(&cpool->query_gen, 1, rte_memory_order_release); zcdr.n1 = 0; zcdu.n1 = 0; ret = rte_ring_enqueue_zc_burst_elem_start(reuse_list, @@ -127,14 +127,14 @@ uint32_t nb_alloc_cnts = mlx5_hws_cnt_pool_get_size(cpool); uint16_t expected1 = HWS_AGE_CANDIDATE; uint16_t expected2 = HWS_AGE_CANDIDATE_INSIDE_RING; - uint32_t i; + uint32_t i, age_idx, in_use; cpool->time_of_last_age_check = curr_time; for (i = 0; i < nb_alloc_cnts; ++i) { - uint32_t age_idx = cpool->pool[i].age_idx; uint64_t hits; - if (!cpool->pool[i].in_used || age_idx == 0) + mlx5_hws_cnt_get_all(&cpool->pool[i], &in_use, NULL, &age_idx); + if (!in_use || age_idx == 0) continue; param = mlx5_ipool_get(age_info->ages_ipool, age_idx); if (unlikely(param == NULL)) { @@ -163,10 +163,13 @@ break; case HWS_AGE_FREE: /* - * AGE parameter with state "FREE" couldn't be pointed - * by any counter since counter is destroyed first. - * Fall-through. + * Since this check is async, we may reach a race condition + * where the age and counter are used in the same rule, + * using the same counter index, + * age was freed first, and counter was not freed yet. + * Aging check can be safely ignored in that case. */ + continue; default: MLX5_ASSERT(0); continue; @@ -750,7 +753,7 @@ * because they already have init value no need * to wait for query. */ - cpool->query_gen = 1; + rte_atomic_store_explicit(&cpool->query_gen, 1, rte_memory_order_relaxed); ret = mlx5_hws_cnt_pool_action_create(priv, cpool); if (ret != 0) { rte_flow_error_set(error, -ret, diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_hws_cnt.h dpdk-24.11.4/drivers/net/mlx5/mlx5_hws_cnt.h --- dpdk-24.11.3/drivers/net/mlx5/mlx5_hws_cnt.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_hws_cnt.h 2025-12-19 12:05:33.000000000 +0000 @@ -42,33 +42,36 @@ struct mlx5_hws_cnt_dcs dcs[MLX5_HWS_CNT_DCS_NUM]; }; -struct mlx5_hws_cnt { - struct flow_counter_stats reset; - bool in_used; /* Indicator whether this counter in used or in pool. */ - union { - struct { - uint32_t share:1; - /* - * share will be set to 1 when this counter is used as - * indirect action. - */ - uint32_t age_idx:24; - /* - * When this counter uses for aging, it save the index - * of AGE parameter. For pure counter (without aging) - * this index is zero. - */ - }; - /* This struct is only meaningful when user own this counter. 
*/ - uint32_t query_gen_when_free; +union mlx5_hws_cnt_state { + RTE_ATOMIC(uint32_t) data; + struct { + uint32_t in_used:1; + /* Indicator whether this counter in used or in pool. */ + uint32_t share:1; + /* + * share will be set to 1 when this counter is used as + * indirect action. + */ + uint32_t age_idx:24; /* - * When PMD own this counter (user put back counter to PMD - * counter pool, i.e), this field recorded value of counter - * pools query generation at time user release the counter. + * When this counter uses for aging, it stores the index + * of AGE parameter. Otherwise, this index is zero. */ }; }; +struct mlx5_hws_cnt { + struct flow_counter_stats reset; + union mlx5_hws_cnt_state cnt_state; + /* This struct is only meaningful when user own this counter. */ + RTE_ATOMIC(uint32_t) query_gen_when_free; + /* + * When PMD own this counter (user put back counter to PMD + * counter pool, i.e), this field recorded value of counter + * pools query generation at time user release the counter. + */ +}; + struct mlx5_hws_cnt_raw_data_mng { struct flow_counter_stats *raw; struct mlx5_pmd_mr mr; @@ -197,6 +200,42 @@ MLX5_INDIRECT_ACTION_TYPE_COUNT ? true : false; } +static __rte_always_inline void +mlx5_hws_cnt_set_age_idx(struct mlx5_hws_cnt *cnt, uint32_t value) +{ + union mlx5_hws_cnt_state cnt_state; + + cnt_state.data = rte_atomic_load_explicit(&cnt->cnt_state.data, rte_memory_order_acquire); + cnt_state.age_idx = value; + rte_atomic_store_explicit(&cnt->cnt_state.data, cnt_state.data, rte_memory_order_release); +} + +static __rte_always_inline void +mlx5_hws_cnt_set_all(struct mlx5_hws_cnt *cnt, uint32_t in_used, uint32_t share, uint32_t age_idx) +{ + union mlx5_hws_cnt_state cnt_state; + + cnt_state.in_used = !!in_used; + cnt_state.share = !!share; + cnt_state.age_idx = age_idx; + rte_atomic_store_explicit(&cnt->cnt_state.data, cnt_state.data, rte_memory_order_relaxed); +} + +static __rte_always_inline void +mlx5_hws_cnt_get_all(struct mlx5_hws_cnt *cnt, uint32_t *in_used, uint32_t *share, + uint32_t *age_idx) +{ + union mlx5_hws_cnt_state cnt_state; + + cnt_state.data = rte_atomic_load_explicit(&cnt->cnt_state.data, rte_memory_order_acquire); + if (in_used != NULL) + *in_used = cnt_state.in_used; + if (share != NULL) + *share = cnt_state.share; + if (age_idx != NULL) + *age_idx = cnt_state.age_idx; +} + /** * Generate Counter id from internal index. 
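/*
 * Illustrative aside (not part of the patch): the hunk above collapses the
 * three counter-state fields (in_used, share, age_idx) into one 32-bit word
 * accessed through acquire/release atomics, so a reader such as the aging
 * thread always sees a consistent snapshot of all three. A minimal standalone
 * sketch of the same idea, using C11 <stdatomic.h> instead of DPDK's
 * RTE_ATOMIC()/rte_atomic_*_explicit wrappers; field widths follow the patch,
 * names and everything else here are hypothetical.
 */
#include <stdatomic.h>
#include <stdint.h>

union cnt_state {
	uint32_t data;
	struct {
		uint32_t in_used:1;   /* counter currently attached to a rule */
		uint32_t share:1;     /* counter used as an indirect action */
		uint32_t age_idx:24;  /* index of the AGE parameter, 0 if none */
	};
};

struct cnt {
	_Atomic uint32_t state;   /* whole word is read and written atomically */
};

static void cnt_set_all(struct cnt *c, int in_used, int share, uint32_t age_idx)
{
	union cnt_state s = { .data = 0 };

	s.in_used = !!in_used;
	s.share = !!share;
	s.age_idx = age_idx;
	/* a single release store publishes all three fields at once */
	atomic_store_explicit(&c->state, s.data, memory_order_release);
}

static union cnt_state cnt_get_all(struct cnt *c)
{
	union cnt_state s;

	/* the matching acquire load yields a consistent snapshot of the word */
	s.data = atomic_load_explicit(&c->state, memory_order_acquire);
	return s;
}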
* @@ -424,9 +463,10 @@ hpool = mlx5_hws_cnt_host_pool(cpool); iidx = mlx5_hws_cnt_iidx(hpool, *cnt_id); - hpool->pool[iidx].in_used = false; - hpool->pool[iidx].query_gen_when_free = - rte_atomic_load_explicit(&hpool->query_gen, rte_memory_order_relaxed); + mlx5_hws_cnt_set_all(&hpool->pool[iidx], 0, 0, 0); + rte_atomic_store_explicit(&hpool->pool[iidx].query_gen_when_free, + rte_atomic_load_explicit(&hpool->query_gen, rte_memory_order_relaxed), + rte_memory_order_relaxed); if (likely(queue != NULL) && cpool->cfg.host_cpool == NULL) qcache = hpool->cache->qcache[*queue]; if (unlikely(qcache == NULL)) { @@ -480,7 +520,7 @@ */ static __rte_always_inline int mlx5_hws_cnt_pool_get(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue, - cnt_id_t *cnt_id, uint32_t age_idx) + cnt_id_t *cnt_id, uint32_t age_idx, uint32_t shared) { unsigned int ret; struct rte_ring_zc_data zcdc = {0}; @@ -508,10 +548,7 @@ __hws_cnt_query_raw(cpool, *cnt_id, &cpool->pool[iidx].reset.hits, &cpool->pool[iidx].reset.bytes); - cpool->pool[iidx].share = 0; - MLX5_ASSERT(!cpool->pool[iidx].in_used); - cpool->pool[iidx].in_used = true; - cpool->pool[iidx].age_idx = age_idx; + mlx5_hws_cnt_set_all(&cpool->pool[iidx], 1, shared, age_idx); return 0; } ret = rte_ring_dequeue_zc_burst_elem_start(qcache, sizeof(cnt_id_t), 1, @@ -530,8 +567,10 @@ /* get one from local cache. */ *cnt_id = (*(cnt_id_t *)zcdc.ptr1); iidx = mlx5_hws_cnt_iidx(cpool, *cnt_id); - query_gen = cpool->pool[iidx].query_gen_when_free; - if (cpool->query_gen == query_gen) { /* counter is waiting to reset. */ + query_gen = rte_atomic_load_explicit(&cpool->pool[iidx].query_gen_when_free, + rte_memory_order_relaxed); + /* counter is waiting to reset. */ + if (rte_atomic_load_explicit(&cpool->query_gen, rte_memory_order_relaxed) == query_gen) { rte_ring_dequeue_zc_elem_finish(qcache, 0); /* write-back counter to reset list. */ mlx5_hws_cnt_pool_cache_flush(cpool, *queue); @@ -549,10 +588,7 @@ __hws_cnt_query_raw(cpool, *cnt_id, &cpool->pool[iidx].reset.hits, &cpool->pool[iidx].reset.bytes); rte_ring_dequeue_zc_elem_finish(qcache, 1); - cpool->pool[iidx].share = 0; - MLX5_ASSERT(!cpool->pool[iidx].in_used); - cpool->pool[iidx].in_used = true; - cpool->pool[iidx].age_idx = age_idx; + mlx5_hws_cnt_set_all(&cpool->pool[iidx], 1, shared, age_idx); return 0; } @@ -611,24 +647,15 @@ uint32_t age_idx) { struct mlx5_hws_cnt_pool *hpool = mlx5_hws_cnt_host_pool(cpool); - uint32_t iidx; - int ret; - ret = mlx5_hws_cnt_pool_get(hpool, NULL, cnt_id, age_idx); - if (ret != 0) - return ret; - iidx = mlx5_hws_cnt_iidx(hpool, *cnt_id); - hpool->pool[iidx].share = 1; - return 0; + return mlx5_hws_cnt_pool_get(hpool, NULL, cnt_id, age_idx, 1); } static __rte_always_inline void mlx5_hws_cnt_shared_put(struct mlx5_hws_cnt_pool *cpool, cnt_id_t *cnt_id) { struct mlx5_hws_cnt_pool *hpool = mlx5_hws_cnt_host_pool(cpool); - uint32_t iidx = mlx5_hws_cnt_iidx(hpool, *cnt_id); - hpool->pool[iidx].share = 0; mlx5_hws_cnt_pool_put(hpool, NULL, cnt_id); } @@ -637,8 +664,10 @@ { struct mlx5_hws_cnt_pool *hpool = mlx5_hws_cnt_host_pool(cpool); uint32_t iidx = mlx5_hws_cnt_iidx(hpool, cnt_id); + uint32_t share; - return hpool->pool[iidx].share ? 
true : false; + mlx5_hws_cnt_get_all(&hpool->pool[iidx], NULL, &share, NULL); + return !!share; } static __rte_always_inline void @@ -648,8 +677,8 @@ struct mlx5_hws_cnt_pool *hpool = mlx5_hws_cnt_host_pool(cpool); uint32_t iidx = mlx5_hws_cnt_iidx(hpool, cnt_id); - MLX5_ASSERT(hpool->pool[iidx].share); - hpool->pool[iidx].age_idx = age_idx; + MLX5_ASSERT(hpool->pool[iidx].cnt_state.share); + mlx5_hws_cnt_set_age_idx(&hpool->pool[iidx], age_idx); } static __rte_always_inline uint32_t @@ -657,9 +686,11 @@ { struct mlx5_hws_cnt_pool *hpool = mlx5_hws_cnt_host_pool(cpool); uint32_t iidx = mlx5_hws_cnt_iidx(hpool, cnt_id); + uint32_t age_idx, share; - MLX5_ASSERT(hpool->pool[iidx].share); - return hpool->pool[iidx].age_idx; + mlx5_hws_cnt_get_all(&hpool->pool[iidx], NULL, &share, &age_idx); + MLX5_ASSERT(share); + return age_idx; } static __rte_always_inline cnt_id_t diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_nta_rss.c dpdk-24.11.4/drivers/net/mlx5/mlx5_nta_rss.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_nta_rss.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_nta_rss.c 2025-12-19 12:05:33.000000000 +0000 @@ -67,7 +67,7 @@ ctx->pattern, ctx->actions, MLX5_FLOW_ITEM_PTYPE, MLX5_FLOW_ACTION_RSS, ctx->external, &flow, ctx->error); - if (flow) { + if (ret == 0) { SLIST_INSERT_HEAD(ctx->head, flow, nt2hws->next); if (dbg_log) { DRV_LOG(NOTICE, @@ -275,6 +275,7 @@ uint32_t ptype_group, bool external, struct rte_flow_error *error) { + int ret; struct rte_flow_hw *flow = NULL; const struct rte_flow_attr miss_attr = { .ingress = 1, @@ -299,10 +300,10 @@ [MLX5_RSS_PTYPE_ACTION_INDEX + 1] = { .type = RTE_FLOW_ACTION_TYPE_END } }; - flow_hw_create_flow(dev, MLX5_FLOW_TYPE_GEN, &miss_attr, - miss_pattern, miss_actions, 0, MLX5_FLOW_ACTION_RSS, - external, &flow, error); - return flow; + ret = flow_hw_create_flow(dev, MLX5_FLOW_TYPE_GEN, &miss_attr, + miss_pattern, miss_actions, 0, + MLX5_FLOW_ACTION_RSS, external, &flow, error); + return ret == 0 ? flow : NULL; } static struct rte_flow_hw * @@ -315,10 +316,13 @@ enum mlx5_flow_type flow_type, struct rte_flow_error *error) { - int i = 0; + int ret, i = 0; struct rte_flow_hw *flow = NULL; struct rte_flow_action actions[MLX5_HW_MAX_ACTS]; enum mlx5_indirect_type indirect_type; + const struct rte_flow_action_jump jump_conf = { + .group = ptype_group + }; do { switch (orig_actions[i].type) { @@ -333,9 +337,7 @@ /* Fall through */ case RTE_FLOW_ACTION_TYPE_RSS: actions[i].type = RTE_FLOW_ACTION_TYPE_JUMP; - actions[i].conf = &(const struct rte_flow_action_jump) { - .group = ptype_group - }; + actions[i].conf = &jump_conf; break; default: actions[i] = orig_actions[i]; @@ -344,9 +346,9 @@ } while (actions[i++].type != RTE_FLOW_ACTION_TYPE_END); action_flags &= ~MLX5_FLOW_ACTION_RSS; action_flags |= MLX5_FLOW_ACTION_JUMP; - flow_hw_create_flow(dev, flow_type, attr, pattern, actions, - item_flags, action_flags, external, &flow, error); - return flow; + ret = flow_hw_create_flow(dev, flow_type, attr, pattern, actions, + item_flags, action_flags, external, &flow, error); + return ret == 0 ? 
flow : NULL; } const struct rte_flow_action_rss * @@ -424,6 +426,7 @@ enum mlx5_flow_type flow_type, struct rte_flow_error *error) { + int ret; struct rte_flow_hw *flow = NULL; struct rte_flow_action copy[MLX5_HW_MAX_ACTS]; const struct rte_flow_action *_actions; @@ -455,10 +458,9 @@ _actions = actions; } end: - flow_hw_create_flow(dev, flow_type, attr, items, - _actions, item_flags, action_flags, - external, &flow, error); - return flow; + ret = flow_hw_create_flow(dev, flow_type, attr, items, _actions, + item_flags, action_flags, external, &flow, error); + return ret == 0 ? flow : NULL; } /* diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_nta_split.c dpdk-24.11.4/drivers/net/mlx5/mlx5_nta_split.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_nta_split.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_nta_split.c 2025-12-19 12:05:33.000000000 +0000 @@ -345,8 +345,8 @@ /* (match REG 'tag') or all. */ items[1].type = RTE_FLOW_ITEM_TYPE_END; /* (Mark) or void + copy to Rx meta + jump to the MREG_ACT_TABLE_GROUP. */ - actions[1].type = RTE_FLOW_ACTION_TYPE_MODIFY_FIELD, - actions[1].conf = &rx_meta, + actions[1].type = RTE_FLOW_ACTION_TYPE_MODIFY_FIELD; + actions[1].conf = &rx_meta; actions[2].type = RTE_FLOW_ACTION_TYPE_JUMP; actions[2].conf = &jump; actions[3].type = RTE_FLOW_ACTION_TYPE_END; diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_rx.c dpdk-24.11.4/drivers/net/mlx5/mlx5_rx.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_rx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_rx.c 2025-12-19 12:05:33.000000000 +0000 @@ -294,6 +294,20 @@ return (value & m) == v ? -1 : 0; } +static int +mlx5_monitor_cqe_own_callback(const uint64_t value, + const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ]) +{ + const uint64_t m = opaque[CLB_MSK_IDX]; + const uint64_t v = opaque[CLB_VAL_IDX]; + const uint64_t sw_owned = ((value & m) == v); + const uint64_t opcode = MLX5_CQE_OPCODE(value); + const uint64_t valid_op = (opcode != MLX5_CQE_INVALID); + + /* ownership bit is not valid for invalid opcode; CQE is HW owned */ + return -(valid_op & sw_owned); +} + int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc) { struct mlx5_rxq_data *rxq = rx_queue; @@ -311,12 +325,13 @@ pmc->addr = &cqe->validity_iteration_count; pmc->opaque[CLB_VAL_IDX] = vic; pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_VIC_INIT; + pmc->fn = mlx5_monitor_callback; } else { pmc->addr = &cqe->op_own; pmc->opaque[CLB_VAL_IDX] = !!idx; pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_OWNER_MASK; + pmc->fn = mlx5_monitor_cqe_own_callback; } - pmc->fn = mlx5_monitor_callback; pmc->size = sizeof(uint8_t); return 0; } diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_rx.h dpdk-24.11.4/drivers/net/mlx5/mlx5_rx.h --- dpdk-24.11.3/drivers/net/mlx5/mlx5_rx.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_rx.h 2025-12-19 12:05:33.000000000 +0000 @@ -158,6 +158,7 @@ /* RX queue control descriptor. */ struct mlx5_rxq_ctrl { struct mlx5_rxq_data rxq; /* Data path structure. */ + uint16_t mtu; /* Original MTU that the queue was allocated with. */ LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */ LIST_HEAD(priv, mlx5_rxq_priv) owners; /* Owner rxq list. */ struct mlx5_rxq_obj *obj; /* Verbs/DevX elements. 
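/*
 * Illustrative aside (not part of the patch): the new CQE-ownership callback
 * above reports "packet present" only when the opcode is valid and the
 * ownership bits say the CQE is software owned, so rte_power_monitor() aborts
 * the sleep only for a real completion. A standalone callback with the same
 * shape is sketched below; OWNER_MASK and OPCODE_INVALID are hypothetical
 * stand-ins for the real encodings in mlx5_prm.h, and OPAQUE_SZ mirrors
 * RTE_POWER_MONITOR_OPAQUE_SZ.
 */
#include <stdint.h>

#define OPAQUE_SZ       4
#define CLB_VAL_IDX     0
#define CLB_MSK_IDX     1
#define OWNER_MASK      0x1u     /* hypothetical ownership bit */
#define OPCODE_INVALID  0xfu     /* hypothetical "invalid" opcode value */

static int
cqe_own_callback(const uint64_t value, const uint64_t opaque[OPAQUE_SZ])
{
	const uint64_t expected = opaque[CLB_VAL_IDX];
	const uint64_t mask = opaque[CLB_MSK_IDX];
	const uint64_t sw_owned = ((value & mask) == expected);
	const uint64_t valid_op = ((value >> 4) != OPCODE_INVALID);

	/* returning 0 lets the core keep sleeping, -1 aborts the sleep */
	return -(int)(valid_op & sw_owned);
}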
*/ diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_rxq.c dpdk-24.11.4/drivers/net/mlx5/mlx5_rxq.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_rxq.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_rxq.c 2025-12-19 12:05:33.000000000 +0000 @@ -779,7 +779,7 @@ dev->data->port_id, idx); return false; } - if (priv->mtu != spriv->mtu) { + if (priv->mtu != rxq_ctrl->mtu) { DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mtu mismatch", dev->data->port_id, idx); return false; @@ -1770,6 +1770,10 @@ LIST_INIT(&tmpl->owners); MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG); /* + * Save the original MTU to check against for shared rx queues. + */ + tmpl->mtu = dev->data->mtu; + /* * Save the original segment configuration in the shared queue * descriptor for the later check on the sibling queue creation. */ @@ -1978,8 +1982,9 @@ tmpl->share_group = conf->share_group; tmpl->share_qid = conf->share_qid; LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry); + } else { + LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); } - LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); rte_atomic_store_explicit(&tmpl->ctrl_ref, 1, rte_memory_order_relaxed); return tmpl; error: @@ -2156,7 +2161,8 @@ { struct mlx5_external_q *rxq = mlx5_ext_rxq_get(dev, idx); - rte_atomic_fetch_add_explicit(&rxq->refcnt, 1, rte_memory_order_relaxed); + if (rxq != NULL) + rte_atomic_fetch_add_explicit(&rxq->refcnt, 1, rte_memory_order_relaxed); return rxq; } @@ -2176,7 +2182,9 @@ { struct mlx5_external_q *rxq = mlx5_ext_rxq_get(dev, idx); - return rte_atomic_fetch_sub_explicit(&rxq->refcnt, 1, rte_memory_order_relaxed) - 1; + return rxq != NULL ? + rte_atomic_fetch_sub_explicit(&rxq->refcnt, 1, rte_memory_order_relaxed) - 1 : + UINT32_MAX; } /** @@ -2195,8 +2203,8 @@ { struct mlx5_priv *priv = dev->data->dev_private; - MLX5_ASSERT(mlx5_is_external_rxq(dev, idx)); - return &priv->ext_rxqs[idx - RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN]; + return mlx5_is_external_rxq(dev, idx) ? + &priv->ext_rxqs[idx - RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN] : NULL; } /** @@ -2310,7 +2318,8 @@ (&rxq_ctrl->rxq.mr_ctrl.cache_bh); if (rxq_ctrl->rxq.shared) LIST_REMOVE(rxq_ctrl, share_entry); - LIST_REMOVE(rxq_ctrl, next); + else + LIST_REMOVE(rxq_ctrl, next); mlx5_free(rxq_ctrl->rxq.rq_win_data); mlx5_free(rxq_ctrl); } @@ -2358,7 +2367,6 @@ mlx5_ext_rxq_verify(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_external_q *rxq; uint32_t i; int ret = 0; @@ -2366,8 +2374,9 @@ return 0; for (i = RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN; i <= UINT16_MAX ; ++i) { - rxq = mlx5_ext_rxq_get(dev, i); - if (rxq->refcnt < 2) + struct mlx5_external_q *rxq = mlx5_ext_rxq_get(dev, i); + + if (rxq == NULL || rxq->refcnt < 2) continue; DRV_LOG(DEBUG, "Port %u external RxQ %u still referenced.", dev->data->port_id, i); diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_rxtx_vec.h dpdk-24.11.4/drivers/net/mlx5/mlx5_rxtx_vec.h --- dpdk-24.11.3/drivers/net/mlx5/mlx5_rxtx_vec.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_rxtx_vec.h 2025-12-19 12:05:33.000000000 +0000 @@ -13,13 +13,6 @@ #include "mlx5_autoconf.h" -/* HW checksum offload capabilities of vectorized Tx. */ -#define MLX5_VEC_TX_CKSUM_OFFLOAD_CAP \ - (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \ - RTE_ETH_TX_OFFLOAD_UDP_CKSUM | \ - RTE_ETH_TX_OFFLOAD_TCP_CKSUM | \ - RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) - /* * Compile time sanity check for vectorized functions. 
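/*
 * Illustrative aside (not part of the patch): the external RxQ lookup above
 * now returns NULL for an index outside the external-queue range instead of
 * asserting, and the ref/deref helpers tolerate that, reporting UINT32_MAX
 * when there is no such queue. A minimal standalone version of the same
 * guard pattern, with hypothetical types and bounds, might be:
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>

#define EXT_QID_MIN 0x8000u
#define EXT_QID_NUM 64u

struct ext_q {
	_Atomic uint32_t refcnt;
};

static struct ext_q ext_qs[EXT_QID_NUM];

static struct ext_q *ext_q_get(uint32_t idx)
{
	/* NULL instead of an assert lets callers handle a bad index */
	if (idx < EXT_QID_MIN || idx - EXT_QID_MIN >= EXT_QID_NUM)
		return NULL;
	return &ext_qs[idx - EXT_QID_MIN];
}

static uint32_t ext_q_deref(uint32_t idx)
{
	struct ext_q *q = ext_q_get(idx);

	/* UINT32_MAX signals "no such queue" to the caller */
	return q != NULL ?
	       atomic_fetch_sub_explicit(&q->refcnt, 1, memory_order_relaxed) - 1 :
	       UINT32_MAX;
}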
*/ diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_trigger.c dpdk-24.11.4/drivers/net/mlx5/mlx5_trigger.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_trigger.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_trigger.c 2025-12-19 12:05:33.000000000 +0000 @@ -1135,6 +1135,11 @@ #endif +#define SAVE_RTE_ERRNO_AND_STOP(ret, dev) do { \ + ret = rte_errno; \ + (dev)->data->dev_started = 0; \ +} while (0) + /** * DPDK callback to start the device. * @@ -1217,19 +1222,23 @@ if (ret) { DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s", dev->data->port_id, strerror(rte_errno)); + SAVE_RTE_ERRNO_AND_STOP(ret, dev); goto error; } if (mlx5_devx_obj_ops_en(priv->sh) && priv->obj_ops.lb_dummy_queue_create) { ret = priv->obj_ops.lb_dummy_queue_create(dev); - if (ret) - goto error; + if (ret) { + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto txpp_stop; + } } ret = mlx5_txq_start(dev); if (ret) { DRV_LOG(ERR, "port %u Tx queue allocation failed: %s", dev->data->port_id, strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto lb_dummy_queue_release; } if (priv->config.std_delay_drop || priv->config.hp_delay_drop) { if (!priv->sh->dev_cap.vf && !priv->sh->dev_cap.sf && @@ -1253,7 +1262,8 @@ if (ret) { DRV_LOG(ERR, "port %u Rx queue allocation failed: %s", dev->data->port_id, strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto txq_stop; } /* * Such step will be skipped if there is no hairpin TX queue configured @@ -1263,7 +1273,8 @@ if (ret) { DRV_LOG(ERR, "port %u hairpin auto binding failed: %s", dev->data->port_id, strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto rxq_stop; } /* Set started flag here for the following steps like control flow. */ dev->data->dev_started = 1; @@ -1271,7 +1282,8 @@ if (ret) { DRV_LOG(ERR, "port %u Rx interrupt vector creation failed", dev->data->port_id); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto rxq_stop; } mlx5_os_stats_init(dev); /* @@ -1283,7 +1295,8 @@ DRV_LOG(ERR, "port %u failed to attach indirect actions: %s", dev->data->port_id, rte_strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto rx_intr_vec_disable; } #ifdef HAVE_MLX5_HWS_SUPPORT if (priv->sh->config.dv_flow_en == 2) { @@ -1291,7 +1304,8 @@ if (ret) { DRV_LOG(ERR, "port %u failed to update HWS tables", dev->data->port_id); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto action_handle_detach; } } #endif @@ -1299,7 +1313,8 @@ if (ret) { DRV_LOG(ERR, "port %u failed to set defaults flows", dev->data->port_id); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto action_handle_detach; } /* Set dynamic fields and flags into Rx queues. */ mlx5_flow_rxq_dynf_set(dev); @@ -1316,12 +1331,14 @@ if (ret) { DRV_LOG(DEBUG, "port %u failed to start default actions: %s", dev->data->port_id, strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto traffic_disable; } if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) { DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s", dev->data->port_id, rte_strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto stop_default; } rte_wmb(); dev->tx_pkt_burst = mlx5_select_tx_function(dev); @@ -1348,18 +1365,25 @@ priv->sh->port[priv->dev_port - 1].devx_ih_port_id = (uint32_t)dev->data->port_id; return 0; -error: - ret = rte_errno; /* Save rte_errno before cleanup. */ - /* Rollback. 
*/ - dev->data->dev_started = 0; +stop_default: mlx5_flow_stop_default(dev); +traffic_disable: mlx5_traffic_disable(dev); - mlx5_txq_stop(dev); +action_handle_detach: + mlx5_action_handle_detach(dev); +rx_intr_vec_disable: + mlx5_rx_intr_vec_disable(dev); +rxq_stop: mlx5_rxq_stop(dev); +txq_stop: + mlx5_txq_stop(dev); +lb_dummy_queue_release: if (priv->obj_ops.lb_dummy_queue_release) priv->obj_ops.lb_dummy_queue_release(dev); - mlx5_txpp_stop(dev); /* Stop last. */ - rte_errno = ret; /* Restore rte_errno. */ +txpp_stop: + mlx5_txpp_stop(dev); +error: + rte_errno = ret; return -rte_errno; } @@ -1488,18 +1512,6 @@ unsigned int i; int ret; - /* - * With extended metadata enabled, the Tx metadata copy is handled by default - * Tx tagging flow rules, so default Tx flow rule is not needed. It is only - * required when representor matching is disabled. - */ - if (config->dv_esw_en && - !config->repr_matching && - config->dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS && - priv->master) { - if (mlx5_flow_hw_create_tx_default_mreg_copy_flow(dev)) - goto error; - } for (i = 0; i < priv->txqs_n; ++i) { struct mlx5_txq_ctrl *txq = mlx5_txq_get(dev, i); uint32_t queue; @@ -1521,6 +1533,19 @@ goto error; } } + /* + * With extended metadata enabled, the Tx metadata copy is handled by default + * Tx tagging flow rules, so default Tx flow rule is not needed. It is only + * required when representor matching is disabled. + */ + if (config->dv_esw_en && !config->repr_matching && + config->dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS && + (priv->master || priv->representor)) { + if (mlx5_flow_hw_create_tx_default_mreg_copy_flow(dev, queue, false)) { + mlx5_txq_release(dev, i); + goto error; + } + } mlx5_txq_release(dev, i); } if (config->fdb_def_rule) { @@ -1538,6 +1563,12 @@ goto error; if (priv->isolated) return 0; + ret = mlx5_flow_hw_create_ctrl_rx_tables(dev); + if (ret) { + DRV_LOG(ERR, "Failed to set up Rx control flow templates for port %u, %d", + dev->data->port_id, -ret); + goto error; + } if (dev->data->promiscuous) flags |= MLX5_CTRL_PROMISCUOUS; if (dev->data->all_multicast) @@ -1551,6 +1582,7 @@ error: ret = rte_errno; mlx5_flow_hw_flush_ctrl_flows(dev); + mlx5_flow_hw_cleanup_ctrl_rx_tables(dev); rte_errno = ret; return -rte_errno; } @@ -1714,7 +1746,10 @@ for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) { struct rte_ether_addr *mac = &dev->data->mac_addrs[i]; - if (!memcmp(mac, &cmp, sizeof(*mac)) || rte_is_multicast_ether_addr(mac)) + /* Add flows for unicast and multicast mac addresses added by API. */ + if (!memcmp(mac, &cmp, sizeof(*mac)) || + !BITFIELD_ISSET(priv->mac_own, i) || + (dev->data->all_multicast && rte_is_multicast_ether_addr(mac))) continue; memcpy(&unicast.hdr.dst_addr.addr_bytes, mac->addr_bytes, @@ -1782,8 +1817,13 @@ #ifdef HAVE_MLX5_HWS_SUPPORT struct mlx5_priv *priv = dev->data->dev_private; - if (priv->sh->config.dv_flow_en == 2) + if (priv->sh->config.dv_flow_en == 2) { + /* Device started flag was cleared before, this is used to derefer the Rx queues. 
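/*
 * Illustrative aside (not part of the patch): the dev_start rework above
 * replaces one catch-all "error:" label with a ladder of labels, so each
 * failure unwinds only the steps that already succeeded, in reverse order,
 * and the errno captured at the failure site is restored at the end. The
 * generic shape of that pattern, with hypothetical step_a/step_b/step_c
 * helpers, is sketched below.
 */
#include <errno.h>

static int step_a_start(void) { return 0; }
static int step_b_start(void) { return 0; }
static int step_c_start(void) { errno = EIO; return -1; } /* pretend this fails */
static void step_a_stop(void) { }
static void step_b_stop(void) { }

static int start_all(void)
{
	int err;

	if (step_a_start() != 0) {
		err = errno;
		goto out;
	}
	if (step_b_start() != 0) {
		err = errno;
		goto stop_a;
	}
	if (step_c_start() != 0) {
		err = errno;
		goto stop_b;
	}
	return 0;
stop_b:
	step_b_stop();	/* undo only what was actually started ... */
stop_a:
	step_a_stop();	/* ... in reverse order of initialization */
out:
	errno = err;	/* report the original failure, not the cleanup's */
	return -err;
}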
*/ + priv->hws_rule_flushing = true; mlx5_flow_hw_flush_ctrl_flows(dev); + mlx5_flow_hw_cleanup_ctrl_rx_tables(dev); + priv->hws_rule_flushing = false; + } else #endif mlx5_traffic_disable_legacy(dev); diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_txq.c dpdk-24.11.4/drivers/net/mlx5/mlx5_txq.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_txq.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_txq.c 2025-12-19 12:05:33.000000000 +0000 @@ -1209,8 +1209,8 @@ { struct mlx5_priv *priv = dev->data->dev_private; - MLX5_ASSERT(mlx5_is_external_txq(dev, idx)); - return &priv->ext_txqs[idx - MLX5_EXTERNAL_TX_QUEUE_ID_MIN]; + return mlx5_is_external_txq(dev, idx) ? + &priv->ext_txqs[idx - MLX5_EXTERNAL_TX_QUEUE_ID_MIN] : NULL; } /** @@ -1226,7 +1226,6 @@ mlx5_ext_txq_verify(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_external_q *txq; uint32_t i; int ret = 0; @@ -1234,8 +1233,9 @@ return 0; for (i = MLX5_EXTERNAL_TX_QUEUE_ID_MIN; i <= UINT16_MAX ; ++i) { - txq = mlx5_ext_txq_get(dev, i); - if (txq->refcnt < 2) + struct mlx5_external_q *txq = mlx5_ext_txq_get(dev, i); + + if (txq == NULL || txq->refcnt < 2) continue; DRV_LOG(DEBUG, "Port %u external TxQ %u still referenced.", dev->data->port_id, i); @@ -1385,6 +1385,14 @@ if (sq_miss_created) mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num); return -rte_errno; + } + + if (!priv->sh->config.repr_matching && + priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS && + mlx5_flow_hw_create_tx_default_mreg_copy_flow(dev, sq_num, true)) { + if (sq_miss_created) + mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num); + return -rte_errno; } return 0; } diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_utils.c dpdk-24.11.4/drivers/net/mlx5/mlx5_utils.c --- dpdk-24.11.3/drivers/net/mlx5/mlx5_utils.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_utils.c 2025-12-19 12:05:33.000000000 +0000 @@ -587,7 +587,7 @@ uint32_t trunk_idx; uint32_t entry_idx; - if (!idx) + if (!pool || !idx) return; if (pool->cfg.per_core_cache) { mlx5_ipool_free_cache(pool, idx); diff -Nru dpdk-24.11.3/drivers/net/mlx5/mlx5_utils.h dpdk-24.11.4/drivers/net/mlx5/mlx5_utils.h --- dpdk-24.11.3/drivers/net/mlx5/mlx5_utils.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/mlx5_utils.h 2025-12-19 12:05:33.000000000 +0000 @@ -22,9 +22,6 @@ #include "mlx5_defs.h" -/* Save and restore errno around argument evaluation. */ -#define ERRNO_SAFE(x) ((errno = (int []){ errno, ((x), 0) }[0])) - extern int mlx5_logtype; #define MLX5_NET_LOG_PREFIX "mlx5_net" diff -Nru dpdk-24.11.3/drivers/net/mlx5/windows/mlx5_ethdev_os.c dpdk-24.11.4/drivers/net/mlx5/windows/mlx5_ethdev_os.c --- dpdk-24.11.3/drivers/net/mlx5/windows/mlx5_ethdev_os.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/windows/mlx5_ethdev_os.c 2025-12-19 12:05:33.000000000 +0000 @@ -56,7 +56,7 @@ * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[MLX5_NAMESIZE]) +mlx5_get_ifname(const struct rte_eth_dev *dev, char ifname[MLX5_NAMESIZE]) { struct mlx5_priv *priv; mlx5_context_st *context_obj; @@ -67,11 +67,39 @@ } priv = dev->data->dev_private; context_obj = (mlx5_context_st *)priv->sh->cdev->ctx; - strncpy(*ifname, context_obj->mlx5_dev.name, MLX5_NAMESIZE); + strncpy(ifname, context_obj->mlx5_dev.name, MLX5_NAMESIZE); return 0; } /** + * Get device minimum and maximum allowed MTU. 
+ * + * Windows API does not expose minimum and maximum allowed MTU. + * In this case, this just returns (-ENOTSUP) to allow platform-independent code + * to fallback to default values. + * + * @param dev + * Pointer to Ethernet device. + * @param[out] min_mtu + * Minimum MTU value output buffer. + * @param[out] max_mtu + * Maximum MTU value output buffer. + * + * @return + * (-ENOTSUP) - not supported on Windows + */ +int +mlx5_os_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu) +{ + RTE_SET_USED(dev); + RTE_SET_USED(min_mtu); + RTE_SET_USED(max_mtu); + + rte_errno = ENOTSUP; + return -rte_errno; +} + +/** * Get device MTU. * * @param dev @@ -283,11 +311,11 @@ dev_link.link_duplex = 1; if (dev->data->dev_link.link_speed != dev_link.link_speed || dev->data->dev_link.link_duplex != dev_link.link_duplex || - dev->data->dev_link.link_autoneg != dev_link.link_autoneg || dev->data->dev_link.link_status != dev_link.link_status) ret = 1; else ret = 0; + dev_link.link_autoneg = dev->data->dev_link.link_autoneg; dev->data->dev_link = dev_link; return ret; } diff -Nru dpdk-24.11.3/drivers/net/mlx5/windows/mlx5_flow_os.c dpdk-24.11.4/drivers/net/mlx5/windows/mlx5_flow_os.c --- dpdk-24.11.3/drivers/net/mlx5/windows/mlx5_flow_os.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/windows/mlx5_flow_os.c 2025-12-19 12:05:33.000000000 +0000 @@ -219,9 +219,9 @@ default: break; } - MLX5_SET(devx_fs_rule_add_in, in, match_criteria_enable, - MLX5_MATCH_OUTER_HEADERS); } + MLX5_SET(devx_fs_rule_add_in, in, match_criteria_enable, + mlx5_matcher->attr.match_criteria_enable); *flow = mlx5_glue->devx_fs_rule_add(mlx5_matcher->ctx, in, sizeof(in)); return (*flow) ? 0 : -1; } @@ -428,6 +428,7 @@ const struct rte_flow_item *item, uint64_t item_flags, uint8_t target_protocol, + bool allow_seq, struct rte_flow_error *error) { const struct rte_flow_item_esp *mask = item->mask; @@ -437,6 +438,12 @@ MLX5_FLOW_LAYER_OUTER_L3; const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : MLX5_FLOW_LAYER_OUTER_L4; + static const struct rte_flow_item_esp mlx5_flow_item_esp_mask = { + .hdr = { + .spi = RTE_BE32(0xffffffff), + .seq = RTE_BE32(0xffffffff), + }, + }; int ret; if (!(item_flags & l3m)) @@ -461,7 +468,8 @@ " supported on Windows"); ret = mlx5_flow_item_acceptable (dev, item, (const uint8_t *)mask, - (const uint8_t *)&rte_flow_item_esp_mask, + allow_seq ? (const uint8_t *)&mlx5_flow_item_esp_mask : + (const uint8_t *)&rte_flow_item_esp_mask, sizeof(struct rte_flow_item_esp), MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) diff -Nru dpdk-24.11.3/drivers/net/mlx5/windows/mlx5_flow_os.h dpdk-24.11.4/drivers/net/mlx5/windows/mlx5_flow_os.h --- dpdk-24.11.3/drivers/net/mlx5/windows/mlx5_flow_os.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/windows/mlx5_flow_os.h 2025-12-19 12:05:33.000000000 +0000 @@ -47,6 +47,7 @@ case RTE_FLOW_ITEM_TYPE_IPV6: case RTE_FLOW_ITEM_TYPE_VLAN: case RTE_FLOW_ITEM_TYPE_ESP: + case RTE_FLOW_ITEM_TYPE_NVGRE: return true; default: return false; @@ -461,6 +462,8 @@ * Bit-fields that holds the items detected until now. * @param[in] target_protocol * The next protocol in the previous item. + * @param[in] allow_seq + * The match on sequence number is supported. * @param[out] error * Pointer to error structure. 
* @@ -472,6 +475,7 @@ const struct rte_flow_item *item, uint64_t item_flags, uint8_t target_protocol, + bool allow_seq, struct rte_flow_error *error); /** diff -Nru dpdk-24.11.3/drivers/net/mlx5/windows/mlx5_os.c dpdk-24.11.4/drivers/net/mlx5/windows/mlx5_os.c --- dpdk-24.11.3/drivers/net/mlx5/windows/mlx5_os.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/windows/mlx5_os.c 2025-12-19 12:05:33.000000000 +0000 @@ -478,6 +478,8 @@ eth_dev->data->mac_addrs = priv->mac; eth_dev->device = dpdk_dev; eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + /* Fetch minimum and maximum allowed MTU from the device. */ + mlx5_get_mtu_bounds(eth_dev, &priv->min_mtu, &priv->max_mtu); /* Configure the first MAC address by default. */ if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) { DRV_LOG(ERR, @@ -508,6 +510,7 @@ err = rte_errno; goto error; } + eth_dev->data->mtu = priv->mtu; DRV_LOG(DEBUG, "port %u MTU is %u.", eth_dev->data->port_id, priv->mtu); /* Initialize burst functions to prevent crashes before link-up. */ diff -Nru dpdk-24.11.3/drivers/net/mlx5/windows/mlx5_os.h dpdk-24.11.4/drivers/net/mlx5/windows/mlx5_os.h --- dpdk-24.11.3/drivers/net/mlx5/windows/mlx5_os.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/mlx5/windows/mlx5_os.h 2025-12-19 12:05:33.000000000 +0000 @@ -12,8 +12,6 @@ MLX5_FS_PATH_MAX = MLX5_DEVX_DEVICE_PNP_SIZE + 1 }; -#define PCI_DRV_FLAGS 0 - #define MLX5_NAMESIZE MLX5_FS_NAME_MAX enum mlx5_sw_parsing_offloads { diff -Nru dpdk-24.11.3/drivers/net/nfp/nfp_mtr.c dpdk-24.11.4/drivers/net/nfp/nfp_mtr.c --- dpdk-24.11.3/drivers/net/nfp/nfp_mtr.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/nfp/nfp_mtr.c 2025-12-19 12:05:33.000000000 +0000 @@ -12,6 +12,13 @@ #include "flower/nfp_flower_representor.h" #include "nfp_logs.h" +#ifndef LIST_FOREACH_SAFE +#define LIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = LIST_FIRST((head)); \ + (var) && ((tvar) = LIST_NEXT((var), field), 1); \ + (var) = (tvar)) +#endif + #define NFP_MAX_POLICY_CNT NFP_MAX_MTR_CNT #define NFP_MAX_PROFILE_CNT NFP_MAX_MTR_CNT @@ -1124,10 +1131,10 @@ void nfp_mtr_priv_uninit(struct nfp_pf_dev *pf_dev) { - struct nfp_mtr *mtr; + struct nfp_mtr *mtr, *tmp_mtr; struct nfp_mtr_priv *priv; - struct nfp_mtr_policy *mtr_policy; - struct nfp_mtr_profile *mtr_profile; + struct nfp_mtr_policy *mtr_policy, *tmp_policy; + struct nfp_mtr_profile *mtr_profile, *tmp_profile; struct nfp_app_fw_flower *app_fw_flower; app_fw_flower = NFP_PRIV_TO_APP_FW_FLOWER(pf_dev->app_fw_priv); @@ -1135,17 +1142,17 @@ rte_eal_alarm_cancel(nfp_mtr_stats_request, (void *)app_fw_flower); - LIST_FOREACH(mtr, &priv->mtrs, next) { + LIST_FOREACH_SAFE(mtr, &priv->mtrs, next, tmp_mtr) { LIST_REMOVE(mtr, next); rte_free(mtr); } - LIST_FOREACH(mtr_profile, &priv->profiles, next) { + LIST_FOREACH_SAFE(mtr_profile, &priv->profiles, next, tmp_profile) { LIST_REMOVE(mtr_profile, next); rte_free(mtr_profile); } - LIST_FOREACH(mtr_policy, &priv->policies, next) { + LIST_FOREACH_SAFE(mtr_policy, &priv->policies, next, tmp_policy) { LIST_REMOVE(mtr_policy, next); rte_free(mtr_policy); } diff -Nru dpdk-24.11.3/drivers/net/ngbe/ngbe_ethdev.c dpdk-24.11.4/drivers/net/ngbe/ngbe_ethdev.c --- dpdk-24.11.3/drivers/net/ngbe/ngbe_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ngbe/ngbe_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -1701,6 +1701,8 @@ { struct ngbe_hw *hw = ngbe_dev_hw(dev); struct ngbe_hw_stats *hw_stats = NGBE_DEV_STATS(dev); + struct ngbe_rx_queue *rxq; 
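/*
 * Illustrative aside (not part of the patch): plain LIST_FOREACH reads
 * var->next after the loop body has run, so freeing the element inside the
 * body is a use-after-free; the LIST_FOREACH_SAFE variant added above caches
 * the next pointer first. A standalone example with <sys/queue.h> and a
 * hypothetical node type:
 */
#include <sys/queue.h>
#include <stdlib.h>

#ifndef LIST_FOREACH_SAFE
#define LIST_FOREACH_SAFE(var, head, field, tvar)		\
	for ((var) = LIST_FIRST((head));			\
	     (var) && ((tvar) = LIST_NEXT((var), field), 1);	\
	     (var) = (tvar))
#endif

struct node {
	LIST_ENTRY(node) next;
};
LIST_HEAD(node_list, node);

static void free_all(struct node_list *head)
{
	struct node *n, *tmp;

	LIST_FOREACH_SAFE(n, head, next, tmp) {
		LIST_REMOVE(n, next);	/* safe: tmp was saved before this */
		free(n);
	}
}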
+ uint64_t rx_csum_err = 0; unsigned int i, count; ngbe_read_stats_registers(hw, hw_stats); @@ -1714,6 +1716,13 @@ limit = min(limit, ngbe_xstats_calc_num(dev)); + /* Rx Checksum Errors */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + rx_csum_err += rxq->csum_err; + } + hw_stats->rx_l3_l4_xsum_error = rx_csum_err; + /* Extended stats from ngbe_hw_stats */ for (i = 0; i < limit; i++) { uint32_t offset = 0; @@ -1790,6 +1799,8 @@ { struct ngbe_hw *hw = ngbe_dev_hw(dev); struct ngbe_hw_stats *hw_stats = NGBE_DEV_STATS(dev); + struct ngbe_rx_queue *rxq; + int i = 0; /* HW registers are cleared on read */ hw->offset_loaded = 0; @@ -1799,6 +1810,12 @@ /* Reset software totals */ memset(hw_stats, 0, sizeof(*hw_stats)); + /* Reset rxq checksum errors */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + rxq->csum_err = 0; + } + return 0; } diff -Nru dpdk-24.11.3/drivers/net/ngbe/ngbe_rxtx.c dpdk-24.11.4/drivers/net/ngbe/ngbe_rxtx.c --- dpdk-24.11.3/drivers/net/ngbe/ngbe_rxtx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ngbe/ngbe_rxtx.c 2025-12-19 12:05:33.000000000 +0000 @@ -972,22 +972,28 @@ } static inline uint64_t -rx_desc_error_to_pkt_flags(uint32_t rx_status) +rx_desc_error_to_pkt_flags(uint32_t rx_status, struct ngbe_rx_queue *rxq) { uint64_t pkt_flags = 0; /* checksum offload can't be disabled */ - if (rx_status & NGBE_RXD_STAT_IPCS) + if (rx_status & NGBE_RXD_STAT_IPCS) { pkt_flags |= (rx_status & NGBE_RXD_ERR_IPCS ? RTE_MBUF_F_RX_IP_CKSUM_BAD : RTE_MBUF_F_RX_IP_CKSUM_GOOD); + rxq->csum_err += !!(rx_status & NGBE_RXD_ERR_IPCS); + } - if (rx_status & NGBE_RXD_STAT_L4CS) + if (rx_status & NGBE_RXD_STAT_L4CS) { pkt_flags |= (rx_status & NGBE_RXD_ERR_L4CS ? RTE_MBUF_F_RX_L4_CKSUM_BAD : RTE_MBUF_F_RX_L4_CKSUM_GOOD); + rxq->csum_err += !!(rx_status & NGBE_RXD_ERR_L4CS); + } if (rx_status & NGBE_RXD_STAT_EIPCS && - rx_status & NGBE_RXD_ERR_EIPCS) + rx_status & NGBE_RXD_ERR_EIPCS) { pkt_flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD; + rxq->csum_err += !!(rx_status & NGBE_RXD_ERR_EIPCS); + } return pkt_flags; } @@ -1060,7 +1066,7 @@ /* convert descriptor fields to rte mbuf flags */ pkt_flags = rx_desc_status_to_pkt_flags(s[j], rxq->vlan_flags); - pkt_flags |= rx_desc_error_to_pkt_flags(s[j]); + pkt_flags |= rx_desc_error_to_pkt_flags(s[j], rxq); pkt_flags |= ngbe_rxd_pkt_info_to_pkt_flags(pkt_info[j]); mb->ol_flags = pkt_flags; @@ -1393,7 +1399,7 @@ pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags); - pkt_flags |= rx_desc_error_to_pkt_flags(staterr); + pkt_flags |= rx_desc_error_to_pkt_flags(staterr, rxq); pkt_flags |= ngbe_rxd_pkt_info_to_pkt_flags(pkt_info); rxm->ol_flags = pkt_flags; rxm->packet_type = ngbe_rxd_pkt_info_to_pkt_type(pkt_info, @@ -1464,7 +1470,7 @@ head->vlan_tci = rte_le_to_cpu_16(desc->qw1.hi.tag); pkt_info = rte_le_to_cpu_32(desc->qw0.dw0); pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags); - pkt_flags |= rx_desc_error_to_pkt_flags(staterr); + pkt_flags |= rx_desc_error_to_pkt_flags(staterr, rxq); pkt_flags |= ngbe_rxd_pkt_info_to_pkt_flags(pkt_info); head->ol_flags = pkt_flags; head->packet_type = ngbe_rxd_pkt_info_to_pkt_type(pkt_info, @@ -2052,13 +2058,9 @@ if (txq == NULL) return -ENOMEM; - /* - * Allocate Tx ring hardware descriptors. A memzone large enough to - * handle the maximum ring size is allocated in order to allow for - * resizing in later calls to the queue setup function. - */ + /* Allocate Tx ring hardware descriptors. 
*/ tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, - sizeof(struct ngbe_tx_desc) * NGBE_RING_DESC_MAX, + sizeof(struct ngbe_tx_desc) * nb_desc, NGBE_ALIGN, socket_id); if (tz == NULL) { ngbe_tx_queue_release(txq); @@ -2266,6 +2268,7 @@ rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1); rxq->rx_tail = 0; rxq->nb_rx_hold = 0; + rxq->csum_err = 0; rte_pktmbuf_free(rxq->pkt_first_seg); rxq->pkt_first_seg = NULL; rxq->pkt_last_seg = NULL; @@ -2317,6 +2320,7 @@ uint16_t len; struct ngbe_adapter *adapter = ngbe_dev_adapter(dev); uint64_t offloads; + uint32_t size; PMD_INIT_FUNC_TRACE(); hw = ngbe_dev_hw(dev); @@ -2350,13 +2354,10 @@ rxq->rx_deferred_start = rx_conf->rx_deferred_start; rxq->offloads = offloads; - /* - * Allocate Rx ring hardware descriptors. A memzone large enough to - * handle the maximum ring size is allocated in order to allow for - * resizing in later calls to the queue setup function. - */ + /* Allocate Rx ring hardware descriptors. */ + size = (nb_desc + RTE_PMD_NGBE_RX_MAX_BURST) * sizeof(struct ngbe_rx_desc); rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, - RX_RING_SZ, NGBE_ALIGN, socket_id); + size, NGBE_ALIGN, socket_id); if (rz == NULL) { ngbe_rx_queue_release(rxq); return -ENOMEM; @@ -2366,7 +2367,7 @@ /* * Zero init all the descriptors in the ring. */ - memset(rz->addr, 0, RX_RING_SZ); + memset(rz->addr, 0, size); rxq->rdt_reg_addr = NGBE_REG_ADDR(hw, NGBE_RXWP(rxq->reg_idx)); rxq->rdh_reg_addr = NGBE_REG_ADDR(hw, NGBE_RXRP(rxq->reg_idx)); diff -Nru dpdk-24.11.3/drivers/net/ngbe/ngbe_rxtx.h dpdk-24.11.4/drivers/net/ngbe/ngbe_rxtx.h --- dpdk-24.11.3/drivers/net/ngbe/ngbe_rxtx.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ngbe/ngbe_rxtx.h 2025-12-19 12:05:33.000000000 +0000 @@ -292,6 +292,7 @@ /** hold packets to return to application */ struct rte_mbuf *rx_stage[RTE_PMD_NGBE_RX_MAX_BURST * 2]; const struct rte_memzone *mz; + uint64_t csum_err; }; /** diff -Nru dpdk-24.11.3/drivers/net/ngbe/ngbe_rxtx_vec_neon.c dpdk-24.11.4/drivers/net/ngbe/ngbe_rxtx_vec_neon.c --- dpdk-24.11.3/drivers/net/ngbe/ngbe_rxtx_vec_neon.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ngbe/ngbe_rxtx_vec_neon.c 2025-12-19 12:05:33.000000000 +0000 @@ -222,7 +222,7 @@ volatile struct ngbe_rx_desc *rxdp; struct ngbe_rx_entry *sw_ring; uint16_t nb_pkts_recd; - int pos; + int pos, i; uint8x16_t shuf_msk = { 0xFF, 0xFF, 0xFF, 0xFF, /* skip 32 bits pkt_type */ @@ -331,6 +331,13 @@ desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr, vlan_flags, &rx_pkts[pos]); + for (i = 0; i < RTE_NGBE_DESCS_PER_LOOP; i++) { + if (rx_pkts[pos + i]->ol_flags & + (RTE_MBUF_F_RX_IP_CKSUM_BAD | + RTE_MBUF_F_RX_L4_CKSUM_BAD)) + rxq->csum_err++; + } + /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust); pkt_mb4 = vreinterpretq_u8_u16(tmp); diff -Nru dpdk-24.11.3/drivers/net/ngbe/ngbe_rxtx_vec_sse.c dpdk-24.11.4/drivers/net/ngbe/ngbe_rxtx_vec_sse.c --- dpdk-24.11.3/drivers/net/ngbe/ngbe_rxtx_vec_sse.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ngbe/ngbe_rxtx_vec_sse.c 2025-12-19 12:05:33.000000000 +0000 @@ -244,7 +244,7 @@ volatile struct ngbe_rx_desc *rxdp; struct ngbe_rx_entry *sw_ring; uint16_t nb_pkts_recd; - int pos; + int pos, i; uint64_t var; __m128i shuf_msk; __m128i crc_adjust = _mm_set_epi16(0, 0, 0, /* ignore non-length fields */ @@ -412,6 +412,13 @@ /* set ol_flags with vlan packet type */ desc_to_olflags_v(descs, mbuf_init, vlan_flags, &rx_pkts[pos]); + for (i = 0; i 
< RTE_NGBE_DESCS_PER_LOOP; i++) { + if (rx_pkts[pos + i]->ol_flags & + (RTE_MBUF_F_RX_IP_CKSUM_BAD | + RTE_MBUF_F_RX_L4_CKSUM_BAD)) + rxq->csum_err++; + } + /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust); pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust); diff -Nru dpdk-24.11.3/drivers/net/ntnic/ntnic_ethdev.c dpdk-24.11.4/drivers/net/ntnic/ntnic_ethdev.c --- dpdk-24.11.3/drivers/net/ntnic/ntnic_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/ntnic/ntnic_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -2471,7 +2471,7 @@ NT_LOG_DBGX(DBG, NTNIC, "PCI device deinitialization"); int i; - char name[32]; + char name[RTE_ETH_NAME_MAX_LEN]; struct pmd_internals *internals = eth_dev->data->dev_private; ntdrv_4ga_t *p_ntdrv = &internals->p_drv->ntdrv; @@ -2498,7 +2498,7 @@ } for (i = 0; i < n_phy_ports; i++) { - sprintf(name, "ntnic%d", i); + snprintf(name, sizeof(name), "ntnic%d", i); eth_dev = rte_eth_dev_allocated(name); if (eth_dev == NULL) continue; /* port already released */ diff -Nru dpdk-24.11.3/drivers/net/octeon_ep/cnxk_ep_rx.c dpdk-24.11.4/drivers/net/octeon_ep/cnxk_ep_rx.c --- dpdk-24.11.3/drivers/net/octeon_ep/cnxk_ep_rx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/octeon_ep/cnxk_ep_rx.c 2025-12-19 12:05:33.000000000 +0000 @@ -10,7 +10,6 @@ { struct rte_mbuf **recv_buf_list = droq->recv_buf_list; uint32_t total_pkt_len, bytes_rsvd = 0; - uint16_t port_id = droq->otx_ep_dev->port_id; uint16_t nb_desc = droq->nb_desc; uint16_t pkts; @@ -22,7 +21,7 @@ uint32_t pkt_len = 0; mbuf = recv_buf_list[droq->read_idx]; - info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *); + info = cnxk_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *); total_pkt_len = rte_bswap16(info->length >> 48) + OTX_EP_INFO_SIZE; @@ -37,7 +36,7 @@ if (!pkt_len) { /* Note the first seg */ first_buf = mbuf; - mbuf->data_off += OTX_EP_INFO_SIZE; + *(uint64_t *)&mbuf->rearm_data = droq->rearm_data; mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE; mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE; } else { @@ -57,12 +56,10 @@ droq->refill_count++; } mbuf = first_buf; - mbuf->port = port_id; rx_pkts[pkts] = mbuf; bytes_rsvd += pkt_len; } - droq->refill_count += new_pkts; droq->pkts_pending -= pkts; /* Stats */ droq->stats.pkts_received += pkts; @@ -154,3 +151,43 @@ return new_pkts; } + +void +cnxk_ep_drain_rx_pkts(void *rx_queue) +{ + struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue; + struct rte_mbuf *rx_pkt, *next_seg, *seg; + uint16_t i, j, nb_pkts; + + if (droq->read_idx == 0 && droq->pkts_pending == 0 && droq->refill_count) + return; + + /* Check for pending packets */ + nb_pkts = cnxk_ep_rx_pkts_to_process(droq, droq->nb_desc); + + /* Drain the pending packets */ + for (i = 0; i < nb_pkts; i++) { + rx_pkt = NULL; + cnxk_ep_process_pkts_scalar_mseg(&rx_pkt, droq, 1); + if (rx_pkt) { + seg = rx_pkt->next; + for (j = 1; j < rx_pkt->nb_segs; j++) { + next_seg = seg->next; + rte_mempool_put(droq->mpool, seg); + seg = next_seg; + } + rx_pkt->nb_segs = 1; + rte_mempool_put(droq->mpool, rx_pkt); + } + } + + cnxk_ep_rx_refill(droq); + + /* Reset the indexes */ + droq->read_idx = 0; + droq->write_idx = 0; + droq->refill_idx = 0; + droq->refill_count = 0; + droq->last_pkt_count = 0; + droq->pkts_pending = 0; +} diff -Nru dpdk-24.11.3/drivers/net/octeon_ep/cnxk_ep_vf.c dpdk-24.11.4/drivers/net/octeon_ep/cnxk_ep_vf.c --- dpdk-24.11.3/drivers/net/octeon_ep/cnxk_ep_vf.c 2025-08-18 15:26:42.000000000 +0000 +++ 
dpdk-24.11.4/drivers/net/octeon_ep/cnxk_ep_vf.c 2025-12-19 12:05:33.000000000 +0000 @@ -138,6 +138,18 @@ return -EIO; } + /* Clear the IQ doorbell */ + loop = OTX_EP_BUSY_LOOP_COUNT; + while ((rte_read64(iq->doorbell_reg) != 0ull) && loop--) { + rte_write32(OTX_EP_CLEAR_INSTR_DBELL, iq->doorbell_reg); + rte_delay_ms(1); + } + + if (loop < 0) { + otx_ep_err("INSTR DBELL is not zero"); + return -EIO; + } + /* IN INTR_THRESHOLD is set to max(FFFFFFFF) which disable the IN INTR * to raise */ @@ -237,8 +249,8 @@ droq->pkts_sent_ism = (uint32_t __rte_atomic *)((uint8_t *)otx_ep->ism_buffer_mz->addr + CNXK_EP_OQ_ISM_OFFSET(oq_no)); - otx_ep_err("SDP_R[%d] OQ ISM virt: %p dma: 0x%" PRIX64, - oq_no, (void *)(uintptr_t)droq->pkts_sent_ism, ism_addr); + otx_ep_dbg("SDP_R[%d] OQ ISM virt: %p dma: 0x%" PRIX64, oq_no, + (void *)(uintptr_t)droq->pkts_sent_ism, ism_addr); *droq->pkts_sent_ism = 0; droq->pkts_sent_prev = 0; @@ -266,24 +278,8 @@ static int cnxk_ep_vf_enable_iq(struct otx_ep_device *otx_ep, uint32_t q_no) { - int loop = OTX_EP_BUSY_LOOP_COUNT; uint64_t reg_val = 0ull; - /* Resetting doorbells during IQ enabling also to handle abrupt - * guest reboot. IQ reset does not clear the doorbells. - */ - oct_ep_write64(0xFFFFFFFF, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_DBELL(q_no)); - - while (((oct_ep_read64(otx_ep->hw_addr + - CNXK_EP_R_IN_INSTR_DBELL(q_no))) != 0ull) && loop--) { - rte_delay_ms(1); - } - - if (loop < 0) { - otx_ep_err("INSTR DBELL not coming back to 0"); - return -EIO; - } - reg_val = oct_ep_read64(otx_ep->hw_addr + CNXK_EP_R_IN_ENABLE(q_no)); reg_val |= 0x1ull; diff -Nru dpdk-24.11.3/drivers/net/octeon_ep/otx2_ep_vf.c dpdk-24.11.4/drivers/net/octeon_ep/otx2_ep_vf.c --- dpdk-24.11.3/drivers/net/octeon_ep/otx2_ep_vf.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/octeon_ep/otx2_ep_vf.c 2025-12-19 12:05:33.000000000 +0000 @@ -287,6 +287,18 @@ return -EIO; } + /* Clear the IQ doorbell */ + loop = OTX_EP_BUSY_LOOP_COUNT; + while ((rte_read64(iq->doorbell_reg) != 0ull) && loop--) { + rte_write32(OTX_EP_CLEAR_INSTR_DBELL, iq->doorbell_reg); + rte_delay_ms(1); + } + + if (loop < 0) { + otx_ep_err("INSTR DBELL is not zero"); + return -EIO; + } + /* IN INTR_THRESHOLD is set to max(FFFFFFFF) which disable the IN INTR * to raise */ @@ -388,7 +400,7 @@ droq->pkts_sent_ism = (uint32_t __rte_atomic *)((uint8_t *)otx_ep->ism_buffer_mz->addr + OTX2_EP_OQ_ISM_OFFSET(oq_no)); - otx_ep_err("SDP_R[%d] OQ ISM virt: %p, dma: 0x%x", oq_no, + otx_ep_dbg("SDP_R[%d] OQ ISM virt: %p, dma: 0x%x", oq_no, (void *)(uintptr_t)droq->pkts_sent_ism, (unsigned int)ism_addr); *droq->pkts_sent_ism = 0; @@ -411,24 +423,8 @@ static int otx2_vf_enable_iq(struct otx_ep_device *otx_ep, uint32_t q_no) { - int loop = SDP_VF_BUSY_LOOP_COUNT; uint64_t reg_val = 0ull; - /* Resetting doorbells during IQ enabling also to handle abrupt - * guest reboot. IQ reset does not clear the doorbells. 
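/*
 * Illustrative aside (not part of the patch): the doorbell clear moved into
 * queue setup above is a bounded write-and-poll loop: write the clear pattern,
 * re-read until the register reads zero or the retry budget runs out, and
 * report -EIO on timeout. A standalone version against a plain memory-mapped
 * register pointer (hypothetical names, roughly 1 ms between polls as in the
 * patch, write-1-to-clear assumed) might look like:
 */
#include <stdint.h>
#include <errno.h>
#include <unistd.h>

#define CLEAR_PATTERN   0xFFFFFFFFu
#define BUSY_LOOP_COUNT 10000

static int clear_doorbell(volatile uint64_t *doorbell)
{
	int loop = BUSY_LOOP_COUNT;

	while (*doorbell != 0 && loop-- > 0) {
		*doorbell = CLEAR_PATTERN;	/* request the clear again */
		usleep(1000);			/* wait before re-reading */
	}
	if (loop < 0)
		return -EIO;	/* doorbell never drained: report I/O error */
	return 0;
}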
- */ - oct_ep_write64(0xFFFFFFFF, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_DBELL(q_no)); - - while (((oct_ep_read64(otx_ep->hw_addr + - SDP_VF_R_IN_INSTR_DBELL(q_no))) != 0ull) && loop--) { - rte_delay_ms(1); - } - - if (loop < 0) { - otx_ep_err("INSTR DBELL not coming back to 0"); - return -EIO; - } - reg_val = oct_ep_read64(otx_ep->hw_addr + SDP_VF_R_IN_ENABLE(q_no)); reg_val |= 0x1ull; diff -Nru dpdk-24.11.3/drivers/net/octeon_ep/otx_ep_common.h dpdk-24.11.4/drivers/net/octeon_ep/otx_ep_common.h --- dpdk-24.11.3/drivers/net/octeon_ep/otx_ep_common.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/octeon_ep/otx_ep_common.h 2025-12-19 12:05:33.000000000 +0000 @@ -589,6 +589,7 @@ #define OTX_EP_CLEAR_ISIZE_BSIZE 0x7FFFFFULL #define OTX_EP_CLEAR_OUT_INT_LVLS 0x3FFFFFFFFFFFFFULL #define OTX_EP_CLEAR_IN_INT_LVLS 0xFFFFFFFF +#define OTX_EP_CLEAR_INSTR_DBELL 0xFFFFFFFF #define OTX_EP_CLEAR_SDP_IN_INT_LVLS 0x3FFFFFFFFFFFFFUL #define OTX_EP_DROQ_BUFSZ_MASK 0xFFFF #define OTX_EP_CLEAR_SLIST_DBELL 0xFFFFFFFF diff -Nru dpdk-24.11.3/drivers/net/octeon_ep/otx_ep_ethdev.c dpdk-24.11.4/drivers/net/octeon_ep/otx_ep_ethdev.c --- dpdk-24.11.3/drivers/net/octeon_ep/otx_ep_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/octeon_ep/otx_ep_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -236,6 +236,12 @@ int ret; otx_epvf = (struct otx_ep_device *)OTX_EP_DEV(eth_dev); + + for (q = 0; q < otx_epvf->nb_rx_queues; q++) { + cnxk_ep_drain_rx_pkts(otx_epvf->droq[q]); + otx_epvf->fn_list.setup_oq_regs(otx_epvf, q); + } + /* Enable IQ/OQ for this device */ ret = otx_epvf->fn_list.enable_io_queues(otx_epvf); if (ret) { diff -Nru dpdk-24.11.3/drivers/net/octeon_ep/otx_ep_mbox.c dpdk-24.11.4/drivers/net/octeon_ep/otx_ep_mbox.c --- dpdk-24.11.3/drivers/net/octeon_ep/otx_ep_mbox.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/octeon_ep/otx_ep_mbox.c 2025-12-19 12:05:33.000000000 +0000 @@ -355,16 +355,22 @@ struct otx_ep_device *otx_ep = (struct otx_ep_device *)eth_dev->data->dev_private; struct rte_pci_device *pdev = RTE_ETH_DEV_TO_PCI(eth_dev); uint64_t reg_val; + int rc; otx_ep_mbox_version_check(otx_ep); rte_intr_callback_register(pdev->intr_handle, otx_ep_mbox_intr_handler, (void *)eth_dev); - if (rte_intr_enable(pdev->intr_handle)) { + rc = rte_intr_enable(pdev->intr_handle); + + if (!(rc == -1 || rc == 0)) { otx_ep_err("rte_intr_enable failed"); return -1; } + if (rc == -1) + return 0; + reg_val = otx2_read64(otx_ep->hw_addr + CNXK_EP_R_MBOX_PF_VF_INT(0)); if (reg_val == UINT64_MAX) return -ENODEV; diff -Nru dpdk-24.11.3/drivers/net/octeon_ep/otx_ep_rxtx.h dpdk-24.11.4/drivers/net/octeon_ep/otx_ep_rxtx.h --- dpdk-24.11.3/drivers/net/octeon_ep/otx_ep_rxtx.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/octeon_ep/otx_ep_rxtx.h 2025-12-19 12:05:33.000000000 +0000 @@ -32,6 +32,9 @@ return ((index + count) & (max - 1)); } +void +cnxk_ep_drain_rx_pkts(void *rx_queue); + uint16_t otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts); diff -Nru dpdk-24.11.3/drivers/net/tap/bpf/meson.build dpdk-24.11.4/drivers/net/tap/bpf/meson.build --- dpdk-24.11.3/drivers/net/tap/bpf/meson.build 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/tap/bpf/meson.build 2025-12-19 12:05:33.000000000 +0000 @@ -39,13 +39,17 @@ enable_tap_rss = true -libbpf_include_dir = libbpf.get_variable(pkgconfig : 'includedir') +# Determine sysroot if cross-compiling and the property exists in the +# init files. 
Note that this environment variable will have to be passed +# in as a property during meson setup. +sysroot = meson.get_external_property('sysroot', '') +libbpf_include_dir = sysroot + libbpf.get_variable(pkgconfig : 'includedir') # The include files and others include # but is not defined for multi-lib environment target. # Workaround by using include directoriy from the host build environment. machine_name = run_command('uname', '-m', check: true).stdout().strip() -march_include_dir = '/usr/include/' + machine_name + '-linux-gnu' +march_include_dir = sysroot + '/usr/include/' + machine_name + '-linux-gnu' clang_flags = [ # these are flags used to build the BPF code diff -Nru dpdk-24.11.3/drivers/net/tap/rte_eth_tap.c dpdk-24.11.4/drivers/net/tap/rte_eth_tap.c --- dpdk-24.11.3/drivers/net/tap/rte_eth_tap.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/tap/rte_eth_tap.c 2025-12-19 12:05:33.000000000 +0000 @@ -889,8 +889,10 @@ return err; err = tap_link_set_up(dev); - if (err) + if (err) { + tap_intr_handle_set(dev, 0); return err; + } for (i = 0; i < dev->data->nb_tx_queues; i++) dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; diff -Nru dpdk-24.11.3/drivers/net/tap/tap_flow.c dpdk-24.11.4/drivers/net/tap/tap_flow.c --- dpdk-24.11.3/drivers/net/tap/tap_flow.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/tap/tap_flow.c 2025-12-19 12:05:33.000000000 +0000 @@ -430,20 +430,16 @@ return 0; msg = &flow->msg; if (!rte_is_zero_ether_addr(&mask->hdr.dst_addr)) { - tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_DST, - RTE_ETHER_ADDR_LEN, - &spec->hdr.dst_addr.addr_bytes); - tap_nlattr_add(&msg->nh, - TCA_FLOWER_KEY_ETH_DST_MASK, RTE_ETHER_ADDR_LEN, - &mask->hdr.dst_addr.addr_bytes); + tap_nlattr_add(msg, TCA_FLOWER_KEY_ETH_DST, RTE_ETHER_ADDR_LEN, + &spec->hdr.dst_addr.addr_bytes); + tap_nlattr_add(msg, TCA_FLOWER_KEY_ETH_DST_MASK, RTE_ETHER_ADDR_LEN, + &mask->hdr.dst_addr.addr_bytes); } if (!rte_is_zero_ether_addr(&mask->hdr.src_addr)) { - tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_SRC, - RTE_ETHER_ADDR_LEN, + tap_nlattr_add(msg, TCA_FLOWER_KEY_ETH_SRC, RTE_ETHER_ADDR_LEN, &spec->hdr.src_addr.addr_bytes); - tap_nlattr_add(&msg->nh, - TCA_FLOWER_KEY_ETH_SRC_MASK, RTE_ETHER_ADDR_LEN, - &mask->hdr.src_addr.addr_bytes); + tap_nlattr_add(msg, TCA_FLOWER_KEY_ETH_SRC_MASK, RTE_ETHER_ADDR_LEN, + &mask->hdr.src_addr.addr_bytes); } return 0; } @@ -498,11 +494,9 @@ uint8_t vid = VLAN_ID(tci); if (prio) - tap_nlattr_add8(&msg->nh, - TCA_FLOWER_KEY_VLAN_PRIO, prio); + tap_nlattr_add8(msg, TCA_FLOWER_KEY_VLAN_PRIO, prio); if (vid) - tap_nlattr_add16(&msg->nh, - TCA_FLOWER_KEY_VLAN_ID, vid); + tap_nlattr_add16(msg, TCA_FLOWER_KEY_VLAN_ID, vid); } return 0; } @@ -544,20 +538,15 @@ if (!spec) return 0; if (mask->hdr.dst_addr) { - tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST, - spec->hdr.dst_addr); - tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST_MASK, - mask->hdr.dst_addr); + tap_nlattr_add32(msg, TCA_FLOWER_KEY_IPV4_DST, spec->hdr.dst_addr); + tap_nlattr_add32(msg, TCA_FLOWER_KEY_IPV4_DST_MASK, mask->hdr.dst_addr); } if (mask->hdr.src_addr) { - tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC, - spec->hdr.src_addr); - tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC_MASK, - mask->hdr.src_addr); + tap_nlattr_add32(msg, TCA_FLOWER_KEY_IPV4_SRC, spec->hdr.src_addr); + tap_nlattr_add32(msg, TCA_FLOWER_KEY_IPV4_SRC_MASK, mask->hdr.src_addr); } if (spec->hdr.next_proto_id) - tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, - spec->hdr.next_proto_id); + 
tap_nlattr_add8(msg, TCA_FLOWER_KEY_IP_PROTO, spec->hdr.next_proto_id); return 0; } @@ -599,20 +588,19 @@ if (!spec) return 0; if (memcmp(&mask->hdr.dst_addr, empty_addr, 16)) { - tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST, - sizeof(spec->hdr.dst_addr), &spec->hdr.dst_addr); - tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST_MASK, - sizeof(mask->hdr.dst_addr), &mask->hdr.dst_addr); + tap_nlattr_add(msg, TCA_FLOWER_KEY_IPV6_DST, sizeof(spec->hdr.dst_addr), + &spec->hdr.dst_addr); + tap_nlattr_add(msg, TCA_FLOWER_KEY_IPV6_DST_MASK, sizeof(mask->hdr.dst_addr), + &mask->hdr.dst_addr); } if (memcmp(&mask->hdr.src_addr, empty_addr, 16)) { - tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC, - sizeof(spec->hdr.src_addr), &spec->hdr.src_addr); - tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC_MASK, - sizeof(mask->hdr.src_addr), &mask->hdr.src_addr); + tap_nlattr_add(msg, TCA_FLOWER_KEY_IPV6_SRC, sizeof(spec->hdr.src_addr), + &spec->hdr.src_addr); + tap_nlattr_add(msg, TCA_FLOWER_KEY_IPV6_SRC_MASK, sizeof(mask->hdr.src_addr), + &mask->hdr.src_addr); } if (spec->hdr.proto) - tap_nlattr_add8(&msg->nh, - TCA_FLOWER_KEY_IP_PROTO, spec->hdr.proto); + tap_nlattr_add8(msg, TCA_FLOWER_KEY_IP_PROTO, spec->hdr.proto); return 0; } @@ -649,15 +637,13 @@ if (!flow) return 0; msg = &flow->msg; - tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_UDP); + tap_nlattr_add8(msg, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_UDP); if (!spec) return 0; if (mask->hdr.dst_port) - tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_DST, - spec->hdr.dst_port); + tap_nlattr_add16(msg, TCA_FLOWER_KEY_UDP_DST, spec->hdr.dst_port); if (mask->hdr.src_port) - tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_SRC, - spec->hdr.src_port); + tap_nlattr_add16(msg, TCA_FLOWER_KEY_UDP_SRC, spec->hdr.src_port); return 0; } @@ -694,15 +680,13 @@ if (!flow) return 0; msg = &flow->msg; - tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_TCP); + tap_nlattr_add8(msg, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_TCP); if (!spec) return 0; if (mask->hdr.dst_port) - tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_DST, - spec->hdr.dst_port); + tap_nlattr_add16(msg, TCA_FLOWER_KEY_TCP_DST, spec->hdr.dst_port); if (mask->hdr.src_port) - tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_SRC, - spec->hdr.src_port); + tap_nlattr_add16(msg, TCA_FLOWER_KEY_TCP_SRC, spec->hdr.src_port); return 0; } @@ -810,37 +794,30 @@ if (tap_nlattr_nested_start(msg, (*act_index)++) < 0) return -1; - tap_nlattr_add(&msg->nh, TCA_ACT_KIND, - strlen(adata->id) + 1, adata->id); + tap_nlattr_add(msg, TCA_ACT_KIND, strlen(adata->id) + 1, adata->id); if (tap_nlattr_nested_start(msg, TCA_ACT_OPTIONS) < 0) return -1; if (strcmp("gact", adata->id) == 0) { - tap_nlattr_add(&msg->nh, TCA_GACT_PARMS, sizeof(adata->gact), - &adata->gact); + tap_nlattr_add(msg, TCA_GACT_PARMS, sizeof(adata->gact), &adata->gact); } else if (strcmp("mirred", adata->id) == 0) { if (adata->mirred.eaction == TCA_EGRESS_MIRROR) adata->mirred.action = TC_ACT_PIPE; else /* REDIRECT */ adata->mirred.action = TC_ACT_STOLEN; - tap_nlattr_add(&msg->nh, TCA_MIRRED_PARMS, - sizeof(adata->mirred), - &adata->mirred); + tap_nlattr_add(msg, TCA_MIRRED_PARMS, sizeof(adata->mirred), &adata->mirred); } else if (strcmp("skbedit", adata->id) == 0) { - tap_nlattr_add(&msg->nh, TCA_SKBEDIT_PARMS, - sizeof(adata->skbedit.skbedit), &adata->skbedit.skbedit); + tap_nlattr_add(msg, TCA_SKBEDIT_PARMS, sizeof(adata->skbedit.skbedit), + &adata->skbedit.skbedit); if (adata->skbedit.mark) - tap_nlattr_add32(&msg->nh, TCA_SKBEDIT_MARK, 
adata->skbedit.mark); + tap_nlattr_add32(msg, TCA_SKBEDIT_MARK, adata->skbedit.mark); else - tap_nlattr_add16(&msg->nh, TCA_SKBEDIT_QUEUE_MAPPING, adata->skbedit.queue); + tap_nlattr_add16(msg, TCA_SKBEDIT_QUEUE_MAPPING, adata->skbedit.queue); } else if (strcmp("bpf", adata->id) == 0) { #ifdef HAVE_BPF_RSS - tap_nlattr_add32(&msg->nh, TCA_ACT_BPF_FD, adata->bpf.bpf_fd); - tap_nlattr_add(&msg->nh, TCA_ACT_BPF_NAME, - strlen(adata->bpf.annotation) + 1, - adata->bpf.annotation); - tap_nlattr_add(&msg->nh, TCA_ACT_BPF_PARMS, - sizeof(adata->bpf.bpf), - &adata->bpf.bpf); + tap_nlattr_add32(msg, TCA_ACT_BPF_FD, adata->bpf.bpf_fd); + tap_nlattr_add(msg, TCA_ACT_BPF_NAME, strlen(adata->bpf.annotation) + 1, + adata->bpf.annotation); + tap_nlattr_add(msg, TCA_ACT_BPF_PARMS, sizeof(adata->bpf.bpf), &adata->bpf.bpf); #else TAP_LOG(ERR, "Internal error: bpf requested but not supported"); return -1; @@ -975,7 +952,7 @@ TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0); } /* use flower filter type */ - tap_nlattr_add(&flow->msg.nh, TCA_KIND, sizeof("flower"), "flower"); + tap_nlattr_add(&flow->msg, TCA_KIND, sizeof("flower"), "flower"); if (tap_nlattr_nested_start(&flow->msg, TCA_OPTIONS) < 0) { rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_ACTION, actions, "could not allocated netlink msg"); @@ -1015,15 +992,11 @@ } if (flow) { if (data.vlan) { - tap_nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE, - htons(ETH_P_8021Q)); - tap_nlattr_add16(&flow->msg.nh, - TCA_FLOWER_KEY_VLAN_ETH_TYPE, - data.eth_type ? - data.eth_type : htons(ETH_P_ALL)); + tap_nlattr_add16(&flow->msg, TCA_FLOWER_KEY_ETH_TYPE, htons(ETH_P_8021Q)); + tap_nlattr_add16(&flow->msg, TCA_FLOWER_KEY_VLAN_ETH_TYPE, + data.eth_type ? data.eth_type : htons(ETH_P_ALL)); } else if (data.eth_type) { - tap_nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE, - data.eth_type); + tap_nlattr_add16(&flow->msg, TCA_FLOWER_KEY_ETH_TYPE, data.eth_type); } } if (mirred && flow) { diff -Nru dpdk-24.11.3/drivers/net/tap/tap_netlink.c dpdk-24.11.4/drivers/net/tap/tap_netlink.c --- dpdk-24.11.3/drivers/net/tap/tap_netlink.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/tap/tap_netlink.c 2025-12-19 12:05:33.000000000 +0000 @@ -293,18 +293,18 @@ * The data to append. */ void -tap_nlattr_add(struct nlmsghdr *nh, unsigned short type, +tap_nlattr_add(struct tap_nlmsg *msg, unsigned short type, unsigned int data_len, const void *data) { /* see man 3 rtnetlink */ struct rtattr *rta; - rta = (struct rtattr *)NLMSG_TAIL(nh); + rta = (struct rtattr *)NLMSG_TAIL(msg); rta->rta_len = RTA_LENGTH(data_len); rta->rta_type = type; if (data_len > 0) memcpy(RTA_DATA(rta), data, data_len); - nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len); + msg->nh.nlmsg_len = NLMSG_ALIGN(msg->nh.nlmsg_len) + RTA_ALIGN(rta->rta_len); } /** @@ -318,9 +318,9 @@ * The data to append. */ void -tap_nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data) +tap_nlattr_add8(struct tap_nlmsg *msg, unsigned short type, uint8_t data) { - tap_nlattr_add(nh, type, sizeof(uint8_t), &data); + tap_nlattr_add(msg, type, sizeof(uint8_t), &data); } /** @@ -334,9 +334,9 @@ * The data to append. */ void -tap_nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data) +tap_nlattr_add16(struct tap_nlmsg *msg, unsigned short type, uint16_t data) { - tap_nlattr_add(nh, type, sizeof(uint16_t), &data); + tap_nlattr_add(msg, type, sizeof(uint16_t), &data); } /** @@ -350,9 +350,9 @@ * The data to append. 
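/*
 * Illustrative aside (not part of the patch): the helpers above were changed
 * to take the tap_nlmsg wrapper instead of a bare nlmsghdr, but the underlying
 * rtnetlink idiom is unchanged: place an rtattr at the aligned tail of the
 * message, copy the payload, then grow nlmsg_len. A standalone version over a
 * fixed-size buffer (hypothetical msg type, nlmsg_len assumed already
 * initialized for the base message, no bounds checking) could be:
 */
#include <string.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

struct msg {
	struct nlmsghdr nh;
	char buf[512];		/* room for attributes appended after the header */
};

static void nlattr_add(struct msg *m, unsigned short type,
		       unsigned int data_len, const void *data)
{
	/* the next attribute starts at the aligned end of the current message */
	struct rtattr *rta =
		(struct rtattr *)((char *)m + NLMSG_ALIGN(m->nh.nlmsg_len));

	rta->rta_type = type;
	rta->rta_len = RTA_LENGTH(data_len);
	if (data_len > 0)
		memcpy(RTA_DATA(rta), data, data_len);
	m->nh.nlmsg_len = NLMSG_ALIGN(m->nh.nlmsg_len) + RTA_ALIGN(rta->rta_len);
}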
*/ void -tap_nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data) +tap_nlattr_add32(struct tap_nlmsg *msg, unsigned short type, uint32_t data) { - tap_nlattr_add(nh, type, sizeof(uint32_t), &data); + tap_nlattr_add(msg, type, sizeof(uint32_t), &data); } /** @@ -379,9 +379,9 @@ return -1; } - tail->tail = (struct rtattr *)NLMSG_TAIL(&msg->nh); + tail->tail = (struct rtattr *)NLMSG_TAIL(msg); - tap_nlattr_add(&msg->nh, type, 0, NULL); + tap_nlattr_add(msg, type, 0, NULL); tail->prev = msg->nested_tails; @@ -404,7 +404,7 @@ { struct nested_tail *tail = msg->nested_tails; - tail->tail->rta_len = (char *)NLMSG_TAIL(&msg->nh) - (char *)tail->tail; + tail->tail->rta_len = (char *)NLMSG_TAIL(msg) - (char *)tail->tail; if (tail->prev) msg->nested_tails = tail->prev; diff -Nru dpdk-24.11.3/drivers/net/tap/tap_netlink.h dpdk-24.11.4/drivers/net/tap/tap_netlink.h --- dpdk-24.11.3/drivers/net/tap/tap_netlink.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/tap/tap_netlink.h 2025-12-19 12:05:33.000000000 +0000 @@ -23,7 +23,7 @@ struct nested_tail *nested_tails; }; -#define NLMSG_TAIL(nlh) (void *)((char *)(nlh) + NLMSG_ALIGN((nlh)->nlmsg_len)) +#define NLMSG_TAIL(msg) (void *)((char *)(msg) + NLMSG_ALIGN((msg)->nh.nlmsg_len)) int tap_nl_init(uint32_t nl_groups); int tap_nl_final(int nlsk_fd); @@ -31,11 +31,11 @@ int tap_nl_recv(int nlsk_fd, int (*callback)(struct nlmsghdr *, void *), void *arg); int tap_nl_recv_ack(int nlsk_fd); -void tap_nlattr_add(struct nlmsghdr *nh, unsigned short type, +void tap_nlattr_add(struct tap_nlmsg *msg, unsigned short type, unsigned int data_len, const void *data); -void tap_nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data); -void tap_nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data); -void tap_nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data); +void tap_nlattr_add8(struct tap_nlmsg *msg, unsigned short type, uint8_t data); +void tap_nlattr_add16(struct tap_nlmsg *msg, unsigned short type, uint16_t data); +void tap_nlattr_add32(struct tap_nlmsg *msg, unsigned short type, uint32_t data); int tap_nlattr_nested_start(struct tap_nlmsg *msg, uint16_t type); void tap_nlattr_nested_finish(struct tap_nlmsg *msg); diff -Nru dpdk-24.11.3/drivers/net/tap/tap_tcmsgs.c dpdk-24.11.4/drivers/net/tap/tap_tcmsgs.c --- dpdk-24.11.3/drivers/net/tap/tap_tcmsgs.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/tap/tap_tcmsgs.c 2025-12-19 12:05:33.000000000 +0000 @@ -123,8 +123,8 @@ NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE); msg.t.tcm_handle = TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0); msg.t.tcm_parent = TC_H_ROOT; - tap_nlattr_add(&msg.nh, TCA_KIND, sizeof("multiq"), "multiq"); - tap_nlattr_add(&msg.nh, TCA_OPTIONS, sizeof(opt), &opt); + tap_nlattr_add(&msg, TCA_KIND, sizeof("multiq"), "multiq"); + tap_nlattr_add(&msg, TCA_OPTIONS, sizeof(opt), &opt); if (tap_nl_send(nlsk_fd, &msg.nh) < 0) return -1; if (tap_nl_recv_ack(nlsk_fd) < 0) @@ -152,7 +152,7 @@ NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE); msg.t.tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0); msg.t.tcm_parent = TC_H_INGRESS; - tap_nlattr_add(&msg.nh, TCA_KIND, sizeof("ingress"), "ingress"); + tap_nlattr_add(&msg, TCA_KIND, sizeof("ingress"), "ingress"); if (tap_nl_send(nlsk_fd, &msg.nh) < 0) return -1; if (tap_nl_recv_ack(nlsk_fd) < 0) diff -Nru dpdk-24.11.3/drivers/net/txgbe/base/txgbe_type.h dpdk-24.11.4/drivers/net/txgbe/base/txgbe_type.h --- dpdk-24.11.3/drivers/net/txgbe/base/txgbe_type.h 2025-08-18 
15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/txgbe/base/txgbe_type.h 2025-12-19 12:05:33.000000000 +0000 @@ -699,6 +699,8 @@ #define TXGBE_DEVARG_FFE_MAIN "ffe_main" #define TXGBE_DEVARG_FFE_PRE "ffe_pre" #define TXGBE_DEVARG_FFE_POST "ffe_post" +#define TXGBE_DEVARG_FDIR_PBALLOC "pkt-filter-size" +#define TXGBE_DEVARG_FDIR_DROP_QUEUE "pkt-filter-drop-queue" static const char * const txgbe_valid_arguments[] = { TXGBE_DEVARG_BP_AUTO, @@ -709,6 +711,8 @@ TXGBE_DEVARG_FFE_MAIN, TXGBE_DEVARG_FFE_PRE, TXGBE_DEVARG_FFE_POST, + TXGBE_DEVARG_FDIR_PBALLOC, + TXGBE_DEVARG_FDIR_DROP_QUEUE, NULL }; diff -Nru dpdk-24.11.3/drivers/net/txgbe/txgbe_ethdev.c dpdk-24.11.4/drivers/net/txgbe/txgbe_ethdev.c --- dpdk-24.11.3/drivers/net/txgbe/txgbe_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/txgbe/txgbe_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -497,8 +497,12 @@ } static void -txgbe_parse_devargs(struct txgbe_hw *hw, struct rte_devargs *devargs) +txgbe_parse_devargs(struct rte_eth_dev *dev) { + struct rte_eth_fdir_conf *fdir_conf = TXGBE_DEV_FDIR_CONF(dev); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_devargs *devargs = pci_dev->device.devargs; + struct txgbe_hw *hw = TXGBE_DEV_HW(dev); struct rte_kvargs *kvlist; u16 auto_neg = 1; u16 poll = 0; @@ -508,6 +512,9 @@ u16 ffe_main = 27; u16 ffe_pre = 8; u16 ffe_post = 44; + /* FDIR args */ + u8 pballoc = 0; + u8 drop_queue = 127; if (devargs == NULL) goto null; @@ -532,6 +539,10 @@ &txgbe_handle_devarg, &ffe_pre); rte_kvargs_process(kvlist, TXGBE_DEVARG_FFE_POST, &txgbe_handle_devarg, &ffe_post); + rte_kvargs_process(kvlist, TXGBE_DEVARG_FDIR_PBALLOC, + &txgbe_handle_devarg, &pballoc); + rte_kvargs_process(kvlist, TXGBE_DEVARG_FDIR_DROP_QUEUE, + &txgbe_handle_devarg, &drop_queue); rte_kvargs_free(kvlist); null: @@ -543,6 +554,9 @@ hw->phy.ffe_main = ffe_main; hw->phy.ffe_pre = ffe_pre; hw->phy.ffe_post = ffe_post; + + fdir_conf->pballoc = pballoc; + fdir_conf->drop_queue = drop_queue; } static int @@ -631,7 +645,7 @@ hw->isb_dma = TMZ_PADDR(mz); hw->isb_mem = TMZ_VADDR(mz); - txgbe_parse_devargs(hw, pci_dev->device.devargs); + txgbe_parse_devargs(eth_dev); /* Initialize the shared code (base driver) */ err = txgbe_init_shared_code(hw); if (err != 0) { @@ -839,6 +853,7 @@ } memset(filter_info->fivetuple_mask, 0, sizeof(uint32_t) * TXGBE_5TUPLE_ARRAY_SIZE); + filter_info->ntuple_is_full = false; return 0; } @@ -881,11 +896,13 @@ static int txgbe_fdir_filter_init(struct rte_eth_dev *eth_dev) { + struct rte_eth_fdir_conf *fdir_conf = TXGBE_DEV_FDIR_CONF(eth_dev); struct txgbe_hw_fdir_info *fdir_info = TXGBE_DEV_FDIR(eth_dev); char fdir_hash_name[RTE_HASH_NAMESIZE]; + u16 max_fdir_num = (1024 << (fdir_conf->pballoc + 1)) - 2; struct rte_hash_parameters fdir_hash_params = { .name = fdir_hash_name, - .entries = TXGBE_MAX_FDIR_FILTER_NUM, + .entries = max_fdir_num, .key_len = sizeof(struct txgbe_atr_input), .hash_func = rte_hash_crc, .hash_func_init_val = 0, @@ -902,7 +919,7 @@ } fdir_info->hash_map = rte_zmalloc("txgbe", sizeof(struct txgbe_fdir_filter *) * - TXGBE_MAX_FDIR_FILTER_NUM, + max_fdir_num, 0); if (!fdir_info->hash_map) { PMD_INIT_LOG(ERR, @@ -2602,6 +2619,8 @@ { struct txgbe_hw *hw = TXGBE_DEV_HW(dev); struct txgbe_hw_stats *hw_stats = TXGBE_DEV_STATS(dev); + struct txgbe_rx_queue *rxq; + uint64_t rx_csum_err = 0; unsigned int i, count; txgbe_read_stats_registers(hw, hw_stats); @@ -2615,6 +2634,13 @@ limit = min(limit, txgbe_xstats_calc_num(dev)); + /* Rx Checksum Errors */ + for (i = 0; 
i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + rx_csum_err += rxq->csum_err; + } + hw_stats->rx_l3_l4_xsum_error = rx_csum_err; + /* Extended stats from txgbe_hw_stats */ for (i = 0; i < limit; i++) { uint32_t offset; @@ -2657,6 +2683,8 @@ { struct txgbe_hw *hw = TXGBE_DEV_HW(dev); struct txgbe_hw_stats *hw_stats = TXGBE_DEV_STATS(dev); + struct txgbe_rx_queue *rxq; + int i = 0; /* HW registers are cleared on read */ hw->offset_loaded = 0; @@ -2666,6 +2694,12 @@ /* Reset software totals */ memset(hw_stats, 0, sizeof(*hw_stats)); + /* Reset rxq checksum errors */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + rxq->csum_err = 0; + } + return 0; } @@ -4234,7 +4268,8 @@ } } if (i >= TXGBE_MAX_FTQF_FILTERS) { - PMD_DRV_LOG(ERR, "5tuple filters are full."); + PMD_DRV_LOG(INFO, "5tuple filters are full, switch to FDIR"); + filter_info->ntuple_is_full = true; return -ENOSYS; } @@ -4262,6 +4297,7 @@ ~(1 << (index % (sizeof(uint32_t) * NBBY))); TAILQ_REMOVE(&filter_info->fivetuple_list, filter, entries); rte_free(filter); + filter_info->ntuple_is_full = false; wr32(hw, TXGBE_5TFDADDR(index), 0); wr32(hw, TXGBE_5TFSADDR(index), 0); @@ -5678,7 +5714,9 @@ TXGBE_DEVARG_FFE_SET "=<0-4>" TXGBE_DEVARG_FFE_MAIN "=" TXGBE_DEVARG_FFE_PRE "=" - TXGBE_DEVARG_FFE_POST "="); + TXGBE_DEVARG_FFE_POST "=" + TXGBE_DEVARG_FDIR_PBALLOC "=<0|1|2>" + TXGBE_DEVARG_FDIR_DROP_QUEUE "="); RTE_LOG_REGISTER_SUFFIX(txgbe_logtype_init, init, NOTICE); RTE_LOG_REGISTER_SUFFIX(txgbe_logtype_driver, driver, NOTICE); diff -Nru dpdk-24.11.3/drivers/net/txgbe/txgbe_ethdev.h dpdk-24.11.4/drivers/net/txgbe/txgbe_ethdev.h --- dpdk-24.11.3/drivers/net/txgbe/txgbe_ethdev.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/txgbe/txgbe_ethdev.h 2025-12-19 12:05:33.000000000 +0000 @@ -243,6 +243,7 @@ /* Bit mask for every used 5tuple filter */ uint32_t fivetuple_mask[TXGBE_5TUPLE_ARRAY_SIZE]; struct txgbe_5tuple_filter_list fivetuple_list; + bool ntuple_is_full; /* store the SYN filter info */ uint32_t syn_info; /* store the rss filter info */ diff -Nru dpdk-24.11.3/drivers/net/txgbe/txgbe_fdir.c dpdk-24.11.4/drivers/net/txgbe/txgbe_fdir.c --- dpdk-24.11.3/drivers/net/txgbe/txgbe_fdir.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/txgbe/txgbe_fdir.c 2025-12-19 12:05:33.000000000 +0000 @@ -165,6 +165,15 @@ return 0; } +static inline uint16_t +txgbe_reverse_fdir_bitmasks(uint16_t mask) +{ + mask = ((mask & 0x5555) << 1) | ((mask & 0xAAAA) >> 1); + mask = ((mask & 0x3333) << 2) | ((mask & 0xCCCC) >> 2); + mask = ((mask & 0x0F0F) << 4) | ((mask & 0xF0F0) >> 4); + return ((mask & 0x00FF) << 8) | ((mask & 0xFF00) >> 8); +} + int txgbe_fdir_set_input_mask(struct rte_eth_dev *dev) { @@ -206,15 +215,15 @@ wr32(hw, TXGBE_FDIRUDPMSK, ~fdirtcpm); wr32(hw, TXGBE_FDIRSCTPMSK, ~fdirtcpm); - /* Store source and destination IPv4 masks (big-endian) */ - wr32(hw, TXGBE_FDIRSIP4MSK, ~info->mask.src_ipv4_mask); - wr32(hw, TXGBE_FDIRDIP4MSK, ~info->mask.dst_ipv4_mask); + /* Store source and destination IPv4 masks (little-endian) */ + wr32(hw, TXGBE_FDIRSIP4MSK, rte_be_to_cpu_32(~info->mask.src_ipv4_mask)); + wr32(hw, TXGBE_FDIRDIP4MSK, rte_be_to_cpu_32(~info->mask.dst_ipv4_mask)); /* * Store source and destination IPv6 masks (bit reversed) */ - fdiripv6m = TXGBE_FDIRIP6MSK_DST(info->mask.dst_ipv6_mask) | - TXGBE_FDIRIP6MSK_SRC(info->mask.src_ipv6_mask); + fdiripv6m = txgbe_reverse_fdir_bitmasks(info->mask.dst_ipv6_mask) << 16; + fdiripv6m |= 
txgbe_reverse_fdir_bitmasks(info->mask.src_ipv6_mask); wr32(hw, TXGBE_FDIRIP6MSK, ~fdiripv6m); return 0; @@ -258,10 +267,7 @@ if (rule->input.flow_type & TXGBE_ATR_L4TYPE_MASK) return TXGBE_FDIRFLEXCFG_BASE_PAY; - if (rule->input.flow_type & TXGBE_ATR_L3TYPE_MASK) - return TXGBE_FDIRFLEXCFG_BASE_L3; - - return TXGBE_FDIRFLEXCFG_BASE_L2; + return TXGBE_FDIRFLEXCFG_BASE_L3; } int @@ -639,8 +645,14 @@ fdircmd |= TXGBE_FDIRPICMD_QP(queue); fdircmd |= TXGBE_FDIRPICMD_POOL(input->vm_pool); - if (input->flow_type & TXGBE_ATR_L3TYPE_IPV6) + if (input->flow_type & TXGBE_ATR_L3TYPE_IPV6) { + /* use SIP4 to store LS Dword of the Source iPv6 address */ + wr32(hw, TXGBE_FDIRPISIP4, be_to_le32(input->src_ip[3])); + wr32(hw, TXGBE_FDIRPISIP6(0), be_to_le32(input->src_ip[2])); + wr32(hw, TXGBE_FDIRPISIP6(1), be_to_le32(input->src_ip[1])); + wr32(hw, TXGBE_FDIRPISIP6(2), be_to_le32(input->src_ip[0])); fdircmd |= TXGBE_FDIRPICMD_IP6; + } wr32(hw, TXGBE_FDIRPICMD, fdircmd); PMD_DRV_LOG(DEBUG, "Rx Queue=%x hash=%x", queue, fdirhash); @@ -786,6 +798,26 @@ return 0; } +static void +txgbe_fdir_mask_input(struct txgbe_hw_fdir_mask *mask, + struct txgbe_atr_input *input) +{ + int i; + + if (input->flow_type & TXGBE_ATR_L3TYPE_IPV6) { + for (i = 0; i < 16; i++) { + if (!(mask->src_ipv6_mask & (1 << i))) + input->src_ip[i / 4] &= ~(0xFF << ((i % 4) * 8)); + } + } else { + input->src_ip[0] &= mask->src_ipv4_mask; + input->dst_ip[0] &= mask->dst_ipv4_mask; + } + + input->src_port &= mask->src_port_mask; + input->dst_port &= mask->dst_port_mask; +} + int txgbe_fdir_filter_program(struct rte_eth_dev *dev, struct txgbe_fdir_rule *rule, @@ -808,6 +840,8 @@ if (fdir_mode >= RTE_FDIR_MODE_PERFECT) is_perfect = TRUE; + txgbe_fdir_mask_input(&info->mask, &rule->input); + if (is_perfect) { fdirhash = atr_compute_perfect_hash(&rule->input, TXGBE_DEV_FDIR_CONF(dev)->pballoc); @@ -959,6 +993,7 @@ int txgbe_clear_all_fdir_filter(struct rte_eth_dev *dev) { + struct rte_eth_fdir_conf *fdir_conf = TXGBE_DEV_FDIR_CONF(dev); struct txgbe_hw_fdir_info *fdir_info = TXGBE_DEV_FDIR(dev); struct txgbe_fdir_filter *fdir_filter; struct txgbe_fdir_filter *filter_flag; @@ -967,7 +1002,9 @@ /* flush flow director */ rte_hash_reset(fdir_info->hash_handle); memset(fdir_info->hash_map, 0, - sizeof(struct txgbe_fdir_filter *) * TXGBE_MAX_FDIR_FILTER_NUM); + sizeof(struct txgbe_fdir_filter *) * + ((1024 << (fdir_conf->pballoc + 1)) - 2)); + fdir_conf->mode = RTE_FDIR_MODE_NONE; filter_flag = TAILQ_FIRST(&fdir_info->fdir_list); while ((fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list))) { TAILQ_REMOVE(&fdir_info->fdir_list, diff -Nru dpdk-24.11.3/drivers/net/txgbe/txgbe_flow.c dpdk-24.11.4/drivers/net/txgbe/txgbe_flow.c --- dpdk-24.11.3/drivers/net/txgbe/txgbe_flow.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/txgbe/txgbe_flow.c 2025-12-19 12:05:33.000000000 +0000 @@ -580,8 +580,12 @@ struct rte_eth_ntuple_filter *filter, struct rte_flow_error *error) { + struct txgbe_filter_info *filter_info = TXGBE_DEV_FILTER(dev); int ret; + if (filter_info->ntuple_is_full) + return -ENOSYS; + ret = cons_parse_ntuple_filter(attr, pattern, actions, filter, error); if (ret) @@ -1333,7 +1337,6 @@ { const struct rte_flow_action *act; const struct rte_flow_action_queue *act_q; - const struct rte_flow_action_mark *mark; /* parse attr */ /* must be input direction */ @@ -1398,10 +1401,9 @@ rule->fdirflags = TXGBE_FDIRPICMD_DROP; } - /* check if the next not void item is MARK */ + /* nothing else supported */ act = next_no_void_action(actions, act); - 
if (act->type != RTE_FLOW_ACTION_TYPE_MARK && - act->type != RTE_FLOW_ACTION_TYPE_END) { + if (act->type != RTE_FLOW_ACTION_TYPE_END) { memset(rule, 0, sizeof(struct txgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, @@ -1411,21 +1413,6 @@ rule->soft_id = 0; - if (act->type == RTE_FLOW_ACTION_TYPE_MARK) { - mark = (const struct rte_flow_action_mark *)act->conf; - rule->soft_id = mark->id; - act = next_no_void_action(actions, act); - } - - /* check if the next not void item is END */ - if (act->type != RTE_FLOW_ACTION_TYPE_END) { - memset(rule, 0, sizeof(struct txgbe_fdir_rule)); - rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ACTION, - act, "Not supported action."); - return -rte_errno; - } - return 0; } @@ -1537,8 +1524,6 @@ * The next not void item must be END. * ACTION: * The first not void action should be QUEUE or DROP. - * The second not void optional action should be MARK, - * mark_id is a uint32_t number. * The next not void action should be END. * UDP/TCP/SCTP pattern example: * ITEM Spec Mask @@ -1849,9 +1834,7 @@ /* check dst addr mask */ for (j = 0; j < 16; j++) { - if (ipv6_mask->hdr.dst_addr.a[j] == UINT8_MAX) { - rule->mask.dst_ipv6_mask |= 1 << j; - } else if (ipv6_mask->hdr.dst_addr.a[j] != 0) { + if (ipv6_mask->hdr.dst_addr.a[j] != 0) { memset(rule, 0, sizeof(struct txgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, @@ -2222,6 +2205,8 @@ const struct rte_flow_item_udp *udp_mask; const struct rte_flow_item_sctp *sctp_spec; const struct rte_flow_item_sctp *sctp_mask; + const struct rte_flow_item_raw *raw_mask; + const struct rte_flow_item_raw *raw_spec; u8 ptid = 0; uint32_t j; @@ -2548,7 +2533,8 @@ if (item->type != RTE_FLOW_ITEM_TYPE_TCP && item->type != RTE_FLOW_ITEM_TYPE_UDP && item->type != RTE_FLOW_ITEM_TYPE_SCTP && - item->type != RTE_FLOW_ITEM_TYPE_END) { + item->type != RTE_FLOW_ITEM_TYPE_END && + item->type != RTE_FLOW_ITEM_TYPE_RAW) { memset(rule, 0, sizeof(struct txgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, @@ -2609,9 +2595,7 @@ /* check dst addr mask */ for (j = 0; j < 16; j++) { - if (ipv6_mask->hdr.dst_addr.a[j] == UINT8_MAX) { - rule->mask.dst_ipv6_mask |= 1 << j; - } else if (ipv6_mask->hdr.dst_addr.a[j] != 0) { + if (ipv6_mask->hdr.dst_addr.a[j] != 0) { memset(rule, 0, sizeof(struct txgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, @@ -2637,7 +2621,8 @@ if (item->type != RTE_FLOW_ITEM_TYPE_TCP && item->type != RTE_FLOW_ITEM_TYPE_UDP && item->type != RTE_FLOW_ITEM_TYPE_SCTP && - item->type != RTE_FLOW_ITEM_TYPE_END) { + item->type != RTE_FLOW_ITEM_TYPE_END && + item->type != RTE_FLOW_ITEM_TYPE_RAW) { memset(rule, 0, sizeof(struct txgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, @@ -2699,6 +2684,16 @@ rule->input.dst_port = tcp_spec->hdr.dst_port; } + + item = next_no_fuzzy_pattern(pattern, item); + if (item->type != RTE_FLOW_ITEM_TYPE_RAW && + item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } } /* Get the UDP info */ @@ -2748,6 +2743,16 @@ rule->input.dst_port = udp_spec->hdr.dst_port; } + + item = next_no_fuzzy_pattern(pattern, item); + if (item->type != RTE_FLOW_ITEM_TYPE_RAW && + item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + 
RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } } /* Get the SCTP info */ @@ -2798,13 +2803,10 @@ rule->input.dst_port = sctp_spec->hdr.dst_port; } - /* others even sctp port is not supported */ - sctp_mask = item->mask; - if (sctp_mask && - (sctp_mask->hdr.src_port || - sctp_mask->hdr.dst_port || - sctp_mask->hdr.tag || - sctp_mask->hdr.cksum)) { + + item = next_no_fuzzy_pattern(pattern, item); + if (item->type != RTE_FLOW_ITEM_TYPE_RAW && + item->type != RTE_FLOW_ITEM_TYPE_END) { memset(rule, 0, sizeof(struct txgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, @@ -2813,6 +2815,93 @@ } } + /* Get the flex byte info */ + if (item->type == RTE_FLOW_ITEM_TYPE_RAW) { + uint16_t pattern = 0; + + /* Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + /* mask should not be null */ + if (!item->mask || !item->spec) { + memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->b_mask = TRUE; + raw_mask = item->mask; + + /* check mask */ + if (raw_mask->relative != 0x1 || + raw_mask->search != 0x1 || + raw_mask->reserved != 0x0 || + (uint32_t)raw_mask->offset != 0xffffffff || + raw_mask->limit != 0xffff || + raw_mask->length != 0xffff) { + memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->b_spec = TRUE; + raw_spec = item->spec; + + /* check spec */ + if (raw_spec->search != 0 || + raw_spec->reserved != 0 || + raw_spec->offset > TXGBE_MAX_FLX_SOURCE_OFF || + raw_spec->offset % 2 || + raw_spec->limit != 0 || + raw_spec->length != 4 || + /* pattern can't be 0xffff */ + (raw_spec->pattern[0] == 0xff && + raw_spec->pattern[1] == 0xff && + raw_spec->pattern[2] == 0xff && + raw_spec->pattern[3] == 0xff)) { + memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + /* check pattern mask */ + if (raw_mask->pattern[0] != 0xff || + raw_mask->pattern[1] != 0xff || + raw_mask->pattern[2] != 0xff || + raw_mask->pattern[3] != 0xff) { + memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->mask.flex_bytes_mask = 0xffff; + /* Convert pattern string to hex bytes */ + if (sscanf((const char *)raw_spec->pattern, "%hx", &pattern) != 1) { + memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Failed to parse raw pattern"); + return -rte_errno; + } + rule->input.flex_bytes = (pattern & 0x00FF) << 8; + rule->input.flex_bytes |= (pattern & 0xFF00) >> 8; + + rule->flex_bytes_offset = raw_spec->offset; + rule->flex_relative = raw_spec->relative; + } + if (item->type != RTE_FLOW_ITEM_TYPE_END) { /* check if the next not void item is END */ item = next_no_fuzzy_pattern(pattern, item); @@ -2839,7 +2928,6 @@ struct rte_flow_error *error) { int ret; - struct txgbe_hw *hw = TXGBE_DEV_HW(dev); struct rte_eth_fdir_conf *fdir_conf = TXGBE_DEV_FDIR_CONF(dev); ret = txgbe_parse_fdir_filter_normal(dev, attr, pattern, @@ -2853,12 
+2941,6 @@ return ret; step_next: - - if (hw->mac.type == txgbe_mac_raptor && - rule->fdirflags == TXGBE_FDIRPICMD_DROP && - (rule->input.src_port != 0 || rule->input.dst_port != 0)) - return -ENOTSUP; - if (fdir_conf->mode == RTE_FDIR_MODE_NONE) { fdir_conf->mode = rule->mode; ret = txgbe_fdir_configure(dev); @@ -3103,6 +3185,7 @@ struct txgbe_fdir_rule_ele *fdir_rule_ptr; struct txgbe_rss_conf_ele *rss_filter_ptr; struct txgbe_flow_mem *txgbe_flow_mem_ptr; + struct txgbe_filter_info *filter_info = TXGBE_DEV_FILTER(dev); uint8_t first_mask = FALSE; flow = rte_zmalloc("txgbe_rte_flow", sizeof(struct rte_flow), 0); @@ -3148,10 +3231,13 @@ flow->rule = ntuple_filter_ptr; flow->filter_type = RTE_ETH_FILTER_NTUPLE; return flow; + } else if (filter_info->ntuple_is_full) { + goto next; } goto out; } +next: memset(ðertype_filter, 0, sizeof(struct rte_eth_ethertype_filter)); ret = txgbe_parse_ethertype_filter(dev, attr, pattern, actions, ðertype_filter, error); @@ -3426,6 +3512,7 @@ struct txgbe_fdir_rule_ele *fdir_rule_ptr; struct txgbe_flow_mem *txgbe_flow_mem_ptr; struct txgbe_hw_fdir_info *fdir_info = TXGBE_DEV_FDIR(dev); + struct rte_eth_fdir_conf *fdir_conf = TXGBE_DEV_FDIR_CONF(dev); struct txgbe_rss_conf_ele *rss_filter_ptr; switch (filter_type) { @@ -3485,6 +3572,7 @@ fdir_info->mask_added = false; fdir_info->flex_relative = false; fdir_info->flex_bytes_offset = 0; + fdir_conf->mode = RTE_FDIR_MODE_NONE; } } break; diff -Nru dpdk-24.11.3/drivers/net/txgbe/txgbe_rxtx.c dpdk-24.11.4/drivers/net/txgbe/txgbe_rxtx.c --- dpdk-24.11.3/drivers/net/txgbe/txgbe_rxtx.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/txgbe/txgbe_rxtx.c 2025-12-19 12:05:33.000000000 +0000 @@ -876,7 +876,6 @@ tx_offload.data[0] = 0; tx_offload.data[1] = 0; - txq = tx_queue; sw_ring = txq->sw_ring; txr = txq->tx_ring; tx_id = txq->tx_tail; @@ -1277,7 +1276,7 @@ } static inline uint64_t -rx_desc_error_to_pkt_flags(uint32_t rx_status) +rx_desc_error_to_pkt_flags(uint32_t rx_status, struct txgbe_rx_queue *rxq) { uint64_t pkt_flags = 0; @@ -1285,16 +1284,19 @@ if (rx_status & TXGBE_RXD_STAT_IPCS) { pkt_flags |= (rx_status & TXGBE_RXD_ERR_IPCS ? RTE_MBUF_F_RX_IP_CKSUM_BAD : RTE_MBUF_F_RX_IP_CKSUM_GOOD); + rxq->csum_err += !!(rx_status & TXGBE_RXD_ERR_IPCS); } if (rx_status & TXGBE_RXD_STAT_L4CS) { pkt_flags |= (rx_status & TXGBE_RXD_ERR_L4CS ? 
RTE_MBUF_F_RX_L4_CKSUM_BAD : RTE_MBUF_F_RX_L4_CKSUM_GOOD); + rxq->csum_err += !!(rx_status & TXGBE_RXD_ERR_L4CS); } if (rx_status & TXGBE_RXD_STAT_EIPCS && rx_status & TXGBE_RXD_ERR_EIPCS) { pkt_flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD; + rxq->csum_err += !!(rx_status & TXGBE_RXD_ERR_EIPCS); } #ifdef RTE_LIB_SECURITY @@ -1376,7 +1378,7 @@ /* convert descriptor fields to rte mbuf flags */ pkt_flags = rx_desc_status_to_pkt_flags(s[j], rxq->vlan_flags); - pkt_flags |= rx_desc_error_to_pkt_flags(s[j]); + pkt_flags |= rx_desc_error_to_pkt_flags(s[j], rxq); pkt_flags |= txgbe_rxd_pkt_info_to_pkt_flags(pkt_info[j]); mb->ol_flags = pkt_flags; @@ -1715,7 +1717,7 @@ pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags); - pkt_flags |= rx_desc_error_to_pkt_flags(staterr); + pkt_flags |= rx_desc_error_to_pkt_flags(staterr, rxq); pkt_flags |= txgbe_rxd_pkt_info_to_pkt_flags(pkt_info); rxm->ol_flags = pkt_flags; rxm->packet_type = txgbe_rxd_pkt_info_to_pkt_type(pkt_info, @@ -1791,7 +1793,7 @@ head->vlan_tci = rte_le_to_cpu_16(desc->qw1.hi.tag); pkt_info = rte_le_to_cpu_32(desc->qw0.dw0); pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags); - pkt_flags |= rx_desc_error_to_pkt_flags(staterr); + pkt_flags |= rx_desc_error_to_pkt_flags(staterr, rxq); pkt_flags |= txgbe_rxd_pkt_info_to_pkt_flags(pkt_info); if (TXGBE_RXD_RSCCNT(desc->qw0.dw0)) pkt_flags |= RTE_MBUF_F_RX_LRO; @@ -2464,13 +2466,9 @@ if (txq == NULL) return -ENOMEM; - /* - * Allocate TX ring hardware descriptors. A memzone large enough to - * handle the maximum ring size is allocated in order to allow for - * resizing in later calls to the queue setup function. - */ + /* Allocate TX ring hardware descriptors. */ tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, - sizeof(struct txgbe_tx_desc) * TXGBE_RING_DESC_MAX, + sizeof(struct txgbe_tx_desc) * nb_desc, TXGBE_ALIGN, socket_id); if (tz == NULL) { txgbe_tx_queue_release(txq); @@ -2696,6 +2694,7 @@ rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1); rxq->rx_tail = 0; rxq->nb_rx_hold = 0; + rxq->csum_err = 0; rte_pktmbuf_free(rxq->pkt_first_seg); rxq->pkt_first_seg = NULL; rxq->pkt_last_seg = NULL; @@ -2720,6 +2719,7 @@ uint16_t len; struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev); uint64_t offloads; + uint32_t size; PMD_INIT_FUNC_TRACE(); hw = TXGBE_DEV_HW(dev); @@ -2770,13 +2770,10 @@ */ rxq->pkt_type_mask = TXGBE_PTID_MASK; - /* - * Allocate RX ring hardware descriptors. A memzone large enough to - * handle the maximum ring size is allocated in order to allow for - * resizing in later calls to the queue setup function. - */ + /* Allocate RX ring hardware descriptors. */ + size = (nb_desc + RTE_PMD_TXGBE_RX_MAX_BURST) * sizeof(struct txgbe_rx_desc); rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, - RX_RING_SZ, TXGBE_ALIGN, socket_id); + size, TXGBE_ALIGN, socket_id); if (rz == NULL) { txgbe_rx_queue_release(rxq); return -ENOMEM; @@ -2786,7 +2783,7 @@ /* * Zero init all the descriptors in the ring. 
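
To make the new rx_l3_l4_xsum_error statistic concrete: the descriptor-to-flags helpers above bump a per-queue counter whenever a checksum error bit is seen, and the xstats path sums those counters on demand while stats_reset clears them. A rough stand-alone sketch of that pattern, with invented DEMO_* bit values standing in for the TXGBE_RXD_* definitions:

	#include <stdint.h>

	#define DEMO_RXD_STAT_IPCS	(1u << 0)	/* IP checksum was checked */
	#define DEMO_RXD_ERR_IPCS	(1u << 1)	/* ...and found bad */
	#define DEMO_RXD_STAT_L4CS	(1u << 2)	/* L4 checksum was checked */
	#define DEMO_RXD_ERR_L4CS	(1u << 3)	/* ...and found bad */

	struct demo_rxq {
		uint64_t csum_err;	/* per-queue, like rxq->csum_err in the patch */
	};

	/* Hot path: called per received descriptor. */
	static void
	demo_count_csum_errors(uint32_t rx_status, struct demo_rxq *rxq)
	{
		if (rx_status & DEMO_RXD_STAT_IPCS)
			rxq->csum_err += !!(rx_status & DEMO_RXD_ERR_IPCS);
		if (rx_status & DEMO_RXD_STAT_L4CS)
			rxq->csum_err += !!(rx_status & DEMO_RXD_ERR_L4CS);
	}

	/* Slow path: fold all queues into one device-level stat at read time. */
	static uint64_t
	demo_sum_csum_errors(struct demo_rxq *const *rxqs, unsigned int nb_rxq)
	{
		uint64_t total = 0;
		unsigned int i;

		for (i = 0; i < nb_rxq; i++)
			total += rxqs[i]->csum_err;
		return total;	/* plays the role of hw_stats->rx_l3_l4_xsum_error */
	}

Keeping the counter per queue avoids any shared-cacheline update on the receive path; aggregation only happens when the application asks for extended stats.
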
*/ - memset(rz->addr, 0, RX_RING_SZ); + memset(rz->addr, 0, size); /* * Modified to setup VFRDT for Virtual Function @@ -5141,7 +5138,7 @@ */ buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) - RTE_PKTMBUF_HEADROOM); - buf_size = ROUND_UP(buf_size, 1 << 10); + buf_size = ROUND_DOWN(buf_size, 1 << 10); srrctl |= TXGBE_RXCFG_PKTLEN(buf_size); /* diff -Nru dpdk-24.11.3/drivers/net/txgbe/txgbe_rxtx.h dpdk-24.11.4/drivers/net/txgbe/txgbe_rxtx.h --- dpdk-24.11.3/drivers/net/txgbe/txgbe_rxtx.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/txgbe/txgbe_rxtx.h 2025-12-19 12:05:33.000000000 +0000 @@ -323,6 +323,7 @@ /** hold packets to return to application */ struct rte_mbuf *rx_stage[RTE_PMD_TXGBE_RX_MAX_BURST * 2]; const struct rte_memzone *mz; + uint64_t csum_err; }; /** diff -Nru dpdk-24.11.3/drivers/net/txgbe/txgbe_rxtx_vec_neon.c dpdk-24.11.4/drivers/net/txgbe/txgbe_rxtx_vec_neon.c --- dpdk-24.11.3/drivers/net/txgbe/txgbe_rxtx_vec_neon.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/txgbe/txgbe_rxtx_vec_neon.c 2025-12-19 12:05:33.000000000 +0000 @@ -222,7 +222,7 @@ volatile struct txgbe_rx_desc *rxdp; struct txgbe_rx_entry *sw_ring; uint16_t nb_pkts_recd; - int pos; + int pos, i; uint8x16_t shuf_msk = { 0xFF, 0xFF, 0xFF, 0xFF, /* skip 32 bits pkt_type */ @@ -331,6 +331,13 @@ desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr, vlan_flags, &rx_pkts[pos]); + for (i = 0; i < RTE_TXGBE_DESCS_PER_LOOP; i++) { + if (rx_pkts[pos + i]->ol_flags & + (RTE_MBUF_F_RX_IP_CKSUM_BAD | + RTE_MBUF_F_RX_L4_CKSUM_BAD)) + rxq->csum_err++; + } + /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust); pkt_mb4 = vreinterpretq_u8_u16(tmp); diff -Nru dpdk-24.11.3/drivers/net/txgbe/txgbe_rxtx_vec_sse.c dpdk-24.11.4/drivers/net/txgbe/txgbe_rxtx_vec_sse.c --- dpdk-24.11.3/drivers/net/txgbe/txgbe_rxtx_vec_sse.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/txgbe/txgbe_rxtx_vec_sse.c 2025-12-19 12:05:33.000000000 +0000 @@ -283,7 +283,7 @@ #ifdef RTE_LIB_SECURITY uint8_t use_ipsec = rxq->using_ipsec; #endif - int pos; + int pos, i; uint64_t var; __m128i shuf_msk; __m128i crc_adjust = _mm_set_epi16(0, 0, 0, /* ignore non-length fields */ @@ -451,6 +451,13 @@ /* set ol_flags with vlan packet type */ desc_to_olflags_v(descs, mbuf_init, vlan_flags, &rx_pkts[pos]); + for (i = 0; i < RTE_TXGBE_DESCS_PER_LOOP; i++) { + if (rx_pkts[pos + i]->ol_flags & + (RTE_MBUF_F_RX_IP_CKSUM_BAD | + RTE_MBUF_F_RX_L4_CKSUM_BAD)) + rxq->csum_err++; + } + #ifdef RTE_LIB_SECURITY if (unlikely(use_ipsec)) desc_to_olflags_v_ipsec(descs, &rx_pkts[pos]); diff -Nru dpdk-24.11.3/drivers/net/virtio/virtio_user/virtio_user_dev.c dpdk-24.11.4/drivers/net/virtio/virtio_user/virtio_user_dev.c --- dpdk-24.11.3/drivers/net/virtio/virtio_user/virtio_user_dev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/virtio/virtio_user/virtio_user_dev.c 2025-12-19 12:05:33.000000000 +0000 @@ -118,7 +118,7 @@ struct vhost_vring_state state; struct vring *vring = &dev->vrings.split[queue_sel]; struct vring_packed *pq_vring = &dev->vrings.packed[queue_sel]; - uint64_t desc_addr, avail_addr, used_addr; + uint64_t desc_addr, desc_iova_addr, avail_addr, used_addr; struct vhost_vring_addr addr = { .index = queue_sel, .log_guest_addr = 0, @@ -138,25 +138,21 @@ } if (dev->features & (1ULL << VIRTIO_F_RING_PACKED)) { - desc_addr = pq_vring->desc_iova; - avail_addr = desc_addr + pq_vring->num * sizeof(struct vring_packed_desc); - used_addr = 
RTE_ALIGN_CEIL(avail_addr + sizeof(struct vring_packed_desc_event), - VIRTIO_VRING_ALIGN); - - addr.desc_user_addr = desc_addr; - addr.avail_user_addr = avail_addr; - addr.used_user_addr = used_addr; + desc_iova_addr = pq_vring->desc_iova; + desc_addr = (uint64_t)(uintptr_t)pq_vring->desc; + avail_addr = (uint64_t)(uintptr_t)pq_vring->driver; + used_addr = (uint64_t)(uintptr_t)pq_vring->device; } else { - desc_addr = vring->desc_iova; - avail_addr = desc_addr + vring->num * sizeof(struct vring_desc); - used_addr = RTE_ALIGN_CEIL((uintptr_t)(&vring->avail->ring[vring->num]), - VIRTIO_VRING_ALIGN); - - addr.desc_user_addr = desc_addr; - addr.avail_user_addr = avail_addr; - addr.used_user_addr = used_addr; + desc_iova_addr = vring->desc_iova; + desc_addr = (uint64_t)(uintptr_t)vring->desc; + avail_addr = (uint64_t)(uintptr_t)vring->avail; + used_addr = (uint64_t)(uintptr_t)vring->used; } + addr.desc_user_addr = desc_iova_addr; + addr.avail_user_addr = (desc_iova_addr - desc_addr) + avail_addr; + addr.used_user_addr = (desc_iova_addr - desc_addr) + used_addr; + state.index = queue_sel; state.num = vring->num; ret = dev->ops->set_vring_num(dev, &state); diff -Nru dpdk-24.11.3/drivers/net/vmxnet3/base/vmxnet3_defs.h dpdk-24.11.4/drivers/net/vmxnet3/base/vmxnet3_defs.h --- dpdk-24.11.3/drivers/net/vmxnet3/base/vmxnet3_defs.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/vmxnet3/base/vmxnet3_defs.h 2025-12-19 12:05:33.000000000 +0000 @@ -598,6 +598,9 @@ /* addition 1 for events */ #define VMXNET3_MAX_INTRS 25 +/* Max number of queues that can request memreg, for both RX and TX. */ +#define VMXNET3_MAX_MEMREG_QUEUES 16 + /* Version 6 and later will use below macros */ #define VMXNET3_EXT_MAX_TX_QUEUES 32 #define VMXNET3_EXT_MAX_RX_QUEUES 32 diff -Nru dpdk-24.11.3/drivers/net/vmxnet3/vmxnet3_ethdev.c dpdk-24.11.4/drivers/net/vmxnet3/vmxnet3_ethdev.c --- dpdk-24.11.3/drivers/net/vmxnet3/vmxnet3_ethdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/vmxnet3/vmxnet3_ethdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -610,6 +610,13 @@ PMD_INIT_FUNC_TRACE(); + /* Disabling RSS for single queue pair */ + if (dev->data->nb_rx_queues == 1 && + dev->data->dev_conf.rxmode.mq_mode == RTE_ETH_MQ_RX_RSS) { + dev->data->dev_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE; + PMD_INIT_LOG(ERR, "WARN: Disabling RSS for single Rx queue"); + } + if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; @@ -801,14 +808,15 @@ Vmxnet3_DriverShared *shared = hw->shared; Vmxnet3_CmdInfo *cmdInfo; struct rte_mempool *mp[VMXNET3_MAX_RX_QUEUES]; - uint8_t index[VMXNET3_MAX_RX_QUEUES + VMXNET3_MAX_TX_QUEUES]; - uint32_t num, i, j, size; + uint16_t index[VMXNET3_MAX_MEMREG_QUEUES]; + uint16_t tx_index_mask; + uint32_t num, tx_num, i, j, size; if (hw->memRegsPA == 0) { const struct rte_memzone *mz; size = sizeof(Vmxnet3_MemRegs) + - (VMXNET3_MAX_RX_QUEUES + VMXNET3_MAX_TX_QUEUES) * + (2 * VMXNET3_MAX_MEMREG_QUEUES) * sizeof(Vmxnet3_MemoryRegion); mz = gpa_zone_reserve(dev, size, "memRegs", rte_socket_id(), 8, @@ -822,7 +830,9 @@ hw->memRegsPA = mz->iova; } - num = hw->num_rx_queues; + num = RTE_MIN(hw->num_rx_queues, VMXNET3_MAX_MEMREG_QUEUES); + tx_num = RTE_MIN(hw->num_tx_queues, VMXNET3_MAX_MEMREG_QUEUES); + tx_index_mask = (uint16_t)((1UL << tx_num) - 1); for (i = 0; i < num; i++) { vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i]; @@ -857,13 +867,15 @@ (uintptr_t)STAILQ_FIRST(&mp[i]->mem_list)->iova; mr->length = 
STAILQ_FIRST(&mp[i]->mem_list)->len <= INT32_MAX ? STAILQ_FIRST(&mp[i]->mem_list)->len : INT32_MAX; - mr->txQueueBits = index[i]; mr->rxQueueBits = index[i]; + /* tx uses same pool, but there may be fewer tx queues */ + mr->txQueueBits = index[i] & tx_index_mask; PMD_INIT_LOG(INFO, "index: %u startPA: %" PRIu64 " length: %u, " - "rxBits: %x", - j, mr->startPA, mr->length, mr->rxQueueBits); + "rxBits: %x, txBits: %x", + j, mr->startPA, mr->length, + mr->rxQueueBits, mr->txQueueBits); j++; } hw->memRegs->numRegs = j; @@ -1087,8 +1099,8 @@ } /* Check memregs restrictions first */ - if (dev->data->nb_rx_queues <= VMXNET3_MAX_RX_QUEUES && - dev->data->nb_tx_queues <= VMXNET3_MAX_TX_QUEUES) { + if (dev->data->nb_rx_queues <= VMXNET3_MAX_MEMREG_QUEUES && + dev->data->nb_tx_queues <= VMXNET3_MAX_MEMREG_QUEUES) { ret = vmxnet3_dev_setup_memreg(dev); if (ret == 0) { VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, diff -Nru dpdk-24.11.3/drivers/net/zxdh/meson.build dpdk-24.11.4/drivers/net/zxdh/meson.build --- dpdk-24.11.3/drivers/net/zxdh/meson.build 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/net/zxdh/meson.build 2025-12-19 12:05:33.000000000 +0000 @@ -7,7 +7,7 @@ subdir_done() endif -if not dpdk_conf.has('RTE_ARCH_X86_64') or not dpdk_conf.get('RTE_ARCH_64') +if arch_subdir != 'x86' and arch_subdir != 'arm' or not dpdk_conf.get('RTE_ARCH_64') build = false reason = 'only supported on x86_64 and aarch64' subdir_done() diff -Nru dpdk-24.11.3/drivers/regex/mlx5/mlx5_regex_fastpath.c dpdk-24.11.4/drivers/regex/mlx5/mlx5_regex_fastpath.c --- dpdk-24.11.3/drivers/regex/mlx5/mlx5_regex_fastpath.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/regex/mlx5/mlx5_regex_fastpath.c 2025-12-19 12:05:33.000000000 +0000 @@ -27,7 +27,6 @@ #define MLX5_REGEX_MAX_WQE_INDEX 0xffff #define MLX5_REGEX_METADATA_SIZE ((size_t)64) #define MLX5_REGEX_MAX_OUTPUT (((size_t)1) << 11) -#define MLX5_REGEX_WQE_CTRL_OFFSET 12 #define MLX5_REGEX_WQE_METADATA_OFFSET 16 #define MLX5_REGEX_WQE_GATHER_OFFSET 32 #define MLX5_REGEX_WQE_SCATTER_OFFSET 48 diff -Nru dpdk-24.11.3/drivers/regex/mlx5/mlx5_rxp.c dpdk-24.11.4/drivers/regex/mlx5/mlx5_rxp.c --- dpdk-24.11.3/drivers/regex/mlx5/mlx5_rxp.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/regex/mlx5/mlx5_rxp.c 2025-12-19 12:05:33.000000000 +0000 @@ -24,8 +24,6 @@ #define MLX5_REGEX_MAX_RULES_PER_GROUP UINT32_MAX #define MLX5_REGEX_MAX_GROUPS MLX5_RXP_MAX_SUBSETS -#define MLX5_REGEX_RXP_ROF2_LINE_LEN 34 - const uint64_t combined_rof_tag = 0xff52544424a52475; /* Private Declarations */ diff -Nru dpdk-24.11.3/drivers/regex/mlx5/mlx5_rxp.h dpdk-24.11.4/drivers/regex/mlx5/mlx5_rxp.h --- dpdk-24.11.3/drivers/regex/mlx5/mlx5_rxp.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/regex/mlx5/mlx5_rxp.h 2025-12-19 12:05:33.000000000 +0000 @@ -9,27 +9,13 @@ #define MLX5_RXP_BF3_IDENTIFIER 0x1 #define MLX5_RXP_MAX_JOB_LENGTH 16384 #define MLX5_RXP_MAX_SUBSETS 4095 -#define MLX5_RXP_CSR_NUM_ENTRIES 31 #define MLX5_RXP_BF2_ROF_VERSION_STRING 0x07055254 #define MLX5_RXP_BF3_ROF_VERSION_STRING 0x00065254 -#define MLX5_RXP_BF4_ROF_VERSION_STRING 0x00075254 - -#define MLX5_RXP_CTRL_TYPE_MASK 7 -#define MLX5_RXP_CTRL_TYPE_JOB_DESCRIPTOR 0 -#define MLX5_RXP_CTRL_TYPE_RESPONSE_DESCRIPTOR 1 -#define MLX5_RXP_CTRL_TYPE_MEMORY_WRITE 4 -#define MLX5_RXP_CSR_CTRL_DISABLE_L2C (1 << 7) #define MLX5_RXP_CTRL_JOB_DESC_SOF 0x0010 #define MLX5_RXP_CTRL_JOB_DESC_EOF 0x0020 #define MLX5_RXP_CTRL_JOB_DESC_HPM_ENABLE 0x0100 #define MLX5_RXP_CTRL_JOB_DESC_ANYMATCH_ENABLE 
0x0200 -#define MLX5_RXP_CTRL_JOB_DESC_FLAGS (MLX5_RXP_CTRL_JOB_DESC_SOF | \ - MLX5_RXP_CTRL_JOB_DESC_EOF | \ - MLX5_RXP_CTRL_JOB_DESC_HPM_ENABLE | \ - MLX5_RXP_CTRL_JOB_DESC_ANYMATCH_ENABLE) - -#define MLX5_RXP_CTRL_VALID 0x8000 #define MLX5_RXP_RESP_STATUS_MAX_PRI_THREADS (1 << 3) #define MLX5_RXP_RESP_STATUS_MAX_SEC_THREADS (1 << 4) @@ -128,12 +114,6 @@ MLX5_RXP_PRIVATE_PROG_MODE, }; -#define MLX5_RXP_POLL_CSR_FOR_VALUE_TIMEOUT 3000 /* Poll timeout in ms. */ -#define MLX5_RXP_INITIALIZATION_TIMEOUT 60000 /* Initialize timeout in ms. */ -#define MLX5_RXP_MAX_ENGINES 2u /* Number of RXP engines. */ -#define MLX5_RXP_EM_COUNT 1u /* Extra External Memories to use. */ -#define MLX5_RXP_DB_NOT_ASSIGNED 0xFF - struct mlx5_regex_mkey { struct mlx5dv_devx_umem *umem; struct mlx5_devx_obj *mkey; diff -Nru dpdk-24.11.3/drivers/vdpa/mlx5/mlx5_vdpa.h dpdk-24.11.4/drivers/vdpa/mlx5/mlx5_vdpa.h --- dpdk-24.11.3/drivers/vdpa/mlx5/mlx5_vdpa.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/drivers/vdpa/mlx5/mlx5_vdpa.h 2025-12-19 12:05:33.000000000 +0000 @@ -38,7 +38,6 @@ #define VIRTIO_F_RING_PACKED 34 #endif -#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 0u #define MLX5_VDPA_DEFAULT_TIMER_STEP_US 1u struct mlx5_vdpa_cq { diff -Nru dpdk-24.11.3/dts/tests/TestSuite_checksum_offload.py dpdk-24.11.4/dts/tests/TestSuite_checksum_offload.py --- dpdk-24.11.3/dts/tests/TestSuite_checksum_offload.py 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/dts/tests/TestSuite_checksum_offload.py 2025-12-19 12:05:33.000000000 +0000 @@ -37,7 +37,7 @@ class TestChecksumOffload(TestSuite): """Checksum offload test suite. - This suite consists of 6 test cases: + This suite consists of 7 test cases: 1. Insert checksum on transmit packet 2. Do not insert checksum on transmit packet 3. Hardware checksum check L4 Rx @@ -86,12 +86,17 @@ testpmd.start() self.send_packet_and_capture(packet=packet) verbose_output = testpmd.extract_verbose_output(testpmd.stop()) + is_IP = is_L4 = None for packet in verbose_output: if packet.dst_mac == id: - isIP = PacketOffloadFlag.RTE_MBUF_F_RX_IP_CKSUM_GOOD in packet.ol_flags - isL4 = PacketOffloadFlag.RTE_MBUF_F_RX_L4_CKSUM_GOOD in packet.ol_flags - self.verify(isL4 == goodL4, "Layer 4 checksum flag did not match expected checksum flag.") - self.verify(isIP == goodIP, "IP checksum flag did not match expected checksum flag.") + is_IP = PacketOffloadFlag.RTE_MBUF_F_RX_IP_CKSUM_GOOD in packet.ol_flags + is_L4 = PacketOffloadFlag.RTE_MBUF_F_RX_L4_CKSUM_GOOD in packet.ol_flags + self.verify( + is_IP is not None and is_L4 is not None, + "Test packet was dropped when it should have been received.", + ) + self.verify(is_L4 == goodL4, "Layer 4 checksum flag did not match expected checksum flag.") + self.verify(is_IP == goodIP, "IP checksum flag did not match expected checksum flag.") def setup_hw_offload(self, testpmd: TestPmdShell) -> None: """Sets IP, UDP, and TCP layers to hardware offload. diff -Nru dpdk-24.11.3/examples/l3fwd/l3fwd.h dpdk-24.11.4/examples/l3fwd/l3fwd.h --- dpdk-24.11.3/examples/l3fwd/l3fwd.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/examples/l3fwd/l3fwd.h 2025-12-19 12:05:33.000000000 +0000 @@ -32,10 +32,6 @@ #define VECTOR_SIZE_DEFAULT MAX_PKT_BURST #define VECTOR_TMO_NS_DEFAULT 1E6 /* 1ms */ -/* - * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send. 
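
The l3fwd changes that start here split the single --burst option into --rx-burst and --tx-burst: the Rx loops read up to rx_burst_size packets per poll, the Tx buffer is flushed once rx_burst_size packets have accumulated, and buffering is bypassed entirely when at least tx_burst_size packets arrive at once and the buffer is empty. A condensed sketch of that send policy, using simplified buffer and queue stand-ins instead of the real lcore_conf structures:

	#include <stdint.h>

	/* Illustrative knobs matching the patch's new variables, not the real defaults. */
	static uint32_t rx_burst_size = 32;
	static uint32_t tx_burst_size = 32;

	struct demo_tx_buf {
		uint16_t len;
		void *pkts[512];
	};

	/* Stand-in for rte_eth_tx_burst(). */
	static uint16_t demo_tx_burst(void **pkts, uint16_t n) { (void)pkts; return n; }

	static void
	demo_flush(struct demo_tx_buf *b)
	{
		demo_tx_burst(b->pkts, b->len);
		b->len = 0;
	}

	/* Per-packet path, as in send_single_packet(): buffer, flush at rx_burst_size. */
	static void
	demo_send_single(struct demo_tx_buf *b, void *pkt)
	{
		b->pkts[b->len++] = pkt;
		if (b->len == rx_burst_size)
			demo_flush(b);
	}

	/* Multi-packet path, as in send_packets_multi(): skip buffering for big bursts. */
	static void
	demo_send_multi(struct demo_tx_buf *b, void **pkts, uint16_t num)
	{
		uint16_t i;

		if (num >= tx_burst_size && b->len == 0) {
			demo_tx_burst(pkts, num);	/* straight-through, no copy */
			return;
		}
		for (i = 0; i < num; i++)
			demo_send_single(b, pkts[i]);
	}

With the two thresholds decoupled, an application can poll with a small Rx burst for latency while still letting large bursts go straight to the NIC on transmit.
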
- */ -#define MAX_TX_BURST (MAX_PKT_BURST / 2) #define NB_SOCKETS 8 @@ -116,7 +112,7 @@ extern uint32_t max_pkt_len; -extern uint32_t nb_pkt_per_burst; +extern uint32_t rx_burst_size; extern uint32_t mb_mempool_cache_size; /* Send burst of packets on an output interface */ @@ -152,8 +148,8 @@ len++; /* enough pkts to be sent */ - if (unlikely(len == MAX_PKT_BURST)) { - send_burst(qconf, MAX_PKT_BURST, port); + if (unlikely(len == rx_burst_size)) { + send_burst(qconf, rx_burst_size, port); len = 0; } diff -Nru dpdk-24.11.3/examples/l3fwd/l3fwd_acl.c dpdk-24.11.4/examples/l3fwd/l3fwd_acl.c --- dpdk-24.11.3/examples/l3fwd/l3fwd_acl.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/examples/l3fwd/l3fwd_acl.c 2025-12-19 12:05:33.000000000 +0000 @@ -1136,7 +1136,7 @@ portid = qconf->rx_queue_list[i].port_id; queueid = qconf->rx_queue_list[i].queue_id; nb_rx = rte_eth_rx_burst(portid, queueid, - pkts_burst, nb_pkt_per_burst); + pkts_burst, rx_burst_size); if (nb_rx > 0) { nb_drop = acl_process_pkts(pkts_burst, hops, diff -Nru dpdk-24.11.3/examples/l3fwd/l3fwd_common.h dpdk-24.11.4/examples/l3fwd/l3fwd_common.h --- dpdk-24.11.3/examples/l3fwd/l3fwd_common.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/examples/l3fwd/l3fwd_common.h 2025-12-19 12:05:33.000000000 +0000 @@ -25,6 +25,9 @@ */ #define SENDM_PORT_OVERHEAD(x) (x) +extern uint32_t rx_burst_size; +extern uint32_t tx_burst_size; + /* * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2: * - The IP version number must be 4. @@ -71,7 +74,7 @@ * If TX buffer for that queue is empty, and we have enough packets, * then send them straightway. */ - if (num >= MAX_TX_BURST && len == 0) { + if (num >= tx_burst_size && len == 0) { n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num); if (unlikely(n < num)) { do { diff -Nru dpdk-24.11.3/examples/l3fwd/l3fwd_em.c dpdk-24.11.4/examples/l3fwd/l3fwd_em.c --- dpdk-24.11.3/examples/l3fwd/l3fwd_em.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/examples/l3fwd/l3fwd_em.c 2025-12-19 12:05:33.000000000 +0000 @@ -644,7 +644,7 @@ portid = qconf->rx_queue_list[i].port_id; queueid = qconf->rx_queue_list[i].queue_id; nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, - nb_pkt_per_burst); + rx_burst_size); if (nb_rx == 0) continue; diff -Nru dpdk-24.11.3/examples/l3fwd/l3fwd_fib.c dpdk-24.11.4/examples/l3fwd/l3fwd_fib.c --- dpdk-24.11.3/examples/l3fwd/l3fwd_fib.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/examples/l3fwd/l3fwd_fib.c 2025-12-19 12:05:33.000000000 +0000 @@ -239,7 +239,7 @@ portid = qconf->rx_queue_list[i].port_id; queueid = qconf->rx_queue_list[i].queue_id; nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, - nb_pkt_per_burst); + rx_burst_size); if (nb_rx == 0) continue; diff -Nru dpdk-24.11.3/examples/l3fwd/l3fwd_lpm.c dpdk-24.11.4/examples/l3fwd/l3fwd_lpm.c --- dpdk-24.11.3/examples/l3fwd/l3fwd_lpm.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/examples/l3fwd/l3fwd_lpm.c 2025-12-19 12:05:33.000000000 +0000 @@ -205,7 +205,7 @@ portid = qconf->rx_queue_list[i].port_id; queueid = qconf->rx_queue_list[i].queue_id; nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, - nb_pkt_per_burst); + rx_burst_size); if (nb_rx == 0) continue; diff -Nru dpdk-24.11.3/examples/l3fwd/main.c dpdk-24.11.4/examples/l3fwd/main.c --- dpdk-24.11.3/examples/l3fwd/main.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/examples/l3fwd/main.c 2025-12-19 12:05:33.000000000 +0000 @@ -57,8 +57,9 @@ static_assert(MEMPOOL_CACHE_SIZE >= MAX_PKT_BURST, "MAX_PKT_BURST should 
be at most MEMPOOL_CACHE_SIZE"); uint16_t nb_rxd = RX_DESC_DEFAULT; uint16_t nb_txd = TX_DESC_DEFAULT; -uint32_t nb_pkt_per_burst = DEFAULT_PKT_BURST; +uint32_t rx_burst_size = DEFAULT_PKT_BURST; uint32_t mb_mempool_cache_size = MEMPOOL_CACHE_SIZE; +uint32_t tx_burst_size = DEFAULT_PKT_BURST; /**< Ports set in promiscuous mode off by default. */ static int promiscuous_on; @@ -400,7 +401,8 @@ " --config (port,queue,lcore)[,(port,queue,lcore)]" " [--rx-queue-size NPKTS]" " [--tx-queue-size NPKTS]" - " [--burst NPKTS]" + " [--rx-burst NPKTS]" + " [--tx-burst NPKTS]" " [--mbcache CACHESZ]" " [--eth-dest=X,MM:MM:MM:MM:MM:MM]" " [--max-pkt-len PKTLEN]" @@ -427,7 +429,9 @@ " Default: %d\n" " --tx-queue-size NPKTS: Tx queue size in decimal\n" " Default: %d\n" - " --burst NPKTS: Burst size in decimal\n" + " --rx-burst NPKTS: RX Burst size in decimal\n" + " Default: %d\n" + " --tx-burst NPKTS: TX Burst size in decimal\n" " Default: %d\n" " --mbcache CACHESZ: Mbuf cache size in decimal\n" " Default: %d\n" @@ -460,8 +464,8 @@ " another is route entry at while line leads with character '%c'.\n" " --rule_ipv6=FILE: Specify the ipv6 rules entries file.\n" " --alg: ACL classify method to use, one of: %s.\n\n", - prgname, RX_DESC_DEFAULT, TX_DESC_DEFAULT, DEFAULT_PKT_BURST, MEMPOOL_CACHE_SIZE, - ACL_LEAD_CHAR, ROUTE_LEAD_CHAR, alg); + prgname, RX_DESC_DEFAULT, TX_DESC_DEFAULT, DEFAULT_PKT_BURST, DEFAULT_PKT_BURST, + MEMPOOL_CACHE_SIZE, ACL_LEAD_CHAR, ROUTE_LEAD_CHAR, alg); } static int @@ -695,7 +699,7 @@ } static void -parse_pkt_burst(const char *optarg) +parse_pkt_burst(const char *optarg, bool is_rx_burst, uint32_t *burst_sz) { struct rte_eth_dev_info dev_info; unsigned long pkt_burst; @@ -710,31 +714,38 @@ if (pkt_burst > MAX_PKT_BURST) { RTE_LOG(INFO, L3FWD, "User provided burst must be <= %d. Using default value %d\n", - MAX_PKT_BURST, nb_pkt_per_burst); + MAX_PKT_BURST, *burst_sz); return; } else if (pkt_burst > 0) { - nb_pkt_per_burst = (uint32_t)pkt_burst; + *burst_sz = (uint32_t)pkt_burst; return; } - /* If user gives a value of zero, query the PMD for its recommended Rx burst size. */ - ret = rte_eth_dev_info_get(0, &dev_info); - if (ret != 0) - return; - burst_size = dev_info.default_rxportconf.burst_size; - if (burst_size == 0) { - RTE_LOG(INFO, L3FWD, "PMD does not recommend a burst size. Using default value %d. " - "User provided value must be in [1, %d]\n", - nb_pkt_per_burst, MAX_PKT_BURST); - return; - } else if (burst_size > MAX_PKT_BURST) { - RTE_LOG(INFO, L3FWD, "PMD recommended burst size %d exceeds maximum value %d. " - "Using default value %d\n", - burst_size, MAX_PKT_BURST, nb_pkt_per_burst); - return; + if (is_rx_burst) { + /* If user gives a value of zero, query the PMD for its recommended + * Rx burst size. + */ + ret = rte_eth_dev_info_get(0, &dev_info); + if (ret != 0) + return; + burst_size = dev_info.default_rxportconf.burst_size; + if (burst_size == 0) { + RTE_LOG(INFO, L3FWD, "PMD does not recommend a burst size. Using default value %d. " + "User provided value must be in [1, %d]\n", + rx_burst_size, MAX_PKT_BURST); + return; + } else if (burst_size > MAX_PKT_BURST) { + RTE_LOG(INFO, L3FWD, "PMD recommended burst size %d exceeds maximum value %d. " + "Using default value %d\n", + burst_size, MAX_PKT_BURST, rx_burst_size); + return; + } + *burst_sz = burst_size; + RTE_LOG(INFO, L3FWD, "Using PMD-provided RX burst value %d\n", burst_size); + } else { + RTE_LOG(INFO, L3FWD, "User provided TX burst is 0. 
Using default value %d\n", + *burst_sz); } - nb_pkt_per_burst = burst_size; - RTE_LOG(INFO, L3FWD, "Using PMD-provided burst value %d\n", burst_size); } #define MAX_JUMBO_PKT_LEN 9600 @@ -768,7 +779,8 @@ #define CMD_LINE_OPT_RULE_IPV4 "rule_ipv4" #define CMD_LINE_OPT_RULE_IPV6 "rule_ipv6" #define CMD_LINE_OPT_ALG "alg" -#define CMD_LINE_OPT_PKT_BURST "burst" +#define CMD_LINE_OPT_PKT_RX_BURST "rx-burst" +#define CMD_LINE_OPT_PKT_TX_BURST "tx-burst" #define CMD_LINE_OPT_MB_CACHE_SIZE "mbcache" enum { @@ -799,7 +811,8 @@ CMD_LINE_OPT_ENABLE_VECTOR_NUM, CMD_LINE_OPT_VECTOR_SIZE_NUM, CMD_LINE_OPT_VECTOR_TMO_NS_NUM, - CMD_LINE_OPT_PKT_BURST_NUM, + CMD_LINE_OPT_PKT_RX_BURST_NUM, + CMD_LINE_OPT_PKT_TX_BURST_NUM, CMD_LINE_OPT_MB_CACHE_SIZE_NUM, }; @@ -827,7 +840,8 @@ {CMD_LINE_OPT_RULE_IPV4, 1, 0, CMD_LINE_OPT_RULE_IPV4_NUM}, {CMD_LINE_OPT_RULE_IPV6, 1, 0, CMD_LINE_OPT_RULE_IPV6_NUM}, {CMD_LINE_OPT_ALG, 1, 0, CMD_LINE_OPT_ALG_NUM}, - {CMD_LINE_OPT_PKT_BURST, 1, 0, CMD_LINE_OPT_PKT_BURST_NUM}, + {CMD_LINE_OPT_PKT_RX_BURST, 1, 0, CMD_LINE_OPT_PKT_RX_BURST_NUM}, + {CMD_LINE_OPT_PKT_TX_BURST, 1, 0, CMD_LINE_OPT_PKT_TX_BURST_NUM}, {CMD_LINE_OPT_MB_CACHE_SIZE, 1, 0, CMD_LINE_OPT_MB_CACHE_SIZE_NUM}, {NULL, 0, 0, 0} }; @@ -917,8 +931,12 @@ parse_queue_size(optarg, &nb_txd, 0); break; - case CMD_LINE_OPT_PKT_BURST_NUM: - parse_pkt_burst(optarg); + case CMD_LINE_OPT_PKT_RX_BURST_NUM: + parse_pkt_burst(optarg, true, &rx_burst_size); + break; + + case CMD_LINE_OPT_PKT_TX_BURST_NUM: + parse_pkt_burst(optarg, false, &tx_burst_size); break; case CMD_LINE_OPT_MB_CACHE_SIZE_NUM: @@ -1654,6 +1672,8 @@ if (ret < 0) rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); + RTE_LOG(INFO, L3FWD, "Using Rx burst %u Tx burst %u\n", rx_burst_size, tx_burst_size); + /* Setup function pointers for lookup method. */ setup_l3fwd_lookup_tables(); diff -Nru dpdk-24.11.3/examples/l3fwd-power/main.c dpdk-24.11.4/examples/l3fwd-power/main.c --- dpdk-24.11.3/examples/l3fwd-power/main.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/examples/l3fwd-power/main.c 2025-12-19 12:05:33.000000000 +0000 @@ -2910,7 +2910,7 @@ rte_spinlock_init(&stats[lcore_id].telemetry_lock); } rte_timer_init(&telemetry_timer); - rte_telemetry_register_cmd("/l3fwd-power/stats", + rte_telemetry_register_cmd("/l3fwd_power/stats", handle_app_stats, "Returns global power stats. Parameters: None"); rte_eal_mp_remote_launch(main_telemetry_loop, NULL, diff -Nru dpdk-24.11.3/examples/server_node_efd/efd_server/main.c dpdk-24.11.4/examples/server_node_efd/efd_server/main.c --- dpdk-24.11.3/examples/server_node_efd/efd_server/main.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/examples/server_node_efd/efd_server/main.c 2025-12-19 12:05:33.000000000 +0000 @@ -68,7 +68,7 @@ get_printable_mac_addr(uint16_t port) { static const char err_address[] = "00:00:00:00:00:00"; - static char addresses[RTE_MAX_ETHPORTS][sizeof(err_address)]; + static char addresses[RTE_MAX_ETHPORTS][RTE_ETHER_ADDR_FMT_SIZE + 1]; struct rte_ether_addr mac; int ret; diff -Nru dpdk-24.11.3/examples/server_node_efd/shared/common.h dpdk-24.11.4/examples/server_node_efd/shared/common.h --- dpdk-24.11.3/examples/server_node_efd/shared/common.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/examples/server_node_efd/shared/common.h 2025-12-19 12:05:33.000000000 +0000 @@ -58,8 +58,9 @@ /* * Buffer for return value. Size calculated by %u being replaced * by maximum 3 digits (plus an extra byte for safety) + * Used as ring name, so upper limit is ring name size. 
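
Both of the buffer fixes in this area size static storage from what the string will actually hold: the ring-name buffer just below is bounded by RTE_RING_NAMESIZE, and the vdpa sample further down reserves room for the widest possible port suffix by stringifying the port-count constant at compile time. A small sketch of that stringification trick; the DEMO_* names are invented, and RTE_STR() works the same way as the helper shown here:

	#include <stdio.h>

	#define DEMO_STR_HELPER(x)	#x
	#define DEMO_STR(x)		DEMO_STR_HELPER(x)	/* same idea as RTE_STR() */

	#define DEMO_MAX_PORTS		1024
	/* sizeof("1024") == 5: widest decimal suffix plus the terminating NUL */
	#define DEMO_MAX_PORTS_STR_LEN	sizeof(DEMO_STR(DEMO_MAX_PORTS))

	#define DEMO_MAX_PATH_LEN	128

	/* Leave room so "<iface><port>" always fits in a DEMO_MAX_PATH_LEN buffer. */
	static char demo_iface[DEMO_MAX_PATH_LEN - DEMO_MAX_PORTS_STR_LEN];

	static void
	demo_make_socket_path(char out[DEMO_MAX_PATH_LEN], unsigned int port)
	{
		/* cannot truncate as long as port stays below DEMO_MAX_PORTS:
		 * strlen(demo_iface) < sizeof(demo_iface) and the suffix is at
		 * most DEMO_MAX_PORTS_STR_LEN bytes including the NUL */
		snprintf(out, DEMO_MAX_PATH_LEN, "%s%u", demo_iface, port);
	}
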
*/ - static char buffer[sizeof(MP_NODE_RXQ_NAME) + 2]; + static char buffer[RTE_RING_NAMESIZE]; snprintf(buffer, sizeof(buffer), MP_NODE_RXQ_NAME, id); return buffer; diff -Nru dpdk-24.11.3/examples/vdpa/main.c dpdk-24.11.4/examples/vdpa/main.c --- dpdk-24.11.3/examples/vdpa/main.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/examples/vdpa/main.c 2025-12-19 12:05:33.000000000 +0000 @@ -22,6 +22,7 @@ #define MAX_PATH_LEN 128 #define MAX_VDPA_SAMPLE_PORTS 1024 +#define MAX_VDPA_STR_LEN sizeof(RTE_STR(MAX_VDPA_SAMPLE_PORTS)) #define RTE_LOGTYPE_VDPA RTE_LOGTYPE_USER1 struct vdpa_port { @@ -36,7 +37,7 @@ static struct vdpa_port vports[MAX_VDPA_SAMPLE_PORTS]; -static char iface[MAX_PATH_LEN]; +static char iface[MAX_PATH_LEN - MAX_VDPA_STR_LEN]; static int devcnt; static int interactive; static int client_mode; @@ -74,9 +75,8 @@ break; /* long options */ case 0: - if (strncmp(long_option[idx].name, "iface", - MAX_PATH_LEN) == 0) { - rte_strscpy(iface, optarg, MAX_PATH_LEN); + if (!strcmp(long_option[idx].name, "iface")) { + rte_strscpy(iface, optarg, sizeof(iface)); printf("iface %s\n", iface); } if (!strcmp(long_option[idx].name, "interactive")) { diff -Nru dpdk-24.11.3/lib/bbdev/rte_bbdev.c dpdk-24.11.4/lib/bbdev/rte_bbdev.c --- dpdk-24.11.3/lib/bbdev/rte_bbdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/bbdev/rte_bbdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -1197,7 +1197,7 @@ { struct rte_bbdev_queue_data *q_data; struct rte_bbdev_stats *stats; - uint16_t i; + enum rte_bbdev_enqueue_status i; struct rte_bbdev *dev = get_dev(dev_id); VALID_DEV_OR_RET_ERR(dev, dev_id); @@ -1214,11 +1214,15 @@ dev->data->name, queue_id); fprintf(f, " Last Enqueue Status %s\n", rte_bbdev_enqueue_status_str(q_data->enqueue_status)); - for (i = 0; i < RTE_BBDEV_ENQ_STATUS_SIZE_MAX; i++) + for (i = 0; i < RTE_BBDEV_ENQ_STATUS_SIZE_MAX; i++) { + const char *status_str = rte_bbdev_enqueue_status_str(i); + if (status_str == NULL) + continue; if (q_data->queue_stats.enqueue_status_count[i] > 0) fprintf(f, " Enqueue Status Counters %s %" PRIu64 "\n", - rte_bbdev_enqueue_status_str(i), + status_str, q_data->queue_stats.enqueue_status_count[i]); + } stats = &dev->data->queues[queue_id].queue_stats; fprintf(f, " Enqueue Count %" PRIu64 " Warning %" PRIu64 " Error %" PRIu64 "\n", diff -Nru dpdk-24.11.3/lib/cmdline/cmdline_parse_portlist.c dpdk-24.11.4/lib/cmdline/cmdline_parse_portlist.c --- dpdk-24.11.3/lib/cmdline/cmdline_parse_portlist.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/cmdline/cmdline_parse_portlist.c 2025-12-19 12:05:33.000000000 +0000 @@ -9,7 +9,9 @@ #include #include +#include #include + #include "cmdline_parse.h" #include "cmdline_parse_portlist.h" @@ -24,22 +26,20 @@ parse_set_list(cmdline_portlist_t *pl, size_t low, size_t high) { do { - pl->map |= (1 << low++); + pl->map |= RTE_BIT32(low); + low++; } while (low <= high); } static int parse_ports(cmdline_portlist_t *pl, const char *str) { + const char *first = str; size_t ps, pe; - const char *first, *last; char *end; - for (first = str, last = first; - first != NULL && last != NULL; - first = last + 1) { - - last = strchr(first, ','); + while (first != NULL) { + const char *last = strchr(first, ','); errno = 0; ps = strtoul(first, &end, 10); @@ -63,6 +63,7 @@ return -1; parse_set_list(pl, ps, pe); + first = (last == NULL ? 
NULL : last + 1); } return 0; diff -Nru dpdk-24.11.3/lib/dmadev/rte_dmadev.h dpdk-24.11.4/lib/dmadev/rte_dmadev.h --- dpdk-24.11.3/lib/dmadev/rte_dmadev.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/dmadev/rte_dmadev.h 2025-12-19 12:05:33.000000000 +0000 @@ -145,6 +145,7 @@ */ #include +#include #include #include diff -Nru dpdk-24.11.3/lib/eal/arm/include/rte_memcpy_32.h dpdk-24.11.4/lib/eal/arm/include/rte_memcpy_32.h --- dpdk-24.11.3/lib/eal/arm/include/rte_memcpy_32.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/eal/arm/include/rte_memcpy_32.h 2025-12-19 12:05:33.000000000 +0000 @@ -19,10 +19,14 @@ /* ARM NEON Intrinsics are used to copy data */ #include +#endif /* RTE_ARCH_ARM_NEON_MEMCPY */ + #ifdef __cplusplus extern "C" { #endif +#ifdef RTE_ARCH_ARM_NEON_MEMCPY + static inline void rte_mov16(uint8_t *dst, const uint8_t *src) { @@ -252,7 +256,7 @@ return ret; } -#else +#else /* ! RTE_ARCH_ARM_NEON_MEMCPY */ static inline void rte_mov16(uint8_t *dst, const uint8_t *src) diff -Nru dpdk-24.11.3/lib/eal/common/eal_common_options.c dpdk-24.11.4/lib/eal/common/eal_common_options.c --- dpdk-24.11.3/lib/eal/common/eal_common_options.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/eal/common/eal_common_options.c 2025-12-19 12:05:33.000000000 +0000 @@ -395,12 +395,21 @@ } #else +static bool +ends_with(const char *str, const char *tail) +{ + size_t tail_len = strlen(tail); + size_t str_len = strlen(str); + + return str_len >= tail_len && strcmp(&str[str_len - tail_len], tail) == 0; +} + static int eal_plugindir_init(const char *path) { - DIR *d = NULL; struct dirent *dent = NULL; char sopath[PATH_MAX]; + DIR *d = NULL; if (path == NULL || *path == '\0') return 0; @@ -414,12 +423,8 @@ while ((dent = readdir(d)) != NULL) { struct stat sb; - int nlen = strnlen(dent->d_name, sizeof(dent->d_name)); - /* check if name ends in .so or .so.ABI_VERSION */ - if (strcmp(&dent->d_name[nlen - 3], ".so") != 0 && - strcmp(&dent->d_name[nlen - 4 - strlen(ABI_VERSION)], - ".so."ABI_VERSION) != 0) + if (!ends_with(dent->d_name, ".so") && !ends_with(dent->d_name, ".so."ABI_VERSION)) continue; snprintf(sopath, sizeof(sopath), "%s/%s", path, dent->d_name); diff -Nru dpdk-24.11.3/lib/eal/freebsd/eal.c dpdk-24.11.4/lib/eal/freebsd/eal.c --- dpdk-24.11.3/lib/eal/freebsd/eal.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/eal/freebsd/eal.c 2025-12-19 12:05:33.000000000 +0000 @@ -665,12 +665,16 @@ * with a message describing the cause. 
*/ has_phys_addr = internal_conf->no_hugetlbfs == 0; + + /* Always call rte_bus_get_iommu_class() to trigger DMA mask detection and validation */ + enum rte_iova_mode bus_iova_mode = rte_bus_get_iommu_class(); + iova_mode = internal_conf->iova_mode; if (iova_mode == RTE_IOVA_DC) { EAL_LOG(DEBUG, "Specific IOVA mode is not requested, autodetecting"); if (has_phys_addr) { EAL_LOG(DEBUG, "Selecting IOVA mode according to bus requests"); - iova_mode = rte_bus_get_iommu_class(); + iova_mode = bus_iova_mode; if (iova_mode == RTE_IOVA_DC) { if (!RTE_IOVA_IN_MBUF) { iova_mode = RTE_IOVA_VA; @@ -899,8 +903,8 @@ struct internal_config *internal_conf = eal_get_internal_configuration(); rte_service_finalize(); - rte_mp_channel_cleanup(); eal_bus_cleanup(); + rte_mp_channel_cleanup(); rte_eal_alarm_cleanup(); rte_trace_save(); eal_trace_fini(); diff -Nru dpdk-24.11.3/lib/eal/include/rte_bitops.h dpdk-24.11.4/lib/eal/include/rte_bitops.h --- dpdk-24.11.3/lib/eal/include/rte_bitops.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/eal/include/rte_bitops.h 2025-12-19 12:05:33.000000000 +0000 @@ -1234,7 +1234,7 @@ * The integer value to align * * @return - * Input parameter aligned to the next power of 2 + * The smallest power of 2 which is greater than or equal to @c x. */ static inline uint32_t rte_align32pow2(uint32_t x) @@ -1252,7 +1252,7 @@ * The integer value to align * * @return - * Input parameter aligned to the previous power of 2 + * The greatest power of 2 which is smaller than or equal to @c x. */ static inline uint32_t rte_align32prevpow2(uint32_t x) @@ -1269,7 +1269,7 @@ * The 64b value to align * * @return - * Input parameter aligned to the next power of 2 + * The smallest power of 2 which is greater than or equal to @c v. */ static inline uint64_t rte_align64pow2(uint64_t v) @@ -1287,7 +1287,7 @@ * The 64b value to align * * @return - * Input parameter aligned to the previous power of 2 + * The greatest power of 2 which is smaller than or equal to @c v. */ static inline uint64_t rte_align64prevpow2(uint64_t v) diff -Nru dpdk-24.11.3/lib/eal/include/rte_mcslock.h dpdk-24.11.4/lib/eal/include/rte_mcslock.h --- dpdk-24.11.3/lib/eal/include/rte_mcslock.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/eal/include/rte_mcslock.h 2025-12-19 12:05:33.000000000 +0000 @@ -57,11 +57,21 @@ rte_atomic_store_explicit(&me->locked, 1, rte_memory_order_relaxed); rte_atomic_store_explicit(&me->next, NULL, rte_memory_order_relaxed); - /* If the queue is empty, the exchange operation is enough to acquire - * the lock. Hence, the exchange operation requires acquire semantics. - * The store to me->next above should complete before the node is - * visible to other CPUs/threads. Hence, the exchange operation requires - * release semantics as well. + /* + * A0/R0: Queue might be empty, perform the exchange (RMW) with both acquire and + * release semantics: + * A0: Acquire — synchronizes with both R0 and R2. + * Must synchronize with R0 to ensure that this thread observes predecessor's + * initialization of its lock object or risk them overwriting this thread's + * update to the next of the same object via store to prev->next. + * + * Must synchronize with R2 the releasing CAS in unlock(), this will ensure + * that all prior critical-section writes become visible to this thread. + * + * R0: Release — ensures the successor observes our initialization of me->next; + * without it, me->next could be overwritten to NULL after the successor + * sets its own address, causing deadlock. 
This release synchronizes with + * A0 above. */ prev = rte_atomic_exchange_explicit(msl, me, rte_memory_order_acq_rel); if (likely(prev == NULL)) { @@ -70,24 +80,26 @@ */ return; } - /* The store to me->next above should also complete before the node is - * visible to predecessor thread releasing the lock. Hence, the store - * prev->next also requires release semantics. Note that, for example, - * on ARM, the release semantics in the exchange operation is not - * strong as a release fence and is not sufficient to enforce the - * desired order here. + + /* + * R1: With the relaxed memory model of C/C++, it's essential that after + * we link ourselves by storing prev->next = me, the owner of prev must + * observe our prior initialization of me->locked. Otherwise it could + * clear me->locked before we set it to 1, which may deadlock. + * Perform a releasing store so the predecessor's acquire loads A2 and A3 + * observes our initialization, establishing a happens-before from those + * writes. */ rte_atomic_store_explicit(&prev->next, me, rte_memory_order_release); - /* The while-load of me->locked should not move above the previous - * store to prev->next. Otherwise it will cause a deadlock. Need a - * store-load barrier. - */ - rte_atomic_thread_fence(rte_memory_order_acq_rel); - /* If the lock has already been acquired, it first atomically + /* + * A1: If the lock has already been acquired, it first atomically * places the node at the end of the queue and then proceeds * to spin on me->locked until the previous lock holder resets - * the me->locked using mcslock_unlock(). + * the me->locked in rte_mcslock_unlock(). + * This load must synchronize with store-release R3 to ensure that + * all updates to critical section by previous lock holder is visible + * to this thread after acquiring the lock. */ rte_wait_until_equal_32((uint32_t *)(uintptr_t)&me->locked, 0, rte_memory_order_acquire); } @@ -103,30 +115,46 @@ static inline void rte_mcslock_unlock(RTE_ATOMIC(rte_mcslock_t *) *msl, RTE_ATOMIC(rte_mcslock_t *) me) { - /* Check if there are more nodes in the queue. */ - if (likely(rte_atomic_load_explicit(&me->next, rte_memory_order_relaxed) == NULL)) { + /* + * A2: Check whether a successor is already queued. + * Load me->next with acquire semantics so it can synchronize with the + * successor’s release store R1. This guarantees that the successor’s + * initialization of its lock object (me) is completed before we observe + * it here, preventing a race between this thread’s store-release to + * me->next->locked and the successor’s store to me->locked. + */ + if (likely(rte_atomic_load_explicit(&me->next, rte_memory_order_acquire) == NULL)) { /* No, last member in the queue. */ - rte_mcslock_t *save_me = rte_atomic_load_explicit(&me, rte_memory_order_relaxed); + rte_mcslock_t *save_me = me; - /* Release the lock by setting it to NULL */ + /* + * R2: Try to release the lock by swinging *msl from save_me to NULL. + * Use release semantics so all critical section writes become + * visible to the next lock acquirer. + */ if (likely(rte_atomic_compare_exchange_strong_explicit(msl, &save_me, NULL, rte_memory_order_release, rte_memory_order_relaxed))) return; - /* Speculative execution would be allowed to read in the - * while-loop first. This has the potential to cause a - * deadlock. Need a load barrier. - */ - rte_atomic_thread_fence(rte_memory_order_acquire); - /* More nodes added to the queue by other CPUs. - * Wait until the next pointer is set. 
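
The relabelled A*/R* comments above document which acquire pairs with which release inside the MCS lock; the caller-visible contract is unchanged: every locker supplies its own queue node and passes that same node to unlock. A minimal usage sketch under that assumption (the variable names and the counter are illustrative only; EAL setup and thread creation are omitted):

#include <rte_mcslock.h>

static RTE_ATOMIC(rte_mcslock_t *) app_lock;   /* shared tail pointer, NULL == unlocked */
static uint64_t app_counter;                   /* data protected by the lock */

static void
critical_update(void)
{
    rte_mcslock_t me;                      /* per-thread queue node, valid until unlock returns */

    rte_mcslock_lock(&app_lock, &me);      /* enqueue and spin locally on me.locked (A0/A1) */
    app_counter++;                         /* critical section */
    rte_mcslock_unlock(&app_lock, &me);    /* release or hand the lock to the successor (R2/R3) */
}
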
+ /* + * A3: Another thread was enqueued concurrently, so the CAS and the lock + * release failed. Wait until the successor sets our 'next' pointer. + * This load must synchronize with the successor’s release store (R1) to + * ensure that the successor’s initialization completes before we observe + * it here. This ordering prevents a race between this thread’s later + * store-release to me->next->locked and the successor’s store to me->locked. */ RTE_ATOMIC(uintptr_t) *next; next = (__rte_atomic uintptr_t *)&me->next; - RTE_WAIT_UNTIL_MASKED(next, UINTPTR_MAX, !=, 0, rte_memory_order_relaxed); + RTE_WAIT_UNTIL_MASKED(next, UINTPTR_MAX, !=, 0, rte_memory_order_acquire); } - /* Pass lock to next waiter. */ + /* + * R3: Pass the lock to the successor. + * Use a release store to synchronize with A1 when clearing me->next->locked + * so the successor observes our critical section writes after it sees locked + * become 0. + */ rte_atomic_store_explicit(&me->next->locked, 0, rte_memory_order_release); } @@ -149,11 +177,11 @@ /* Try to lock */ rte_mcslock_t *expected = NULL; - /* The lock can be taken only when the queue is empty. Hence, - * the compare-exchange operation requires acquire semantics. - * The store to me->next above should complete before the node - * is visible to other CPUs/threads. Hence, the compare-exchange - * operation requires release semantics as well. + /* + * A4/R4: The lock can be acquired only when the queue is empty. + * The compare-and-exchange operation must use acquire and release + * semantics for the same reasons described in the rte_mcslock_lock() + * function’s empty-queue case (see A0/R0 for details). */ return rte_atomic_compare_exchange_strong_explicit(msl, &expected, me, rte_memory_order_acq_rel, rte_memory_order_relaxed); diff -Nru dpdk-24.11.3/lib/eal/include/rte_tailq.h dpdk-24.11.4/lib/eal/include/rte_tailq.h --- dpdk-24.11.3/lib/eal/include/rte_tailq.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/eal/include/rte_tailq.h 2025-12-19 12:05:33.000000000 +0000 @@ -69,11 +69,12 @@ * @return * The return value from rte_eal_tailq_lookup, typecast to the appropriate * structure pointer type. - * NULL on error, since the tailq_head is the first - * element in the rte_tailq_head structure. + * NULL on error. */ -#define RTE_TAILQ_LOOKUP(name, struct_name) \ - RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name) +#define RTE_TAILQ_LOOKUP(name, struct_name) __extension__ ({ \ + struct rte_tailq_head *head = rte_eal_tailq_lookup(name); \ + head == NULL ? NULL : RTE_TAILQ_CAST(head, struct_name); \ +}) /** * Dump tail queues to a file. 
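
The reworked RTE_TAILQ_LOOKUP above uses a GCC/Clang statement expression so that a failed lookup propagates NULL instead of casting an offset of a NULL pointer. The same pattern in isolation, with invented struct and function names (this is not DPDK API):

#include <stddef.h>
#include <stdio.h>

struct head { int first; };
struct wrapper { struct head h; int extra; };

static struct head *lookup(const char *name) { (void)name; return NULL; /* simulate a miss */ }

/* Cast only when the lookup succeeded, otherwise propagate NULL. */
#define LOOKUP_AS(name, type) __extension__ ({          \
    struct head *_h = lookup(name);                     \
    _h == NULL ? NULL : (type *)_h;                     \
})

int
main(void)
{
    struct wrapper *w = LOOKUP_AS("missing", struct wrapper);

    printf("%s\n", w == NULL ? "not found" : "found");
    return 0;
}
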
diff -Nru dpdk-24.11.3/lib/eal/linux/eal.c dpdk-24.11.4/lib/eal/linux/eal.c --- dpdk-24.11.3/lib/eal/linux/eal.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/eal/linux/eal.c 2025-12-19 12:05:33.000000000 +0000 @@ -1031,10 +1031,13 @@ phys_addrs = rte_eal_using_phys_addrs() != 0; + /* Always call rte_bus_get_iommu_class() to trigger DMA mask detection and validation */ + enum rte_iova_mode bus_iova_mode = rte_bus_get_iommu_class(); + /* if no EAL option "--iova-mode=", use bus IOVA scheme */ if (internal_conf->iova_mode == RTE_IOVA_DC) { /* autodetect the IOVA mapping mode */ - enum rte_iova_mode iova_mode = rte_bus_get_iommu_class(); + enum rte_iova_mode iova_mode = bus_iova_mode; if (iova_mode == RTE_IOVA_DC) { EAL_LOG(DEBUG, "Buses did not request a specific IOVA mode."); @@ -1318,11 +1321,11 @@ rte_memseg_walk(mark_freeable, NULL); rte_service_finalize(); + eal_bus_cleanup(); #ifdef VFIO_PRESENT vfio_mp_sync_cleanup(); #endif rte_mp_channel_cleanup(); - eal_bus_cleanup(); rte_eal_alarm_cleanup(); rte_trace_save(); eal_trace_fini(); diff -Nru dpdk-24.11.3/lib/eal/windows/eal.c dpdk-24.11.4/lib/eal/windows/eal.c --- dpdk-24.11.3/lib/eal/windows/eal.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/eal/windows/eal.c 2025-12-19 12:05:33.000000000 +0000 @@ -339,12 +339,15 @@ has_phys_addr = false; } + /* Always call rte_bus_get_iommu_class() to trigger DMA mask detection and validation */ + enum rte_iova_mode bus_iova_mode = rte_bus_get_iommu_class(); + iova_mode = internal_conf->iova_mode; if (iova_mode == RTE_IOVA_DC) { EAL_LOG(DEBUG, "Specific IOVA mode is not requested, autodetecting"); if (has_phys_addr) { EAL_LOG(DEBUG, "Selecting IOVA mode according to bus requests"); - iova_mode = rte_bus_get_iommu_class(); + iova_mode = bus_iova_mode; if (iova_mode == RTE_IOVA_DC) { if (!RTE_IOVA_IN_MBUF) { iova_mode = RTE_IOVA_VA; diff -Nru dpdk-24.11.3/lib/eal/x86/rte_power_intrinsics.c dpdk-24.11.4/lib/eal/x86/rte_power_intrinsics.c --- dpdk-24.11.3/lib/eal/x86/rte_power_intrinsics.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/eal/x86/rte_power_intrinsics.c 2025-12-19 12:05:33.000000000 +0000 @@ -87,14 +87,14 @@ static void amd_mwaitx(const uint64_t timeout) { - RTE_SET_USED(timeout); #if defined(RTE_TOOLCHAIN_MSVC) || defined(__MWAITX__) - _mm_mwaitx(0, 0, 0); + _mm_mwaitx(2, 0, (uint32_t)timeout); #else asm volatile(".byte 0x0f, 0x01, 0xfb;" : /* ignore rflags */ : "a"(0), /* enter C1 */ - "c"(0)); /* no time-out */ + "b"((uint32_t)timeout), + "c"(2)); /* enable time-out */ #endif } diff -Nru dpdk-24.11.3/lib/efd/rte_efd.c dpdk-24.11.4/lib/efd/rte_efd.c --- dpdk-24.11.3/lib/efd/rte_efd.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/efd/rte_efd.c 2025-12-19 12:05:33.000000000 +0000 @@ -25,6 +25,7 @@ #include "rte_efd.h" #if defined(RTE_ARCH_X86) +#include "rte_efd_x86.h" #elif defined(RTE_ARCH_ARM64) #include "rte_efd_arm64.h" #endif @@ -1273,7 +1274,7 @@ switch (lookup_fn) { -#if defined(RTE_ARCH_X86) && defined(CC_SUPPORT_AVX2) +#if defined(RTE_ARCH_X86) case EFD_LOOKUP_AVX2: return efd_lookup_internal_avx2(group->hash_idx, group->lookup_table, diff -Nru dpdk-24.11.3/lib/ethdev/rte_ethdev.h dpdk-24.11.4/lib/ethdev/rte_ethdev.h --- dpdk-24.11.3/lib/ethdev/rte_ethdev.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/ethdev/rte_ethdev.h 2025-12-19 12:05:33.000000000 +0000 @@ -3682,7 +3682,7 @@ * @param port_id * The port identifier of the Ethernet device. 
* @param rx_queue_id - * The index of the receive queue for which a queue stats mapping is required. + * The index of the receive queue on which to enable/disable VLAN stripping. * The value must be in the range [0, nb_rx_queue - 1] previously supplied * to rte_eth_dev_configure(). * @param on diff -Nru dpdk-24.11.3/lib/eventdev/rte_event_crypto_adapter.c dpdk-24.11.4/lib/eventdev/rte_event_crypto_adapter.c --- dpdk-24.11.3/lib/eventdev/rte_event_crypto_adapter.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/eventdev/rte_event_crypto_adapter.c 2025-12-19 12:05:33.000000000 +0000 @@ -1454,7 +1454,7 @@ &caps); if (ret) { RTE_EDEV_LOG_ERR("Failed to get adapter caps dev %" PRIu8 - " cdev %" PRIu8, adapter->eventdev_id, + " cdev %" PRIu16, adapter->eventdev_id, adapter->next_cdev_id); return ret; } @@ -1581,7 +1581,7 @@ RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); if (!rte_cryptodev_is_valid_dev(cdev_id)) { - RTE_EDEV_LOG_ERR("Invalid dev_id=%" PRIu8, cdev_id); + RTE_EDEV_LOG_ERR("Invalid dev_id=%" PRIu16, cdev_id); return -EINVAL; } @@ -1602,7 +1602,7 @@ if (!(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_EVENT_VECTOR)) { RTE_EDEV_LOG_ERR("Event vectorization is not supported," - "dev %" PRIu8 " cdev %" PRIu8, dev_id, cdev_id); + "dev %" PRIu8 " cdev %" PRIu16, dev_id, cdev_id); return -ENOTSUP; } diff -Nru dpdk-24.11.3/lib/eventdev/rte_event_timer_adapter.c dpdk-24.11.4/lib/eventdev/rte_event_timer_adapter.c --- dpdk-24.11.3/lib/eventdev/rte_event_timer_adapter.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/eventdev/rte_event_timer_adapter.c 2025-12-19 12:05:33.000000000 +0000 @@ -1398,7 +1398,7 @@ adapter_id = atoi(params); - if (adapter_id >= RTE_EVENT_TIMER_ADAPTER_NUM_MAX) { + if (adapters == NULL || adapter_id >= RTE_EVENT_TIMER_ADAPTER_NUM_MAX) { EVTIM_LOG_ERR("Invalid timer adapter id %u", adapter_id); return -EINVAL; } @@ -1444,7 +1444,7 @@ adapter_id = atoi(params); - if (adapter_id >= RTE_EVENT_TIMER_ADAPTER_NUM_MAX) { + if (adapters == NULL || adapter_id >= RTE_EVENT_TIMER_ADAPTER_NUM_MAX) { EVTIM_LOG_ERR("Invalid timer adapter id %u", adapter_id); return -EINVAL; } diff -Nru dpdk-24.11.3/lib/eventdev/rte_event_timer_adapter.h dpdk-24.11.4/lib/eventdev/rte_event_timer_adapter.h --- dpdk-24.11.3/lib/eventdev/rte_event_timer_adapter.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/eventdev/rte_event_timer_adapter.h 2025-12-19 12:05:33.000000000 +0000 @@ -566,7 +566,7 @@ * Before calling this function, the application allocates * ``struct rte_event_timer`` objects from mempool or huge page backed * application buffers of desired size. On successful allocation, - * application updates the `struct rte_event_timer`` attributes such as + * application updates the ``struct rte_event_timer`` attributes such as * expiry event attributes, timeout ticks from now. * This function submits the event timer arm requests to the event timer adapter * and on expiry, the events will be injected to designated event queue. 
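
Several logging fixes in this update (the event crypto adapter above, the rawdev library later in the diff) switch the conversion macro from PRIu8 to PRIu16 so that it matches the declared width of the device identifier. A trivial self-contained illustration of the <inttypes.h> macros, with arbitrary values:

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
    uint8_t  eventdev_id = 3;
    uint16_t cdev_id = 300;     /* does not fit in 8 bits */

    /* Match the macro to the variable's declared type, not to the values expected at runtime. */
    printf("dev %" PRIu8 " cdev %" PRIu16 "\n", eventdev_id, cdev_id);
    return 0;
}
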
diff -Nru dpdk-24.11.3/lib/fib/trie.c dpdk-24.11.4/lib/fib/trie.c --- dpdk-24.11.3/lib/fib/trie.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/fib/trie.c 2025-12-19 12:05:33.000000000 +0000 @@ -515,7 +515,7 @@ struct rte_rib6_node *tmp = NULL; struct rte_rib6_node *node; struct rte_rib6_node *parent; - struct rte_ipv6_addr ip_masked; + struct rte_ipv6_addr ip_masked, tmp_ip; int ret = 0; uint64_t par_nh, node_nh; uint8_t tmp_depth, depth_diff = 0, parent_depth = 24; @@ -534,9 +534,25 @@ if (depth > 24) { tmp = rte_rib6_get_nxt(rib, &ip_masked, RTE_ALIGN_FLOOR(depth, 8), NULL, - RTE_RIB6_GET_NXT_COVER); + RTE_RIB6_GET_NXT_ALL); + if (tmp && op == RTE_FIB6_DEL) { + /* in case of delete operation, skip the prefix we are going to delete */ + rte_rib6_get_ip(tmp, &tmp_ip); + rte_rib6_get_depth(tmp, &tmp_depth); + if (rte_ipv6_addr_eq(&ip_masked, &tmp_ip) && depth == tmp_depth) + tmp = rte_rib6_get_nxt(rib, &ip_masked, + RTE_ALIGN_FLOOR(depth, 8), tmp, RTE_RIB6_GET_NXT_ALL); + } + if (tmp == NULL) { tmp = rte_rib6_lookup(rib, ip); + /** + * in case of delete operation, lookup returns the prefix + * we are going to delete. Find the parent. + */ + if (tmp && op == RTE_FIB6_DEL) + tmp = rte_rib6_lookup_parent(tmp); + if (tmp != NULL) { rte_rib6_get_depth(tmp, &tmp_depth); parent_depth = RTE_MAX(tmp_depth, 24); @@ -559,8 +575,7 @@ return 0; } - if ((depth > 24) && (dp->rsvd_tbl8s >= - dp->number_tbl8s - depth_diff)) + if ((depth > 24) && (dp->rsvd_tbl8s + depth_diff > dp->number_tbl8s)) return -ENOSPC; node = rte_rib6_insert(rib, &ip_masked, depth); diff -Nru dpdk-24.11.3/lib/graph/graph.c dpdk-24.11.4/lib/graph/graph.c --- dpdk-24.11.3/lib/graph/graph.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/graph/graph.c 2025-12-19 12:05:33.000000000 +0000 @@ -260,6 +260,20 @@ graph_node->node->name)); } +void +graph_node_replace_all(struct node *old, struct node *new) +{ + struct graph_node *graph_node; + struct graph *graph; + + STAILQ_FOREACH(graph, &graph_list, next) { + STAILQ_FOREACH(graph_node, &graph->node_list, next) { + if (graph_node->node == old) + graph_node->node = new; + } + } +} + static struct rte_graph * graph_mem_fixup_node_ctx(struct rte_graph *graph) { diff -Nru dpdk-24.11.3/lib/graph/graph_private.h dpdk-24.11.4/lib/graph/graph_private.h --- dpdk-24.11.3/lib/graph/graph_private.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/graph/graph_private.h 2025-12-19 12:05:33.000000000 +0000 @@ -295,6 +295,18 @@ /** * @internal * + * Replace all pointers of a given node with another one in all active graphs. + * + * @param old + * Node pointer to replace in all graphs. + * @param new + * Updated pointer. + */ +void graph_node_replace_all(struct node *old, struct node *new); + +/** + * @internal + * * Get the count of source nodes in the graph. 
* * @param graph diff -Nru dpdk-24.11.3/lib/graph/graph_stats.c dpdk-24.11.4/lib/graph/graph_stats.c --- dpdk-24.11.3/lib/graph/graph_stats.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/graph/graph_stats.c 2025-12-19 12:05:33.000000000 +0000 @@ -36,7 +36,6 @@ int socket_id; bool dispatch; void *cookie; - size_t sz; struct cluster_node clusters[]; }; @@ -177,15 +176,55 @@ return graph_cluster_stats_cb(true, is_first, is_last, cookie, stat); }; +static uint32_t +cluster_count_nodes(const struct cluster *cluster) +{ + rte_node_t *nodes = NULL; + uint32_t max_nodes = 0; + + for (unsigned int i = 0; i < cluster->nb_graphs; i++) { + struct graph_node *graph_node; + + STAILQ_FOREACH(graph_node, &cluster->graphs[i]->node_list, next) { + rte_node_t *new_nodes; + unsigned int n; + + for (n = 0; n < max_nodes; n++) { + if (nodes[n] != graph_node->node->id) + continue; + break; + } + if (n != max_nodes) + continue; + + max_nodes++; + new_nodes = realloc(nodes, max_nodes * sizeof(nodes[0])); + if (new_nodes == NULL) { + free(nodes); + return 0; + } + nodes = new_nodes; + nodes[n] = graph_node->node->id; + } + } + free(nodes); + + return max_nodes; +} + static struct rte_graph_cluster_stats * stats_mem_init(struct cluster *cluster, const struct rte_graph_cluster_stats_param *prm) { - size_t sz = sizeof(struct rte_graph_cluster_stats); struct rte_graph_cluster_stats *stats; rte_graph_cluster_stats_cb_t fn; int socket_id = prm->socket_id; uint32_t cluster_node_size; + uint32_t max_nodes; + + max_nodes = cluster_count_nodes(cluster); + if (max_nodes == 0) + return NULL; /* Fix up callback */ fn = prm->fn; @@ -202,25 +241,23 @@ cluster_node_size += cluster->nb_graphs * sizeof(struct rte_node *); cluster_node_size = RTE_ALIGN(cluster_node_size, RTE_CACHE_LINE_SIZE); - stats = realloc(NULL, sz); + stats = rte_zmalloc_socket(NULL, sizeof(struct rte_graph_cluster_stats) + + max_nodes * cluster_node_size, 0, socket_id); if (stats) { - memset(stats, 0, sz); stats->fn = fn; stats->cluster_node_size = cluster_node_size; stats->max_nodes = 0; stats->socket_id = socket_id; stats->cookie = prm->cookie; - stats->sz = sz; } return stats; } static int -stats_mem_populate(struct rte_graph_cluster_stats **stats_in, +stats_mem_populate(struct rte_graph_cluster_stats *stats, struct rte_graph *graph, struct graph_node *graph_node) { - struct rte_graph_cluster_stats *stats = *stats_in; rte_node_t id = graph_node->node->id; struct cluster_node *cluster; struct rte_node *node; @@ -246,21 +283,12 @@ cluster = RTE_PTR_ADD(cluster, stats->cluster_node_size); } - /* Hey, it is a new node, allocate space for it in the reel */ - stats = realloc(stats, stats->sz + stats->cluster_node_size); - if (stats == NULL) - SET_ERR_JMP(ENOMEM, err, "Realloc failed"); - *stats_in = NULL; - - /* Clear the new struct cluster_node area */ - cluster = RTE_PTR_ADD(stats, stats->sz), - memset(cluster, 0, stats->cluster_node_size); memcpy(cluster->stat.name, graph_node->node->name, RTE_NODE_NAMESIZE); cluster->stat.id = graph_node->node->id; cluster->stat.hz = rte_get_timer_hz(); node = graph_node_id_to_ptr(graph, id); if (node == NULL) - SET_ERR_JMP(ENOENT, free, "Failed to find node %s in graph %s", + SET_ERR_JMP(ENOENT, err, "Failed to find node %s in graph %s", graph_node->node->name, graph->name); cluster->nodes[cluster->nb_nodes++] = node; if (graph_node->node->xstats) { @@ -269,15 +297,15 @@ sizeof(uint64_t) * graph_node->node->xstats->nb_xstats, RTE_CACHE_LINE_SIZE, stats->socket_id); if (cluster->stat.xstat_count == NULL) - 
SET_ERR_JMP(ENOMEM, free, "Failed to allocate memory node %s graph %s", + SET_ERR_JMP(ENOMEM, err, "Failed to allocate memory node %s graph %s", graph_node->node->name, graph->name); cluster->stat.xstat_desc = rte_zmalloc_socket(NULL, - sizeof(RTE_NODE_XSTAT_DESC_SIZE) * graph_node->node->xstats->nb_xstats, + RTE_NODE_XSTAT_DESC_SIZE * graph_node->node->xstats->nb_xstats, RTE_CACHE_LINE_SIZE, stats->socket_id); if (cluster->stat.xstat_desc == NULL) { rte_free(cluster->stat.xstat_count); - SET_ERR_JMP(ENOMEM, free, "Failed to allocate memory node %s graph %s", + SET_ERR_JMP(ENOMEM, err, "Failed to allocate memory node %s graph %s", graph_node->node->name, graph->name); } @@ -287,31 +315,21 @@ RTE_NODE_XSTAT_DESC_SIZE) < 0) { rte_free(cluster->stat.xstat_count); rte_free(cluster->stat.xstat_desc); - SET_ERR_JMP(E2BIG, free, + SET_ERR_JMP(E2BIG, err, "Error description overflow node %s graph %s", graph_node->node->name, graph->name); } } } - stats->sz += stats->cluster_node_size; stats->max_nodes++; - *stats_in = stats; return 0; -free: - free(stats); err: return -rte_errno; } static void -stats_mem_fini(struct rte_graph_cluster_stats *stats) -{ - free(stats); -} - -static void cluster_init(struct cluster *cluster) { memset(cluster, 0, sizeof(*cluster)); @@ -379,10 +397,7 @@ rte_graph_cluster_stats_create(const struct rte_graph_cluster_stats_param *prm) { struct rte_graph_cluster_stats *stats, *rc = NULL; - struct graph_node *graph_node; struct cluster cluster; - struct graph *graph; - const char *pattern; rte_graph_t i; /* Sanity checks */ @@ -400,37 +415,36 @@ graph_spinlock_lock(); /* Expand graph pattern and add the graph to the cluster */ for (i = 0; i < prm->nb_graph_patterns; i++) { - pattern = prm->graph_patterns[i]; - if (expand_pattern_to_cluster(&cluster, pattern)) + if (expand_pattern_to_cluster(&cluster, prm->graph_patterns[i])) goto bad_pattern; } /* Alloc the stats memory */ stats = stats_mem_init(&cluster, prm); if (stats == NULL) - SET_ERR_JMP(ENOMEM, bad_pattern, "Failed alloc stats memory"); + SET_ERR_JMP(ENOMEM, bad_pattern, "Failed rte_malloc for stats memory"); /* Iterate over M(Graph) x N (Nodes in graph) */ for (i = 0; i < cluster.nb_graphs; i++) { + struct graph_node *graph_node; + struct graph *graph; + graph = cluster.graphs[i]; STAILQ_FOREACH(graph_node, &graph->node_list, next) { struct rte_graph *graph_fp = graph->graph; - if (stats_mem_populate(&stats, graph_fp, graph_node)) + if (stats_mem_populate(stats, graph_fp, graph_node)) goto realloc_fail; } if (graph->graph->model == RTE_GRAPH_MODEL_MCORE_DISPATCH) stats->dispatch = true; } - /* Finally copy to hugepage memory to avoid pressure on rte_realloc */ - rc = rte_malloc_socket(NULL, stats->sz, 0, stats->socket_id); - if (rc) - rte_memcpy(rc, stats, stats->sz); - else - SET_ERR_JMP(ENOMEM, realloc_fail, "rte_malloc failed"); + rc = stats; + stats = NULL; realloc_fail: - stats_mem_fini(stats); + if (stats != NULL) + rte_graph_cluster_stats_destroy(stats); bad_pattern: graph_spinlock_unlock(); cluster_fini(&cluster); @@ -467,7 +481,8 @@ uint64_t *xstat; uint8_t i; - memset(stat->xstat_count, 0, sizeof(uint64_t) * stat->xstat_cntrs); + if (stat->xstat_cntrs != 0) + memset(stat->xstat_count, 0, sizeof(uint64_t) * stat->xstat_cntrs); for (count = 0; count < cluster->nb_nodes; count++) { node = cluster->nodes[count]; diff -Nru dpdk-24.11.3/lib/graph/node.c dpdk-24.11.4/lib/graph/node.c --- dpdk-24.11.3/lib/graph/node.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/graph/node.c 2025-12-19 12:05:33.000000000 
+0000 @@ -268,11 +268,15 @@ need_realloc = max_edges > node->nb_edges; if (need_realloc) { sz = sizeof(struct node) + (max_edges * RTE_NODE_NAMESIZE); - new_node = realloc(node, sz); + new_node = malloc(sz); if (new_node == NULL) { rte_errno = ENOMEM; goto restore; } else { + sz = sizeof(*node) + (node->nb_edges * RTE_NODE_NAMESIZE); + memcpy(new_node, node, sz); + graph_node_replace_all(node, new_node); + free(node); node = new_node; } } diff -Nru dpdk-24.11.3/lib/gro/gro_tcp.h dpdk-24.11.4/lib/gro/gro_tcp.h --- dpdk-24.11.3/lib/gro/gro_tcp.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/gro/gro_tcp.h 2025-12-19 12:05:33.000000000 +0000 @@ -133,7 +133,7 @@ pkt_head->nb_segs += pkt_tail->nb_segs; pkt_head->pkt_len += pkt_tail->pkt_len; if (tcp_flags != RTE_TCP_ACK_FLAG) { - tcp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_tcp_hdr *, + tcp_hdr = rte_pktmbuf_mtod_offset(pkt_head, struct rte_tcp_hdr *, l2_offset + pkt_head->l2_len + pkt_head->l3_len); tcp_hdr->tcp_flags |= tcp_flags; } diff -Nru dpdk-24.11.3/lib/hash/rte_thash.c dpdk-24.11.4/lib/hash/rte_thash.c --- dpdk-24.11.3/lib/hash/rte_thash.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/hash/rte_thash.c 2025-12-19 12:05:33.000000000 +0000 @@ -409,10 +409,10 @@ static inline uint32_t get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset) { - uint32_t *tmp, val; + uint32_t tmp, val; - tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]); - val = rte_be_to_cpu_32(*tmp); + tmp = *(unaligned_uint32_t *)&ctx->hash_key[offset >> 3]; + val = rte_be_to_cpu_32(tmp); val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) + ctx->reta_sz_log)); diff -Nru dpdk-24.11.3/lib/net/rte_ip6.h dpdk-24.11.4/lib/net/rte_ip6.h --- dpdk-24.11.3/lib/net/rte_ip6.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/net/rte_ip6.h 2025-12-19 12:05:33.000000000 +0000 @@ -393,7 +393,7 @@ /* * Generate a link-local IPv6 address from an Ethernet address as specified in - * RFC 2464, section 5. + * RFC 4291, section 2.5.1. * * @param[out] ip * The link-local IPv6 address to generate. @@ -406,7 +406,12 @@ ip->a[0] = 0xfe; ip->a[1] = 0x80; memset(&ip->a[2], 0, 6); - ip->a[8] = mac->addr_bytes[0]; + /* + * The "u" bit (universal/local bit in IEEE EUI-64 terminology) + * must be inverted for IPv6 link local address. + * 0 means local scope, 1 means universal scope. 
+ */ + ip->a[8] = mac->addr_bytes[0] ^ RTE_ETHER_LOCAL_ADMIN_ADDR; ip->a[9] = mac->addr_bytes[1]; ip->a[10] = mac->addr_bytes[2]; ip->a[11] = 0xff; diff -Nru dpdk-24.11.3/lib/net/rte_net.c dpdk-24.11.4/lib/net/rte_net.c --- dpdk-24.11.3/lib/net/rte_net.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/net/rte_net.c 2025-12-19 12:05:33.000000000 +0000 @@ -376,6 +376,7 @@ pkt_type |= ptype_tunnel(&proto, m, &off); hdr_lens->tunnel_len = off - prev_off; + hdr_lens->inner_l2_len = off - prev_off; } /* same job for inner header: we need to duplicate the code diff -Nru dpdk-24.11.3/lib/rawdev/rte_rawdev.c dpdk-24.11.4/lib/rawdev/rte_rawdev.c --- dpdk-24.11.3/lib/rawdev/rte_rawdev.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/rawdev/rte_rawdev.c 2025-12-19 12:05:33.000000000 +0000 @@ -403,12 +403,12 @@ struct rte_rawdev *dev; int diag; - RTE_RDEV_DEBUG("Start dev_id=%" PRIu8, dev_id); + RTE_RDEV_DEBUG("Start dev_id=%" PRIu16, dev_id); RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); dev = &rte_rawdevs[dev_id]; if (dev->started != 0) { - RTE_RDEV_ERR("Device with dev_id=%" PRIu8 "already started", + RTE_RDEV_ERR("Device with dev_id=%" PRIu16 "already started", dev_id); return 0; } @@ -430,13 +430,13 @@ { struct rte_rawdev *dev; - RTE_RDEV_DEBUG("Stop dev_id=%" PRIu8, dev_id); + RTE_RDEV_DEBUG("Stop dev_id=%" PRIu16, dev_id); RTE_RAWDEV_VALID_DEVID_OR_RET(dev_id); dev = &rte_rawdevs[dev_id]; if (dev->started == 0) { - RTE_RDEV_ERR("Device with dev_id=%" PRIu8 "already stopped", + RTE_RDEV_ERR("Device with dev_id=%" PRIu16 "already stopped", dev_id); return; } diff -Nru dpdk-24.11.3/lib/rawdev/rte_rawdev_pmd.h dpdk-24.11.4/lib/rawdev/rte_rawdev_pmd.h --- dpdk-24.11.3/lib/rawdev/rte_rawdev_pmd.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/rawdev/rte_rawdev_pmd.h 2025-12-19 12:05:33.000000000 +0000 @@ -506,7 +506,7 @@ * >0, ~0: for successful load * <0: for failure * - * @see Application may use 'firmware_version_get` for ascertaining successful + * @see Application may use `firmware_version_get` for ascertaining successful * load */ typedef int (*rawdev_firmware_load_t)(struct rte_rawdev *dev, diff -Nru dpdk-24.11.3/lib/ring/rte_ring_c11_pvt.h dpdk-24.11.4/lib/ring/rte_ring_c11_pvt.h --- dpdk-24.11.3/lib/ring/rte_ring_c11_pvt.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/ring/rte_ring_c11_pvt.h 2025-12-19 12:05:33.000000000 +0000 @@ -24,7 +24,12 @@ if (!single) rte_wait_until_equal_32((uint32_t *)(uintptr_t)&ht->tail, old_val, rte_memory_order_relaxed); - + /* + * R0: Establishes a synchronizing edge with load-acquire of + * cons_tail at A1 or prod_tail at A4. + * Ensures that memory effects by this thread on ring elements array + * is observed by a different thread of the other type. + */ rte_atomic_store_explicit(&ht->tail, new_val, rte_memory_order_release); } @@ -62,16 +67,24 @@ unsigned int max = n; int success; - *old_head = rte_atomic_load_explicit(&r->prod.head, rte_memory_order_relaxed); + /* + * A0: Establishes a synchronizing edge with R1. + * Ensure that this thread observes same values + * to cons_tail observed by the thread that + * updated r->prod.head. + * If not, an unsafe partial order may ensue. + */ + *old_head = rte_atomic_load_explicit(&r->prod.head, rte_memory_order_acquire); do { /* Reset n to the initial burst count */ n = max; - /* Ensure the head is read before tail */ - rte_atomic_thread_fence(rte_memory_order_acquire); - /* load-acquire synchronize with store-release of ht->tail - * in update_tail. 
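
The rte_ip6.h change above inverts the universal/local ("u") bit when forming the modified EUI-64 interface identifier, as required by RFC 4291. A standalone sketch of the same derivation in plain C, using an arbitrary sample MAC address and no DPDK types:

#include <stdint.h>
#include <stdio.h>

/* Build an fe80::/64 link-local address from a 48-bit MAC (RFC 4291, appendix A). */
static void
mac_to_link_local(const uint8_t mac[6], uint8_t ip[16])
{
    ip[0] = 0xfe; ip[1] = 0x80;
    for (int i = 2; i < 8; i++)
        ip[i] = 0;
    ip[8]  = mac[0] ^ 0x02;   /* invert the universal/local ("u") bit */
    ip[9]  = mac[1];
    ip[10] = mac[2];
    ip[11] = 0xff;            /* fixed ff:fe filler in the middle of the EUI-64 */
    ip[12] = 0xfe;
    ip[13] = mac[3];
    ip[14] = mac[4];
    ip[15] = mac[5];
}

int
main(void)
{
    const uint8_t mac[6] = { 0x00, 0x1b, 0x21, 0x0a, 0x0b, 0x0c };
    uint8_t ip[16];

    mac_to_link_local(mac, ip);
    /* Prints fe80:0000:0000:0000:021b:21ff:fe0a:0b0c */
    for (int i = 0; i < 16; i += 2)
        printf("%02x%02x%s", ip[i], ip[i + 1], i < 14 ? ":" : "\n");
    return 0;
}
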
+ /* + * A1: Establishes a synchronizing edge with R0. + * Ensures that other thread's memory effects on + * ring elements array is observed by the time + * this thread observes its tail update. */ cons_tail = rte_atomic_load_explicit(&r->cons.tail, rte_memory_order_acquire); @@ -97,10 +110,19 @@ success = 1; } else /* on failure, *old_head is updated */ + /* + * R1/A2. + * R1: Establishes a synchronizing edge with A0 of a + * different thread. + * A2: Establishes a synchronizing edge with R1 of a + * different thread to observe same value for + * cons_tail observed by that thread on CAS failure + * (to retry with an updated *old_head). + */ success = rte_atomic_compare_exchange_strong_explicit(&r->prod.head, old_head, *new_head, - rte_memory_order_relaxed, - rte_memory_order_relaxed); + rte_memory_order_release, + rte_memory_order_acquire); } while (unlikely(success == 0)); return n; } @@ -138,17 +160,23 @@ uint32_t prod_tail; int success; - /* move cons.head atomically */ - *old_head = rte_atomic_load_explicit(&r->cons.head, rte_memory_order_relaxed); + /* + * A3: Establishes a synchronizing edge with R2. + * Ensure that this thread observes same values + * to prod_tail observed by the thread that + * updated r->cons.head. + * If not, an unsafe partial order may ensue. + */ + *old_head = rte_atomic_load_explicit(&r->cons.head, rte_memory_order_acquire); do { /* Restore n as it may change every loop */ n = max; - /* Ensure the head is read before tail */ - rte_atomic_thread_fence(rte_memory_order_acquire); - - /* this load-acquire synchronize with store-release of ht->tail - * in update_tail. + /* + * A4: Establishes a synchronizing edge with R0. + * Ensures that other thread's memory effects on + * ring elements array is observed by the time + * this thread observes its tail update. */ prod_tail = rte_atomic_load_explicit(&r->prod.tail, rte_memory_order_acquire); @@ -173,10 +201,19 @@ success = 1; } else /* on failure, *old_head will be updated */ + /* + * R2/A5. + * R2: Establishes a synchronizing edge with A3 of a + * different thread. + * A5: Establishes a synchronizing edge with R2 of a + * different thread to observe same value for + * prod_tail observed by that thread on CAS failure + * (to retry with an updated *old_head). + */ success = rte_atomic_compare_exchange_strong_explicit(&r->cons.head, old_head, *new_head, - rte_memory_order_relaxed, - rte_memory_order_relaxed); + rte_memory_order_release, + rte_memory_order_acquire); } while (unlikely(success == 0)); return n; } diff -Nru dpdk-24.11.3/lib/ring/rte_ring_hts_elem_pvt.h dpdk-24.11.4/lib/ring/rte_ring_hts_elem_pvt.h --- dpdk-24.11.3/lib/ring/rte_ring_hts_elem_pvt.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/ring/rte_ring_hts_elem_pvt.h 2025-12-19 12:05:33.000000000 +0000 @@ -32,22 +32,40 @@ RTE_SET_USED(enqueue); tail = old_tail + num; + + /* + * R0: Release the tail update. Establishes a synchronization edge with + * the load-acquire at A1/A3. This release ensures that all updates to + * *ht and the ring array made by this thread become visible to the + * opposing thread once the tail value written here is observed. + */ rte_atomic_store_explicit(&ht->ht.pos.tail, tail, rte_memory_order_release); } /** - * @internal waits till tail will become equal to head. - * Means no writer/reader is active for that ring. - * Suppose to work as serialization point. + * @internal + * Waits until the tail becomes equal to the head. 
+ * This indicates that another thread has finished its transaction, and there + * is a chance that we could be the next writer or reader in line. + * + * Returns ht.raw at this point. The value may be imprecise, since another + * thread might change the state before we observe ht.raw, but that does not + * matter. The function __rte_ring_hts_move_head() can detect and recall this + * function when it reaches the linearization point (CAS). */ -static __rte_always_inline void +static __rte_always_inline union __rte_ring_hts_pos __rte_ring_hts_head_wait(const struct rte_ring_hts_headtail *ht, - union __rte_ring_hts_pos *p) + int memorder) { - while (p->pos.head != p->pos.tail) { + union __rte_ring_hts_pos p; + p.raw = rte_atomic_load_explicit(&ht->ht.raw, memorder); + + while (p.pos.head != p.pos.tail) { rte_pause(); - p->raw = rte_atomic_load_explicit(&ht->ht.raw, rte_memory_order_acquire); + p.raw = rte_atomic_load_explicit(&ht->ht.raw, memorder); } + + return p; } /** @@ -58,13 +76,11 @@ enum rte_ring_queue_behavior behavior, uint32_t *old_head, uint32_t *free_entries) { - uint32_t n; + uint32_t n, cons_tail; union __rte_ring_hts_pos np, op; const uint32_t capacity = r->capacity; - op.raw = rte_atomic_load_explicit(&r->hts_prod.ht.raw, rte_memory_order_acquire); - do { /* Reset n to the initial burst count */ n = num; @@ -74,7 +90,20 @@ * make sure that we read prod head/tail *before* * reading cons tail. */ - __rte_ring_hts_head_wait(&r->hts_prod, &op); + /* + * A0: Synchronizes with the CAS at R1. + * Establishes a happens-before relationship with a thread of the same + * type that released the ht.raw, ensuring this thread observes all of + * its memory effects needed to maintain a safe partial order. + */ + op = __rte_ring_hts_head_wait(&r->hts_prod, rte_memory_order_acquire); + + /* + * A1: Establish a synchronizes-with edge using a store-release at R0. + * This ensures that all memory effects from the preceding opposing + * thread are observed. + */ + cons_tail = rte_atomic_load_explicit(&r->cons.tail, rte_memory_order_acquire); /* * The subtraction is done between two unsigned 32bits value @@ -82,7 +111,7 @@ * *old_head > cons_tail). So 'free_entries' is always between 0 * and capacity (which is < size). */ - *free_entries = capacity + r->cons.tail - op.pos.head; + *free_entries = capacity + cons_tail - op.pos.head; /* check that we have enough room in ring */ if (unlikely(n > *free_entries)) @@ -96,13 +125,16 @@ np.pos.head = op.pos.head + n; /* - * this CAS(ACQUIRE, ACQUIRE) serves as a hoist barrier to prevent: - * - OOO reads of cons tail value - * - OOO copy of elems from the ring + * R1: Establishes a synchronizes-with edge with the load-acquire + * of ht.raw at A0. This makes sure that the store-release to the + * tail by this thread, if it was of the opposite type, becomes + * visible to another thread of the current type. That thread will + * then observe the updates in the same order, keeping a safe + * partial order. 
*/ } while (rte_atomic_compare_exchange_strong_explicit(&r->hts_prod.ht.raw, (uint64_t *)(uintptr_t)&op.raw, np.raw, - rte_memory_order_acquire, rte_memory_order_acquire) == 0); + rte_memory_order_release, rte_memory_order_relaxed) == 0); *old_head = op.pos.head; return n; @@ -116,11 +148,9 @@ enum rte_ring_queue_behavior behavior, uint32_t *old_head, uint32_t *entries) { - uint32_t n; + uint32_t n, prod_tail; union __rte_ring_hts_pos np, op; - op.raw = rte_atomic_load_explicit(&r->hts_cons.ht.raw, rte_memory_order_acquire); - /* move cons.head atomically */ do { /* Restore n as it may change every loop */ @@ -131,14 +161,27 @@ * make sure that we read cons head/tail *before* * reading prod tail. */ - __rte_ring_hts_head_wait(&r->hts_cons, &op); + /* + * A2: Synchronizes with the CAS at R2. + * Establishes a happens-before relationship with a thread of the same + * type that released the ht.raw, ensuring this thread observes all of + * its memory effects needed to maintain a safe partial order. + */ + op = __rte_ring_hts_head_wait(&r->hts_cons, rte_memory_order_acquire); + + /* + * A3: Establish a synchronizes-with edge using a store-release at R0. + * This ensures that all memory effects from the preceding opposing + * thread are observed. + */ + prod_tail = rte_atomic_load_explicit(&r->prod.tail, rte_memory_order_acquire); /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * cons_head > prod_tail). So 'entries' is always between 0 * and size(ring)-1. */ - *entries = r->prod.tail - op.pos.head; + *entries = prod_tail - op.pos.head; /* Set the actual entries for dequeue */ if (n > *entries) @@ -151,13 +194,16 @@ np.pos.head = op.pos.head + n; /* - * this CAS(ACQUIRE, ACQUIRE) serves as a hoist barrier to prevent: - * - OOO reads of prod tail value - * - OOO copy of elems from the ring + * R2: Establishes a synchronizes-with edge with the load-acquire + * of ht.raw at A2. This makes sure that the store-release to the + * tail by this thread, if it was of the opposite type, becomes + * visible to another thread of the current type. That thread will + * then observe the updates in the same order, keeping a safe + * partial order. */ } while (rte_atomic_compare_exchange_strong_explicit(&r->hts_cons.ht.raw, (uint64_t *)(uintptr_t)&op.raw, np.raw, - rte_memory_order_acquire, rte_memory_order_acquire) == 0); + rte_memory_order_release, rte_memory_order_relaxed) == 0); *old_head = op.pos.head; return n; diff -Nru dpdk-24.11.3/lib/ring/rte_ring_rts_elem_pvt.h dpdk-24.11.4/lib/ring/rte_ring_rts_elem_pvt.h --- dpdk-24.11.3/lib/ring/rte_ring_rts_elem_pvt.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/ring/rte_ring_rts_elem_pvt.h 2025-12-19 12:05:33.000000000 +0000 @@ -31,6 +31,17 @@ * might preceded us, then don't update tail with new value. */ + /* + * A0 = {A0.a, A0.b}: Synchronizes with the CAS at R0. + * The CAS at R0 in same typed thread establishes a happens-before + * relationship with this load acquire. Ensures that this thread + * observes the same or later values for h.raw/h.val.cnt + * observed by the other thread when it updated ht->tail.raw. + * If not, ht->tail.raw may get updated out of sync (e.g. getting + * updated to the same value twice). A0.a makes sure this condition + * holds when CAS succeeds and A0.b when it fails. 
+ */ + /* A0.a */ ot.raw = rte_atomic_load_explicit(&ht->tail.raw, rte_memory_order_acquire); do { @@ -41,6 +52,11 @@ if (++nt.val.cnt == h.val.cnt) nt.val.pos = h.val.pos; + /* + * R0: Synchronizes with A2 of a different thread of the opposite type and A0.b + * of a different thread of the same type. + */ + /* A0.b */ } while (rte_atomic_compare_exchange_strong_explicit(&ht->tail.raw, (uint64_t *)(uintptr_t)&ot.raw, nt.raw, rte_memory_order_release, rte_memory_order_acquire) == 0); @@ -50,18 +66,22 @@ * @internal This function waits till head/tail distance wouldn't * exceed pre-defined max value. */ -static __rte_always_inline void +static __rte_always_inline union __rte_ring_rts_poscnt __rte_ring_rts_head_wait(const struct rte_ring_rts_headtail *ht, - union __rte_ring_rts_poscnt *h) + int memorder) { - uint32_t max; + union __rte_ring_rts_poscnt h; + uint32_t max = ht->htd_max; + - max = ht->htd_max; + h.raw = rte_atomic_load_explicit(&ht->head.raw, memorder); - while (h->val.pos - ht->tail.val.pos > max) { + while (h.val.pos - ht->tail.val.pos > max) { rte_pause(); - h->raw = rte_atomic_load_explicit(&ht->head.raw, rte_memory_order_acquire); + h.raw = rte_atomic_load_explicit(&ht->head.raw, memorder); } + + return h; } /** @@ -72,13 +92,11 @@ enum rte_ring_queue_behavior behavior, uint32_t *old_head, uint32_t *free_entries) { - uint32_t n; + uint32_t n, cons_tail; union __rte_ring_rts_poscnt nh, oh; const uint32_t capacity = r->capacity; - oh.raw = rte_atomic_load_explicit(&r->rts_prod.head.raw, rte_memory_order_acquire); - do { /* Reset n to the initial burst count */ n = num; @@ -88,7 +106,20 @@ * make sure that we read prod head *before* * reading cons tail. */ - __rte_ring_rts_head_wait(&r->rts_prod, &oh); + /* + * A1 Synchronizes with the CAS at R1. + * Establishes a happens-before relationship with a thread of the same + * type that released the ht.raw, ensuring this thread observes all of + * its memory effects needed to maintain a safe partial order. + */ + oh = __rte_ring_rts_head_wait(&r->rts_prod, rte_memory_order_acquire); + + /* + * A2: Establish a synchronizes-with edge using a store-release at R0. + * This ensures that all memory effects from the preceding opposing + * thread are observed. + */ + cons_tail = rte_atomic_load_explicit(&r->cons.tail, rte_memory_order_acquire); /* * The subtraction is done between two unsigned 32bits value @@ -96,7 +127,7 @@ * *old_head > cons_tail). So 'free_entries' is always between 0 * and capacity (which is < size). */ - *free_entries = capacity + r->cons.tail - oh.val.pos; + *free_entries = capacity + cons_tail - oh.val.pos; /* check that we have enough room in ring */ if (unlikely(n > *free_entries)) @@ -110,13 +141,16 @@ nh.val.cnt = oh.val.cnt + 1; /* - * this CAS(ACQUIRE, ACQUIRE) serves as a hoist barrier to prevent: - * - OOO reads of cons tail value - * - OOO copy of elems to the ring + * R1: Establishes a synchronizes-with edge with the load-acquire + * of ht.raw at A1. Ensures that the store-release to the tail by + * this thread, if it was of the opposite type, becomes + * visible to another thread of the current type. That thread will + * then observe the updates in the same order, keeping a safe + * partial order. 
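
The rewritten comments in the C11, HTS and RTS ring headers above name each load-acquire and the store-release it synchronizes with; the public ring API is unchanged. For context, a minimal multi-producer/multi-consumer usage sketch; the ring name, size and dummy payload are placeholders, and rte_eal_init() error handling is omitted:

#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_ring.h>

static struct rte_ring *r;
static int payload = 42;          /* any pointer-sized object can be passed through the ring */

int
main(int argc, char **argv)
{
    rte_eal_init(argc, argv);

    /* Flags 0 == default multi-producer / multi-consumer mode,
     * which relies on the head/tail ordering discussed above. */
    r = rte_ring_create("example_ring", 1024, rte_socket_id(), 0);
    if (r == NULL)
        return -1;

    void *obj = &payload;
    unsigned int sent = rte_ring_enqueue_burst(r, &obj, 1, NULL);
    void *got = NULL;
    unsigned int recv = rte_ring_dequeue_burst(r, &got, 1, NULL);

    return (sent == 1 && recv == 1 && got == &payload) ? 0 : -1;
}
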
*/ } while (rte_atomic_compare_exchange_strong_explicit(&r->rts_prod.head.raw, (uint64_t *)(uintptr_t)&oh.raw, nh.raw, - rte_memory_order_acquire, rte_memory_order_acquire) == 0); + rte_memory_order_release, rte_memory_order_relaxed) == 0); *old_head = oh.val.pos; return n; @@ -130,11 +164,9 @@ enum rte_ring_queue_behavior behavior, uint32_t *old_head, uint32_t *entries) { - uint32_t n; + uint32_t n, prod_tail; union __rte_ring_rts_poscnt nh, oh; - oh.raw = rte_atomic_load_explicit(&r->rts_cons.head.raw, rte_memory_order_acquire); - /* move cons.head atomically */ do { /* Restore n as it may change every loop */ @@ -145,14 +177,27 @@ * make sure that we read cons head *before* * reading prod tail. */ - __rte_ring_rts_head_wait(&r->rts_cons, &oh); + /* + * A3: Synchronizes with the CAS at R2. + * Establishes a happens-before relationship with a thread of the same + * type that released the ht.raw, ensuring this thread observes all of + * its memory effects needed to maintain a safe partial order. + */ + oh = __rte_ring_rts_head_wait(&r->rts_cons, rte_memory_order_acquire); + + /* + * A4: Establish a synchronizes-with edge using a store-release at R0. + * This ensures that all memory effects from the preceding opposing + * thread are observed. + */ + prod_tail = rte_atomic_load_explicit(&r->prod.tail, rte_memory_order_acquire); /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * cons_head > prod_tail). So 'entries' is always between 0 * and size(ring)-1. */ - *entries = r->prod.tail - oh.val.pos; + *entries = prod_tail - oh.val.pos; /* Set the actual entries for dequeue */ if (n > *entries) @@ -165,13 +210,16 @@ nh.val.cnt = oh.val.cnt + 1; /* - * this CAS(ACQUIRE, ACQUIRE) serves as a hoist barrier to prevent: - * - OOO reads of prod tail value - * - OOO copy of elems from the ring + * R2: Establishes a synchronizes-with edge with the load-acquire + * of ht.raw at A3. Ensures that the store-release to the tail by + * this thread, if it was of the opposite type, becomes + * visible to another thread of the current type. That thread will + * then observe the updates in the same order, keeping a safe + * partial order. */ } while (rte_atomic_compare_exchange_strong_explicit(&r->rts_cons.head.raw, (uint64_t *)(uintptr_t)&oh.raw, nh.raw, - rte_memory_order_acquire, rte_memory_order_acquire) == 0); + rte_memory_order_release, rte_memory_order_relaxed) == 0); *old_head = oh.val.pos; return n; diff -Nru dpdk-24.11.3/lib/sched/rte_sched.c dpdk-24.11.4/lib/sched/rte_sched.c --- dpdk-24.11.3/lib/sched/rte_sched.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/sched/rte_sched.c 2025-12-19 12:05:33.000000000 +0000 @@ -71,7 +71,7 @@ uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /* Weighted Round Robin (WRR) */ - uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE]; + uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE]; /* TC oversubscription */ uint64_t tc_ov_credits; diff -Nru dpdk-24.11.3/lib/telemetry/rte_telemetry.h dpdk-24.11.4/lib/telemetry/rte_telemetry.h --- dpdk-24.11.3/lib/telemetry/rte_telemetry.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/telemetry/rte_telemetry.h 2025-12-19 12:05:33.000000000 +0000 @@ -361,17 +361,6 @@ struct rte_tel_data *info); /** - * Used for handling data received over a telemetry socket. - * - * @param sock_id - * ID for the socket to be used by the handler. - * - * @return - * Void. 
- */ -typedef void * (*handler)(void *sock_id); - -/** * Used when registering a command and callback function with telemetry. * * @param cmd diff -Nru dpdk-24.11.3/lib/telemetry/telemetry.c dpdk-24.11.4/lib/telemetry/telemetry.c --- dpdk-24.11.3/lib/telemetry/telemetry.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/telemetry/telemetry.c 2025-12-19 12:05:33.000000000 +0000 @@ -46,7 +46,7 @@ struct socket { int sock; char path[sizeof(((struct sockaddr_un *)0)->sun_path)]; - handler fn; + telemetry_sock_handler fn; RTE_ATOMIC(uint16_t) *num_clients; }; static struct socket v2_socket; /* socket for v2 telemetry */ diff -Nru dpdk-24.11.3/lib/telemetry/telemetry_internal.h dpdk-24.11.4/lib/telemetry/telemetry_internal.h --- dpdk-24.11.3/lib/telemetry/telemetry_internal.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/telemetry/telemetry_internal.h 2025-12-19 12:05:33.000000000 +0000 @@ -26,6 +26,17 @@ }; /** + * Used for handling data received over a telemetry socket. + * + * @param sock_id + * ID for the socket to be used by the handler. + * + * @return + * Void. + */ +typedef void * (*telemetry_sock_handler)(void *sock_id); + +/** * This telemetry callback is used when registering a legacy telemetry command. * It handles getting and formatting stats to be returned to telemetry when * requested. Stats up to buf_len in length are put in the buffer. diff -Nru dpdk-24.11.3/lib/vhost/socket.c dpdk-24.11.4/lib/vhost/socket.c --- dpdk-24.11.3/lib/vhost/socket.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/vhost/socket.c 2025-12-19 12:05:33.000000000 +0000 @@ -1192,7 +1192,8 @@ return -1; if (vsocket->is_vduse) - return vduse_device_create(path, vsocket->net_compliant_ol_flags); + return vduse_device_create(path, vsocket->net_compliant_ol_flags, + vsocket->extbuf, vsocket->linearbuf); if (vhost_user.fdset == NULL) { vhost_user.fdset = fdset_init("vhost-evt"); diff -Nru dpdk-24.11.3/lib/vhost/vduse.c dpdk-24.11.4/lib/vhost/vduse.c --- dpdk-24.11.3/lib/vhost/vduse.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/vhost/vduse.c 2025-12-19 12:05:33.000000000 +0000 @@ -140,7 +140,7 @@ { struct vhost_virtqueue *vq = dev->virtqueue[index]; struct vhost_vring_addr *ra = &vq->ring_addrs; - struct vduse_vq_info vq_info; + struct vduse_vq_info vq_info = { 0 }; struct vduse_vq_eventfd vq_efd; int ret; @@ -271,6 +271,55 @@ vq->last_avail_idx = 0; } +/* + * Tests show that virtqueues get ready at the first retry at worst, + * but let's be on the safe side and allow more retries. 
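
The VDUSE change above bounds its virtqueue readiness polling with a fixed retry budget instead of waiting indefinitely. The shape of such a loop, reduced to its essentials; the predicate, interval and retry budget below are placeholders, not part of the patch:

#include <stdbool.h>
#include <unistd.h>

/* Poll 'ready()' every 'interval_us' microseconds, giving up after 'max_tries'.
 * Returns 0 once ready, -1 on timeout. */
static int
wait_until_ready(bool (*ready)(void), unsigned int interval_us, unsigned int max_tries)
{
    for (unsigned int i = 0; i < max_tries; i++) {
        if (ready())
            return 0;
        usleep(interval_us);
    }
    return -1;
}

static unsigned int polls;
static bool fake_ready(void) { return ++polls >= 3; }   /* becomes ready on the 3rd poll */

int
main(void)
{
    return wait_until_ready(fake_ready, 1000, 100);      /* 1 ms interval, 100 tries */
}
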
+ */ +#define VDUSE_VQ_READY_POLL_MAX_RETRIES 100 + +static int +vduse_wait_for_virtqueues_ready(struct virtio_net *dev) +{ + unsigned int i; + int ret; + + for (i = 0; i < dev->nr_vring; i++) { + int retry_count = 0; + + while (retry_count < VDUSE_VQ_READY_POLL_MAX_RETRIES) { + struct vduse_vq_info vq_info = { 0 }; + + vq_info.index = i; + ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_GET_INFO, &vq_info); + if (ret) { + VHOST_CONFIG_LOG(dev->ifname, ERR, + "Failed to get VQ %u info while polling ready state: %s", + i, strerror(errno)); + return -1; + } + + if (vq_info.ready) { + VHOST_CONFIG_LOG(dev->ifname, DEBUG, + "VQ %u is ready after %u retries", i, retry_count); + break; + } + + retry_count++; + usleep(1000); + } + + if (retry_count >= VDUSE_VQ_READY_POLL_MAX_RETRIES) { + VHOST_CONFIG_LOG(dev->ifname, ERR, + "VQ %u ready state polling timeout after %u retries", + i, VDUSE_VQ_READY_POLL_MAX_RETRIES); + return -1; + } + } + + VHOST_CONFIG_LOG(dev->ifname, INFO, "All virtqueues are ready after polling"); + return 0; +} + static void vduse_device_start(struct virtio_net *dev, bool reconnect) { @@ -413,10 +462,18 @@ } if ((old_status ^ dev->status) & VIRTIO_DEVICE_STATUS_DRIVER_OK) { - if (dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK) + if (dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK) { + /* Poll virtqueues ready states before starting device */ + ret = vduse_wait_for_virtqueues_ready(dev); + if (ret < 0) { + VHOST_CONFIG_LOG(dev->ifname, ERR, + "Failed to wait for virtqueues ready, aborting device start"); + return; + } vduse_device_start(dev, false); - else + } else { vduse_device_stop(dev); + } } VHOST_CONFIG_LOG(dev->ifname, INFO, "Request %s (%u) handled successfully", @@ -614,7 +671,7 @@ } int -vduse_device_create(const char *path, bool compliant_ol_flags) +vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool linearbuf) { int control_fd, dev_fd, vid, ret; uint32_t i, max_queue_pairs, total_queues; @@ -759,6 +816,12 @@ vhost_setup_virtio_net(dev->vid, true, compliant_ol_flags, true, true); + if (extbuf) + vhost_enable_extbuf(dev->vid); + + if (linearbuf) + vhost_enable_linearbuf(dev->vid); + for (i = 0; i < total_queues; i++) { struct vduse_vq_config vq_cfg = { 0 }; struct vhost_virtqueue *vq; diff -Nru dpdk-24.11.3/lib/vhost/vduse.h dpdk-24.11.4/lib/vhost/vduse.h --- dpdk-24.11.3/lib/vhost/vduse.h 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/vhost/vduse.h 2025-12-19 12:05:33.000000000 +0000 @@ -11,15 +11,17 @@ #ifdef VHOST_HAS_VDUSE -int vduse_device_create(const char *path, bool compliant_ol_flags); +int vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool linearbuf); int vduse_device_destroy(const char *path); #else static inline int -vduse_device_create(const char *path, bool compliant_ol_flags) +vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool linearbuf) { RTE_SET_USED(compliant_ol_flags); + RTE_SET_USED(extbuf); + RTE_SET_USED(linearbuf); VHOST_CONFIG_LOG(path, ERR, "VDUSE support disabled at build time"); return -1; diff -Nru dpdk-24.11.3/lib/vhost/virtio_net.c dpdk-24.11.4/lib/vhost/virtio_net.c --- dpdk-24.11.3/lib/vhost/virtio_net.c 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/lib/vhost/virtio_net.c 2025-12-19 12:05:33.000000000 +0000 @@ -2864,25 +2864,28 @@ } } -static __rte_noinline void +static __rte_always_inline int copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, - struct buf_vector *buf_vec) + const struct buf_vector *buf_vec, + uint16_t nr_vec) { - uint64_t 
len; - uint64_t remain = sizeof(struct virtio_net_hdr); - uint64_t src; - uint64_t dst = (uint64_t)(uintptr_t)hdr; - - while (remain) { - len = RTE_MIN(remain, buf_vec->buf_len); - src = buf_vec->buf_addr; - rte_memcpy((void *)(uintptr_t)dst, - (void *)(uintptr_t)src, len); + size_t remain = sizeof(struct virtio_net_hdr); + uint8_t *dst = (uint8_t *)hdr; + + while (remain > 0) { + size_t len = RTE_MIN(remain, buf_vec->buf_len); + const void *src = (const void *)(uintptr_t)buf_vec->buf_addr; + + if (unlikely(nr_vec == 0)) + return -1; + memcpy(dst, src, len); remain -= len; dst += len; buf_vec++; + --nr_vec; } + return 0; } static __rte_always_inline int @@ -2911,16 +2914,12 @@ */ if (virtio_net_with_host_offload(dev)) { - if (unlikely(buf_vec[0].buf_len < sizeof(struct virtio_net_hdr))) { - /* - * No luck, the virtio-net header doesn't fit - * in a contiguous virtual area. - */ - copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec); - hdr = &tmp_hdr; - } else { - hdr = (struct virtio_net_hdr *)((uintptr_t)buf_vec[0].buf_addr); - } + if (unlikely(copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec, nr_vec) != 0)) + return -1; + + /* ensure that compiler does not delay copy */ + rte_compiler_barrier(); + hdr = &tmp_hdr; } for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) { @@ -3366,7 +3365,6 @@ { uint16_t avail_idx = vq->last_avail_idx; uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); - struct virtio_net_hdr *hdr; uintptr_t desc_addrs[PACKED_BATCH_SIZE]; uint16_t ids[PACKED_BATCH_SIZE]; uint16_t i; @@ -3385,8 +3383,12 @@ if (virtio_net_with_host_offload(dev)) { vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { - hdr = (struct virtio_net_hdr *)(desc_addrs[i]); - vhost_dequeue_offload(dev, hdr, pkts[i], legacy_ol_flags); + struct virtio_net_hdr hdr; + + memcpy(&hdr, (void *)desc_addrs[i], sizeof(struct virtio_net_hdr)); + rte_compiler_barrier(); + + vhost_dequeue_offload(dev, &hdr, pkts[i], legacy_ol_flags); } } diff -Nru dpdk-24.11.3/usertools/dpdk-telemetry-exporter.py dpdk-24.11.4/usertools/dpdk-telemetry-exporter.py --- dpdk-24.11.3/usertools/dpdk-telemetry-exporter.py 2025-08-18 15:26:42.000000000 +0000 +++ dpdk-24.11.4/usertools/dpdk-telemetry-exporter.py 2025-12-19 12:05:33.000000000 +0000 @@ -75,7 +75,7 @@ "/usr/local/share/dpdk/telemetry-endpoints", "/usr/share/dpdk/telemetry-endpoints", ] -DEFAULT_OUTPUT = "openmetrics://:9876" +DEFAULT_OUTPUT = "openmetrics://127.0.0.1:9876" def main(): @@ -275,11 +275,11 @@ Start an HTTP server and serve requests in the openmetrics/prometheus format. """ - listen = (args.output.hostname or "", int(args.output.port or 80)) + listen = (args.output.hostname or "127.0.0.1", int(args.output.port or 80)) with server.HTTPServer(listen, OpenmetricsHandler) as httpd: httpd.dpdk_socket_path = args.socket_path httpd.telemetry_endpoints = endpoints - LOG.info("listening on port %s", httpd.server_port) + LOG.info("listening on %s", httpd.socket.getsockname()) try: httpd.serve_forever() except KeyboardInterrupt:
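
The virtio_net.c rework above always gathers the virtio-net header out of the descriptor chain and fails cleanly if the chain is shorter than the header. The same bounded gather-copy, stripped of vhost specifics; the segment type, sizes and sample data are invented for the example:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct seg { const uint8_t *addr; size_t len; };

/* Copy 'need' bytes spread over up to 'nr_seg' segments into 'dst'.
 * Returns 0 on success, -1 if the segments run out first. */
static int
gather_copy(uint8_t *dst, size_t need, const struct seg *s, unsigned int nr_seg)
{
    while (need > 0) {
        if (nr_seg == 0)
            return -1;                     /* chain too short: malformed input */
        size_t len = need < s->len ? need : s->len;
        memcpy(dst, s->addr, len);
        dst += len;
        need -= len;
        s++;
        nr_seg--;
    }
    return 0;
}

int
main(void)
{
    const uint8_t part1[4] = { 1, 2, 3, 4 }, part2[8] = { 5, 6, 7, 8, 9, 10, 11, 12 };
    const struct seg segs[2] = { { part1, sizeof(part1) }, { part2, sizeof(part2) } };
    uint8_t hdr[12];

    printf("%d\n", gather_copy(hdr, sizeof(hdr), segs, 2));   /* prints 0 */
    return 0;
}
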