Version in base suite: 22.11.9-1~deb12u1
Base version: dpdk_22.11.9-1~deb12u1
Target version: dpdk_22.11.11-0+deb12u1
Base file: /srv/ftp-master.debian.org/ftp/pool/main/d/dpdk/dpdk_22.11.9-1~deb12u1.dsc
Target file: /srv/ftp-master.debian.org/policy/pool/main/d/dpdk/dpdk_22.11.11-0+deb12u1.dsc

 .mailmap | 8
 VERSION | 2
 app/dumpcap/main.c | 4
 app/pdump/main.c | 24
 app/proc-info/main.c | 2
 app/test-crypto-perf/cperf_test_vector_parsing.c | 47 -
 app/test-flow-perf/main.c | 2
 app/test-pmd/cmd_flex_item.c | 24
 app/test-pmd/cmdline.c | 6
 app/test-pmd/cmdline_mtr.c | 30
 app/test-pmd/config.c | 10
 app/test-pmd/testpmd.c | 142 +++
 app/test/process.h | 2
 app/test/test.c | 2
 app/test/test.h | 2
 app/test/test_cryptodev.c | 65 -
 app/test/test_debug.c | 99 +-
 app/test/test_dmadev.c | 2
 app/test/test_hash_readwrite.c | 5
 app/test/test_hash_readwrite_lf_perf.c | 4
 buildtools/pmdinfogen.py | 42
 debian/changelog | 9
 debian/patches/0001-eal-linux-force-iova-mode-va-with-no-huge-option.patch | 12
 doc/guides/freebsd_gsg/build_dpdk.rst | 2
 doc/guides/linux_gsg/linux_drivers.rst | 2
 doc/guides/nics/features/iavf.ini | 4
 doc/guides/nics/features/ice.ini | 1
 doc/guides/nics/features/txgbe.ini | 1
 doc/guides/nics/ionic.rst | 4
 doc/guides/nics/mlx5.rst | 25
 doc/guides/nics/txgbe.rst | 10
 doc/guides/rel_notes/release_22_11.rst | 287 ++++++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst | 4
 drivers/baseband/acc/meson.build | 2
 drivers/bus/fslmc/bus_fslmc_driver.h | 1
 drivers/bus/fslmc/fslmc_bus.c | 2
 drivers/bus/ifpga/ifpga_bus.c | 4
 drivers/bus/pci/windows/pci.c | 11
 drivers/bus/pci/windows/pci_netuio.h | 6
 drivers/bus/vmbus/rte_vmbus_reg.h | 5
 drivers/common/cnxk/roc_bphy_cgx.c | 52 -
 drivers/common/cnxk/roc_nix_queue.c | 2
 drivers/common/mlx5/linux/mlx5_nl.c | 110 ++
 drivers/common/mlx5/linux/mlx5_nl.h | 3
 drivers/common/mlx5/mlx5_common.h | 3
 drivers/common/mlx5/mlx5_common_mr.c | 6
 drivers/common/mlx5/version.map | 1
 drivers/common/mlx5/windows/mlx5_win_defs.h | 1
 drivers/crypto/caam_jr/caam_jr_uio.c | 12
 drivers/crypto/cnxk/cnxk_ae.h | 15
 drivers/crypto/ipsec_mb/ipsec_mb_ops.c | 7
 drivers/crypto/mlx5/mlx5_crypto.c | 2
 drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c | 38
 drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c | 6
 drivers/crypto/qat/dev/qat_crypto_pmd_gens.h | 21
 drivers/crypto/qat/dev/qat_sym_pmd_gen1.c | 24
 drivers/crypto/qat/qat_sym_session.c | 3
 drivers/dma/hisilicon/hisi_dmadev.c | 45
 drivers/dma/hisilicon/hisi_dmadev.h | 2
 drivers/event/cnxk/cn10k_eventdev.c | 15
 drivers/event/cnxk/cn10k_worker.h | 6
 drivers/net/af_packet/rte_eth_af_packet.c | 7
 drivers/net/ark/ark_ethdev_rx.c | 4
 drivers/net/axgbe/axgbe_ethdev.c | 6
 drivers/net/bonding/rte_eth_bond_8023ad.c | 8
 drivers/net/cnxk/cn10k_rx.h | 6
 drivers/net/cnxk/cnxk_ethdev_mtr.c | 8
 drivers/net/dpaa/dpaa_flow.c | 6
 drivers/net/dpaa/rte_pmd_dpaa.h | 2
 drivers/net/dpaa2/base/dpaa2_hw_dpni.c | 3
 drivers/net/dpaa2/base/dpaa2_hw_dpni_annot.h | 10
 drivers/net/dpaa2/dpaa2_ethdev.c | 21
 drivers/net/dpaa2/dpaa2_ethdev.h | 5
 drivers/net/dpaa2/dpaa2_recycle.c | 50 -
 drivers/net/dpaa2/dpaa2_rxtx.c | 16
 drivers/net/dpaa2/dpaa2_tm.c | 8
 drivers/net/e1000/base/e1000_mac.c | 1
 drivers/net/ena/base/ena_com.c | 6
 drivers/net/ena/ena_ethdev.c | 28
 drivers/net/enetfec/enet_ethdev.c | 27
 drivers/net/enetfec/enet_rxtx.c | 10
 drivers/net/enetfec/enet_uio.c | 19
 drivers/net/fm10k/base/fm10k_common.c | 4
 drivers/net/gve/gve_ethdev.c | 142 ++-
 drivers/net/gve/gve_ethdev.h | 5
 drivers/net/gve/gve_rx.c | 4
 drivers/net/hns3/hns3_ethdev.c | 31
 drivers/net/hns3/hns3_ethdev.h | 3
 drivers/net/hns3/hns3_fdir.c | 13
 drivers/net/hns3/hns3_flow.c | 60 -
 drivers/net/hns3/hns3_rxtx.c | 48 -
 drivers/net/hns3/hns3_rxtx_vec.c | 4
 drivers/net/i40e/i40e_hash.c | 16
 drivers/net/i40e/i40e_rxtx.c | 10
 drivers/net/iavf/iavf_rxtx.c | 21
 drivers/net/iavf/iavf_rxtx.h | 3
 drivers/net/iavf/iavf_vchnl.c | 2
 drivers/net/iavf/rte_pmd_iavf.h | 6
 drivers/net/ice/base/ice_flow.c | 4
 drivers/net/ice/base/ice_switch.c | 18
 drivers/net/ice/base/ice_type.h | 2
 drivers/net/ice/ice_acl_filter.c | 5
 drivers/net/ice/ice_ethdev.c | 12
 drivers/net/ice/ice_ethdev.h | 2
 drivers/net/ice/ice_fdir_filter.c | 56 -
 drivers/net/ice/ice_rxtx.c | 10
 drivers/net/ice/ice_rxtx_vec_common.h | 9
 drivers/net/memif/rte_eth_memif.c | 3
 drivers/net/mlx4/mlx4_rxtx.c | 2
 drivers/net/mlx5/hws/mlx5dr_buddy.c | 1
 drivers/net/mlx5/hws/mlx5dr_definer.c | 2
 drivers/net/mlx5/hws/mlx5dr_pool.c | 3
 drivers/net/mlx5/linux/mlx5_ethdev_os.c | 44
 drivers/net/mlx5/linux/mlx5_flow_os.c | 6
 drivers/net/mlx5/linux/mlx5_os.c | 46
 drivers/net/mlx5/linux/mlx5_verbs.c | 10
 drivers/net/mlx5/mlx5.c | 21
 drivers/net/mlx5/mlx5.h | 21
 drivers/net/mlx5/mlx5_defs.h | 6
 drivers/net/mlx5/mlx5_devx.c | 12
 drivers/net/mlx5/mlx5_ethdev.c | 50 +
 drivers/net/mlx5/mlx5_flow.c | 27
 drivers/net/mlx5/mlx5_flow.h | 71 -
 drivers/net/mlx5/mlx5_flow_dv.c | 461 ++++++----
 drivers/net/mlx5/mlx5_flow_hw.c | 266 +++--
 drivers/net/mlx5/mlx5_hws_cnt.c | 17
 drivers/net/mlx5/mlx5_hws_cnt.h | 124 +-
 drivers/net/mlx5/mlx5_rx.c | 281 +++++-
 drivers/net/mlx5/mlx5_rx.h | 29
 drivers/net/mlx5/mlx5_rxq.c | 34
 drivers/net/mlx5/mlx5_rxtx_vec.h | 7
 drivers/net/mlx5/mlx5_trigger.c | 89 +
 drivers/net/mlx5/mlx5_txq.c | 60 +
 drivers/net/mlx5/mlx5_utils.h | 3
 drivers/net/mlx5/rte_pmd_mlx5.h | 21
 drivers/net/mlx5/version.map | 1
 drivers/net/mlx5/windows/mlx5_ethdev_os.c | 34
 drivers/net/mlx5/windows/mlx5_flow_os.c | 4
 drivers/net/mlx5/windows/mlx5_flow_os.h | 1
 drivers/net/mlx5/windows/mlx5_os.c | 3
 drivers/net/mlx5/windows/mlx5_os.h | 2
 drivers/net/ngbe/ngbe_rxtx.c | 20
 drivers/net/tap/rte_eth_tap.c | 4
 drivers/net/tap/tap_flow.c | 124 +-
 drivers/net/tap/tap_netlink.c | 24
 drivers/net/tap/tap_netlink.h | 10
 drivers/net/tap/tap_tcmsgs.c | 6
 drivers/net/txgbe/base/txgbe_type.h | 4
 drivers/net/txgbe/txgbe_ethdev.c | 33
 drivers/net/txgbe/txgbe_ethdev.h | 1
 drivers/net/txgbe/txgbe_fdir.c | 59 +
 drivers/net/txgbe/txgbe_flow.c | 174 ++-
 drivers/net/txgbe/txgbe_rxtx.c | 22
 drivers/net/vmxnet3/base/vmxnet3_defs.h | 3
 drivers/net/vmxnet3/vmxnet3_ethdev.c | 30
 drivers/raw/dpaa2_cmdif/rte_pmd_dpaa2_cmdif.h | 2
 drivers/raw/ntb/rte_pmd_ntb.h | 2
 drivers/regex/mlx5/mlx5_regex_fastpath.c | 1
 drivers/regex/mlx5/mlx5_rxp.c | 2
 drivers/regex/mlx5/mlx5_rxp.h | 20
 drivers/vdpa/mlx5/mlx5_vdpa.h | 1
 examples/l3fwd-power/main.c | 2
 examples/server_node_efd/server/main.c | 2
 examples/server_node_efd/shared/common.h | 3
 examples/vdpa/main.c | 8
 lib/cfgfile/rte_cfgfile.c | 8
 lib/cmdline/cmdline_parse_portlist.c | 15
 lib/dmadev/rte_dmadev.h | 1
 lib/eal/common/eal_common_options.c | 17
 lib/eal/freebsd/eal.c | 8
 lib/eal/include/rte_mcslock.h | 101 +-
 lib/eal/include/rte_tailq.h | 9
 lib/eal/include/rte_vfio.h | 6
 lib/eal/linux/eal.c | 7
 lib/eal/linux/eal_vfio.c | 110 +-
 lib/eal/linux/eal_vfio.h | 5
 lib/eal/linux/eal_vfio_mp_sync.c | 17
 lib/eal/windows/eal.c | 5
 lib/efd/rte_efd.c | 3
 lib/ethdev/rte_ethdev.h | 2
 lib/eventdev/rte_event_crypto_adapter.c | 4
 lib/eventdev/rte_event_timer_adapter.c | 4
 lib/eventdev/rte_event_timer_adapter.h | 2
 lib/fib/trie.c | 3
 lib/gpudev/gpudev.c | 1
 lib/gpudev/gpudev_driver.h | 4
lib/graph/graph.c | 14 lib/graph/graph_private.h | 12 lib/graph/graph_stats.c | 96 +- lib/graph/node.c | 6 lib/hash/rte_thash.c | 6 lib/net/rte_net.c | 1 lib/rawdev/rte_rawdev.c | 8 lib/rawdev/rte_rawdev_pmd.h | 2 lib/ring/rte_ring_c11_pvt.h | 77 + lib/ring/rte_ring_hts_elem_pvt.h | 96 +- lib/ring/rte_ring_rts_elem_pvt.h | 97 +- lib/sched/rte_sched.c | 2 lib/vhost/virtio_net.c | 52 - 199 files changed, 3567 insertions(+), 1584 deletions(-) diff -Nru dpdk-22.11.9/.mailmap dpdk-22.11.11/.mailmap --- dpdk-22.11.9/.mailmap 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/.mailmap 2025-12-24 13:18:07.000000000 +0000 @@ -18,6 +18,7 @@ Adham Masarwah Adrian Moreno Adrian Podlawski +Adrian Schollmeyer Adrien Mazarguil Ady Agbarih Agalya Babu RadhaKrishnan @@ -119,6 +120,7 @@ Anthony Fee Antonio Fischetti Anupam Kapoor +Anurag Mandal Apeksha Gupta Archana Muniganti Archit Pandey @@ -556,6 +558,7 @@ Ilyes Ben Hamouda Intiyaz Basha Isaac Boukris +Itai Sharoni Itsuro Oda Ivan Boule Ivan Dyukov @@ -728,6 +731,7 @@ Keiichi Watanabe Keith Wiles Kent Wires +Kerem Aksu Keunhong Lee Kevin Laatz Kevin Lampis @@ -820,6 +824,7 @@ Lunyuan Cui Lu Qiuwen Lyn M +Maayan Kashani Maciej Bielski Maciej Czekaj Maciej Fijalkowski @@ -1029,6 +1034,7 @@ Noa Ezra Nobuhiro Miki Norbert Ciosek +Nupur Uttarwar Odi Assli Ofer Dagan Ognjen Joldzic @@ -1109,6 +1115,7 @@ Piotr Skajewski Pradeep Satyanarayana Prashant Bhole +Prashant Gupta Prashant Upadhyaya Prateek Agarwal Praveen Shetty @@ -1191,6 +1198,7 @@ Rob Scheepens Roger Melton Rohit Raj +Roi Dayan Roland Qi Rolf Neugebauer Romain Delhomel diff -Nru dpdk-22.11.9/VERSION dpdk-22.11.11/VERSION --- dpdk-22.11.9/VERSION 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/VERSION 2025-12-24 13:18:07.000000000 +0000 @@ -1 +1 @@ -22.11.9 +22.11.11 diff -Nru dpdk-22.11.9/app/dumpcap/main.c dpdk-22.11.11/app/dumpcap/main.c --- dpdk-22.11.9/app/dumpcap/main.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/dumpcap/main.c 2025-12-24 13:18:07.000000000 +0000 @@ -894,6 +894,10 @@ else pcap_dump_close(out.dumper); + /* If primary has exited, do not try and communicate with it */ + if (!rte_eal_primary_proc_alive(NULL)) + return 0; + cleanup_pdump_resources(); rte_ring_free(r); diff -Nru dpdk-22.11.9/app/pdump/main.c dpdk-22.11.11/app/pdump/main.c --- dpdk-22.11.9/app/pdump/main.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/pdump/main.c 2025-12-24 13:18:07.000000000 +0000 @@ -979,9 +979,8 @@ int ret; int i; - char n_flag[] = "-n4"; char mp_flag[] = "--proc-type=secondary"; - char *argp[argc + 2]; + char *argp[argc + 1]; /* catch ctrl-c so we can cleanup on exit */ sigemptyset(&action.sa_mask); @@ -993,13 +992,12 @@ sigaction(SIGHUP, &action, NULL); argp[0] = argv[0]; - argp[1] = n_flag; - argp[2] = mp_flag; + argp[1] = mp_flag; for (i = 1; i < argc; i++) - argp[i + 2] = argv[i]; + argp[i + 1] = argv[i]; - argc += 2; + argc += 1; diag = rte_eal_init(argc, argp); if (diag < 0) @@ -1009,7 +1007,7 @@ rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); argc -= diag; - argv += (diag - 2); + argv += (diag - 1); /* parse app arguments */ if (argc > 1) { @@ -1025,13 +1023,15 @@ dump_packets(); disable_primary_monitor(); - cleanup_pdump_resources(); + /* dump debug stats */ print_pdump_stats(); - ret = rte_eal_cleanup(); - if (ret) - printf("Error from rte_eal_cleanup(), %d\n", ret); + /* If primary has exited, do not try and communicate with it */ + if (!rte_eal_primary_proc_alive(NULL)) + return 0; + + cleanup_pdump_resources(); - return 0; + return 
rte_eal_cleanup() ? EXIT_FAILURE : 0; } diff -Nru dpdk-22.11.9/app/proc-info/main.c dpdk-22.11.11/app/proc-info/main.c --- dpdk-22.11.9/app/proc-info/main.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/proc-info/main.c 2025-12-24 13:18:07.000000000 +0000 @@ -501,7 +501,7 @@ static void collectd_resolve_cnt_type(char *cnt_type, size_t cnt_type_len, const char *cnt_name) { - char *type_end = strrchr(cnt_name, '_'); + const char *type_end = strrchr(cnt_name, '_'); if ((type_end != NULL) && (strncmp(cnt_name, "rx_", strlen("rx_")) == 0)) { diff -Nru dpdk-22.11.9/app/test/process.h dpdk-22.11.11/app/test/process.h --- dpdk-22.11.9/app/test/process.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test/process.h 2025-12-24 13:18:07.000000000 +0000 @@ -203,7 +203,7 @@ * tests attempting to use this function on FreeBSD. */ #ifdef RTE_EXEC_ENV_LINUX -static char * +static inline char * get_current_prefix(char *prefix, int size) { char path[PATH_MAX] = {0}; diff -Nru dpdk-22.11.9/app/test/test.c dpdk-22.11.11/app/test/test.c --- dpdk-22.11.9/app/test/test.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test/test.c 2025-12-24 13:18:07.000000000 +0000 @@ -82,6 +82,8 @@ { "test_memory_flags", no_action }, { "test_file_prefix", no_action }, { "test_no_huge_flag", no_action }, + { "test_panic", test_panic }, + { "test_exit", test_exit }, #ifdef RTE_LIB_TIMER #ifndef RTE_EXEC_ENV_WINDOWS { "timer_secondary_spawn_wait", test_timer_secondary }, diff -Nru dpdk-22.11.9/app/test/test.h dpdk-22.11.11/app/test/test.h --- dpdk-22.11.9/app/test/test.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test/test.h 2025-12-24 13:18:07.000000000 +0000 @@ -173,7 +173,9 @@ int commands_init(void); int command_valid(const char *cmd); +int test_exit(void); int test_mp_secondary(void); +int test_panic(void); int test_timer_secondary(void); int test_set_rxtx_conf(cmdline_fixed_string_t mode); diff -Nru dpdk-22.11.9/app/test/test_cryptodev.c dpdk-22.11.11/app/test/test_cryptodev.c --- dpdk-22.11.9/app/test/test_cryptodev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test/test_cryptodev.c 2025-12-24 13:18:07.000000000 +0000 @@ -207,7 +207,7 @@ struct rte_crypto_vec data_vec[UINT8_MAX], dest_data_vec[UINT8_MAX]; struct rte_crypto_va_iova_ptr cipher_iv, digest, aad_auth_iv; union rte_crypto_sym_ofs ofs; - struct rte_crypto_sym_vec vec; + struct rte_crypto_sym_vec vec = {0}; struct rte_crypto_sgl sgl, dest_sgl; uint32_t max_len; union rte_cryptodev_session_ctx sess; @@ -444,7 +444,7 @@ struct rte_crypto_sym_op *sop; union rte_crypto_sym_ofs ofs; struct rte_crypto_sgl sgl; - struct rte_crypto_sym_vec symvec; + struct rte_crypto_sym_vec symvec = {0}; struct rte_crypto_va_iova_ptr iv_ptr, aad_ptr, digest_ptr; struct rte_crypto_vec vec[UINT8_MAX]; @@ -490,7 +490,7 @@ struct rte_crypto_sym_op *sop; union rte_crypto_sym_ofs ofs; struct rte_crypto_sgl sgl; - struct rte_crypto_sym_vec symvec; + struct rte_crypto_sym_vec symvec = {0}; struct rte_crypto_va_iova_ptr iv_ptr, digest_ptr; struct rte_crypto_vec vec[UINT8_MAX]; @@ -3000,6 +3000,8 @@ uint16_t remaining_off = (auth_offset >> 3) + (auth_len >> 3); struct rte_mbuf *sgl_buf = (op_mode == IN_PLACE ? 
sym_op->m_src : sym_op->m_dst); + struct rte_mbuf *sgl_buf_head = sgl_buf; + while (remaining_off >= rte_pktmbuf_data_len(sgl_buf)) { remaining_off -= rte_pktmbuf_data_len(sgl_buf); sgl_buf = sgl_buf->next; @@ -3007,11 +3009,18 @@ /* The last segment should be large enough to hold full digest */ if (sgl_buf->data_len < auth_tag_len) { - rte_pktmbuf_free(sgl_buf->next); - sgl_buf->next = NULL; - TEST_ASSERT_NOT_NULL(rte_pktmbuf_append(sgl_buf, - auth_tag_len - sgl_buf->data_len), - "No room to append auth tag"); + uint16_t next_data_len = 0; + if (sgl_buf->next != NULL) { + next_data_len = sgl_buf->next->data_len; + + rte_pktmbuf_free(sgl_buf->next); + sgl_buf->next = NULL; + sgl_buf_head->nb_segs -= 1; + sgl_buf_head->pkt_len -= next_data_len; + } + TEST_ASSERT_NOT_NULL(rte_pktmbuf_append( + sgl_buf_head, auth_tag_len - sgl_buf->data_len), + "No room to append auth tag"); } sym_op->auth.digest.data = rte_pktmbuf_mtod_offset(sgl_buf, @@ -8871,11 +8880,13 @@ buf_oop = buf_oop->next; memset(rte_pktmbuf_mtod(buf_oop, uint8_t *), 0, rte_pktmbuf_tailroom(buf_oop)); - rte_pktmbuf_append(buf_oop, to_trn); + TEST_ASSERT_NOT_NULL(ut_params->obuf, "Output buffer not initialized"); + TEST_ASSERT_NOT_NULL(rte_pktmbuf_append(ut_params->obuf, to_trn), "Failed to append to mbuf"); } - plaintext = (uint8_t *)rte_pktmbuf_append(buf, + plaintext = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, to_trn); + TEST_ASSERT_NOT_NULL(plaintext, "Failed to append plaintext"); memcpy(plaintext, input_vec + trn_data, to_trn); trn_data += to_trn; @@ -8904,7 +8915,7 @@ buf_oop = buf_oop->next; memset(rte_pktmbuf_mtod(buf_oop, uint8_t *), 0, rte_pktmbuf_tailroom(buf_oop)); - rte_pktmbuf_append(buf_oop, to_trn); + TEST_ASSERT_NOT_NULL(rte_pktmbuf_append(ut_params->obuf, to_trn), "Failed to append to mbuf"); trn_data += to_trn; } @@ -13426,15 +13437,18 @@ memset(rte_pktmbuf_mtod(buf, uint8_t *), 0, rte_pktmbuf_tailroom(buf)); - plaintext = (uint8_t *)rte_pktmbuf_append(buf, + plaintext = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, to_trn); + TEST_ASSERT_NOT_NULL(plaintext, "Failed to append plaintext"); memcpy(plaintext, tdata->plaintext.data + trn_data, to_trn); trn_data += to_trn; - if (trn_data == tdata->plaintext.len) - digest_mem = (uint8_t *)rte_pktmbuf_append(buf, + if (trn_data == tdata->plaintext.len) { + digest_mem = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, tdata->gmac_tag.len); + TEST_ASSERT_NOT_NULL(digest_mem, "Failed to append digest data"); + } } ut_params->ibuf->nb_segs = segs; @@ -14717,23 +14731,28 @@ buf_oop = buf_oop->next; memset(rte_pktmbuf_mtod(buf_oop, uint8_t *), 0, rte_pktmbuf_tailroom(buf_oop)); - rte_pktmbuf_append(buf_oop, to_trn); + TEST_ASSERT_NOT_NULL(rte_pktmbuf_append(ut_params->obuf, to_trn), "Failed to append to mbuf"); } - plaintext = (uint8_t *)rte_pktmbuf_append(buf, + plaintext = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, to_trn); + TEST_ASSERT_NOT_NULL(plaintext, "Failed to append plaintext"); memcpy(plaintext, tdata->plaintext.data + trn_data, to_trn); trn_data += to_trn; if (trn_data == tdata->plaintext.len) { if (oop) { - if (!fragsz_oop) - digest_mem = rte_pktmbuf_append(buf_oop, + if (!fragsz_oop) { + digest_mem = rte_pktmbuf_append(ut_params->obuf, tdata->auth_tag.len); - } else - digest_mem = (uint8_t *)rte_pktmbuf_append(buf, + TEST_ASSERT_NOT_NULL(digest_mem, "Failed to append auth tag"); + } + } else { + digest_mem = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, tdata->auth_tag.len); + TEST_ASSERT_NOT_NULL(digest_mem, "Failed to append auth tag"); + } } } @@ 
-14768,16 +14787,18 @@ buf_last_oop = buf_oop->next = rte_pktmbuf_alloc(ts_params->mbuf_pool); + TEST_ASSERT_NOT_NULL(buf_oop->next, "Unexpected end of chain"); buf_oop = buf_oop->next; memset(rte_pktmbuf_mtod(buf_oop, uint8_t *), 0, rte_pktmbuf_tailroom(buf_oop)); - rte_pktmbuf_append(buf_oop, to_trn); + TEST_ASSERT_NOT_NULL(rte_pktmbuf_append(ut_params->obuf, to_trn), "Failed to append to mbuf"); trn_data += to_trn; if (trn_data == tdata->plaintext.len) { - digest_mem = rte_pktmbuf_append(buf_oop, + digest_mem = rte_pktmbuf_append(ut_params->obuf, tdata->auth_tag.len); + TEST_ASSERT_NOT_NULL(digest_mem, "Failed to append auth tag"); } } diff -Nru dpdk-22.11.9/app/test/test_debug.c dpdk-22.11.11/app/test/test_debug.c --- dpdk-22.11.9/app/test/test_debug.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test/test_debug.c 2025-12-24 13:18:07.000000000 +0000 @@ -8,6 +8,18 @@ #include #ifdef RTE_EXEC_ENV_WINDOWS +int +test_panic(void) +{ + printf("debug not supported on Windows, skipping test\n"); + return TEST_SKIPPED; +} +int +test_exit(void) +{ + printf("debug not supported on Windows, skipping test\n"); + return TEST_SKIPPED; +} static int test_debug(void) { @@ -25,34 +37,31 @@ #include #include #include -#include +#include + +#include "process.h" /* * Debug test * ========== */ -/* use fork() to test rte_panic() */ -static int +static const char *test_args[8]; + +int test_panic(void) { - int pid; int status; - pid = fork(); - - if (pid == 0) { + if (getenv(RECURSIVE_ENV_VAR) != NULL) { struct rlimit rl; /* No need to generate a coredump when panicking. */ rl.rlim_cur = rl.rlim_max = 0; setrlimit(RLIMIT_CORE, &rl); rte_panic("Test Debug\n"); - } else if (pid < 0) { - printf("Fork Failed\n"); - return -1; } - wait(&status); + status = process_dup(test_args, RTE_DIM(test_args), "test_panic"); if(status == 0){ printf("Child process terminated normally!\n"); return -1; @@ -62,27 +71,16 @@ return 0; } -/* use fork() to test rte_exit() */ static int test_exit_val(int exit_val) { - int pid; + char buf[5]; int status; - /* manually cleanup EAL memory, as the fork() below would otherwise - * cause the same hugepages to be free()-ed multiple times. - */ - rte_service_finalize(); - - pid = fork(); - - if (pid == 0) - rte_exit(exit_val, __func__); - else if (pid < 0){ - printf("Fork Failed\n"); - return -1; - } - wait(&status); + sprintf(buf, "%d", exit_val); + if (setenv("TEST_DEBUG_EXIT_VAL", buf, 1) == -1) + rte_panic("Failed to set exit value in env\n"); + status = process_dup(test_args, RTE_DIM(test_args), "test_exit"); printf("Child process status: %d\n", status); if(!WIFEXITED(status) || WEXITSTATUS(status) != (uint8_t)exit_val){ printf("Child process terminated with incorrect status (expected = %d)!\n", @@ -92,11 +90,22 @@ return 0; } -static int +int test_exit(void) { int test_vals[] = { 0, 1, 2, 255, -1 }; unsigned i; + + if (getenv(RECURSIVE_ENV_VAR) != NULL) { + int exit_val; + + if (!getenv("TEST_DEBUG_EXIT_VAL")) + rte_panic("No exit value set in env\n"); + + exit_val = strtol(getenv("TEST_DEBUG_EXIT_VAL"), NULL, 0); + rte_exit(exit_val, __func__); + } + for (i = 0; i < RTE_DIM(test_vals); i++) { if (test_exit_val(test_vals[i]) < 0) return -1; @@ -128,6 +137,40 @@ static int test_debug(void) { +#ifdef RTE_EXEC_ENV_FREEBSD + /* BSD target doesn't support prefixes at this point, and we also need to + * run another primary process here. 
+ */ + const char * prefix = "--no-shconf"; +#else + const char * prefix = "--file-prefix=debug"; +#endif + char core[10]; + + sprintf(core, "%d", rte_get_main_lcore()); + + test_args[0] = prgname; + test_args[1] = prefix; + test_args[2] = "-l"; + test_args[3] = core; + + if (rte_eal_has_hugepages()) { + test_args[4] = ""; + test_args[5] = ""; + test_args[6] = ""; + test_args[7] = ""; + } else { + test_args[4] = "--no-huge"; + test_args[5] = "-m"; + test_args[6] = "2048"; +#ifdef RTE_ARCH_PPC_64 + /* iova=pa is the default, but fails on ppc64 with --no-huge */ + test_args[7] = "--iova-mode=va"; +#else + test_args[7] = ""; +#endif + } + rte_dump_stack(); if (test_panic() < 0) return -1; diff -Nru dpdk-22.11.9/app/test/test_dmadev.c dpdk-22.11.11/app/test/test_dmadev.c --- dpdk-22.11.9/app/test/test_dmadev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test/test_dmadev.c 2025-12-24 13:18:07.000000000 +0000 @@ -46,7 +46,7 @@ printf("DMA Dev %d: Running %s Tests %s\n", dev_id, printable, check_err_stats ? " " : "(errors expected)"); for (i = 0; i < iterations; i++) { - if (test_fn(dev_id, vchan) < 0) + if (test_fn(dev_id, vchan) != 0) return -1; rte_dma_stats_get(dev_id, 0, &stats); diff -Nru dpdk-22.11.9/app/test/test_hash_readwrite.c dpdk-22.11.11/app/test/test_hash_readwrite.c --- dpdk-22.11.9/app/test/test_hash_readwrite.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test/test_hash_readwrite.c 2025-12-24 13:18:07.000000000 +0000 @@ -64,6 +64,11 @@ ret = rte_malloc(NULL, sizeof(int) * tbl_rw_test_param.num_insert, 0); + if (ret == NULL) { + printf("allocation failed\n"); + return -1; + } + for (i = 0; i < rte_lcore_count(); i++) { if (worker_core_ids[i] == lcore_id) break; diff -Nru dpdk-22.11.9/app/test/test_hash_readwrite_lf_perf.c dpdk-22.11.11/app/test/test_hash_readwrite_lf_perf.c --- dpdk-22.11.9/app/test/test_hash_readwrite_lf_perf.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test/test_hash_readwrite_lf_perf.c 2025-12-24 13:18:07.000000000 +0000 @@ -1310,6 +1310,10 @@ sz = rte_rcu_qsbr_get_memsize(RTE_MAX_LCORE); rv = (struct rte_rcu_qsbr *)rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE); + if (rv == NULL) { + printf("allocation failed\n"); + goto err; + } rcu_config.v = rv; if (rte_hash_rcu_qsbr_add(tbl_rwc_test_param.h, &rcu_config) < 0) { diff -Nru dpdk-22.11.9/app/test-crypto-perf/cperf_test_vector_parsing.c dpdk-22.11.11/app/test-crypto-perf/cperf_test_vector_parsing.c --- dpdk-22.11.9/app/test-crypto-perf/cperf_test_vector_parsing.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test-crypto-perf/cperf_test_vector_parsing.c 2025-12-24 13:18:07.000000000 +0000 @@ -308,12 +308,19 @@ if (strstr(key_token, "plaintext")) { rte_free(vector->plaintext.data); vector->plaintext.data = data; + + if (opts->test == CPERF_TEST_TYPE_VERIFY && data_length > opts->max_buffer_size) { + printf("Global plaintext (%u) larger than buffer_sz (%u)\n", + data_length, opts->max_buffer_size); + return -1; + } + if (tc_found) vector->plaintext.length = data_length; else { if (opts->max_buffer_size > data_length) { - printf("Global plaintext shorter than " - "buffer_sz\n"); + printf("Global plaintext (%u) shorter than " + "buffer_sz (%u)\n", data_length, opts->max_buffer_size); return -1; } vector->plaintext.length = opts->max_buffer_size; @@ -326,8 +333,8 @@ vector->cipher_key.length = data_length; else { if (opts->cipher_key_sz > data_length) { - printf("Global cipher_key shorter than " - "cipher_key_sz\n"); + printf("Global cipher_key (%u) shorter than " + 
"cipher_key_sz (%u)\n", data_length, opts->cipher_key_sz); return -1; } vector->cipher_key.length = opts->cipher_key_sz; @@ -340,8 +347,8 @@ vector->auth_key.length = data_length; else { if (opts->auth_key_sz > data_length) { - printf("Global auth_key shorter than " - "auth_key_sz\n"); + printf("Global auth_key (%u) shorter than " + "auth_key_sz (%u)\n", data_length, opts->auth_key_sz); return -1; } vector->auth_key.length = opts->auth_key_sz; @@ -354,8 +361,8 @@ vector->aead_key.length = data_length; else { if (opts->aead_key_sz > data_length) { - printf("Global aead_key shorter than " - "aead_key_sz\n"); + printf("Global aead_key (%u) shorter than " + "aead_key_sz (%u)\n", data_length, opts->aead_key_sz); return -1; } vector->aead_key.length = opts->aead_key_sz; @@ -368,8 +375,8 @@ vector->cipher_iv.length = data_length; else { if (opts->cipher_iv_sz > data_length) { - printf("Global cipher iv shorter than " - "cipher_iv_sz\n"); + printf("Global cipher iv (%u) shorter than " + "cipher_iv_sz (%u)\n", data_length, opts->cipher_iv_sz); return -1; } vector->cipher_iv.length = opts->cipher_iv_sz; @@ -382,8 +389,8 @@ vector->auth_iv.length = data_length; else { if (opts->auth_iv_sz > data_length) { - printf("Global auth iv shorter than " - "auth_iv_sz\n"); + printf("Global auth iv (%u) shorter than " + "auth_iv_sz (%u)\n", data_length, opts->auth_iv_sz); return -1; } vector->auth_iv.length = opts->auth_iv_sz; @@ -396,8 +403,8 @@ vector->aead_iv.length = data_length; else { if (opts->aead_iv_sz > data_length) { - printf("Global aead iv shorter than " - "aead_iv_sz\n"); + printf("Global aead iv (%u) shorter than " + "aead_iv_sz (%u)\n", data_length, opts->aead_iv_sz); return -1; } vector->aead_iv.length = opts->aead_iv_sz; @@ -410,8 +417,8 @@ vector->ciphertext.length = data_length; else { if (opts->max_buffer_size > data_length) { - printf("Global ciphertext shorter than " - "buffer_sz\n"); + printf("Global ciphertext (%u) shorter than " + "buffer_sz (%u)\n", data_length, opts->max_buffer_size); return -1; } vector->ciphertext.length = opts->max_buffer_size; @@ -425,8 +432,8 @@ vector->aad.length = data_length; else { if (opts->aead_aad_sz > data_length) { - printf("Global aad shorter than " - "aead_aad_sz\n"); + printf("Global aad (%u) shorter than " + "aead_aad_sz (%u)\n", data_length, opts->aead_aad_sz); return -1; } vector->aad.length = opts->aead_aad_sz; @@ -441,8 +448,8 @@ vector->digest.length = data_length; else { if (opts->digest_sz > data_length) { - printf("Global digest shorter than " - "digest_sz\n"); + printf("Global digest (%u) shorter than " + "digest_sz (%u)\n", data_length, opts->digest_sz); return -1; } vector->digest.length = opts->digest_sz; diff -Nru dpdk-22.11.9/app/test-flow-perf/main.c dpdk-22.11.11/app/test-flow-perf/main.c --- dpdk-22.11.9/app/test-flow-perf/main.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test-flow-perf/main.c 2025-12-24 13:18:07.000000000 +0000 @@ -1404,7 +1404,7 @@ global_actions[0] = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_JUMP); flows_list = rte_zmalloc("flows_list", - (sizeof(struct rte_flow *) * rules_count_per_core) + 1, 0); + (sizeof(struct rte_flow *) * (rules_count_per_core + 1)), 0); if (flows_list == NULL) rte_exit(EXIT_FAILURE, "No Memory available!\n"); diff -Nru dpdk-22.11.9/app/test-pmd/cmd_flex_item.c dpdk-22.11.11/app/test-pmd/cmd_flex_item.c --- dpdk-22.11.9/app/test-pmd/cmd_flex_item.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test-pmd/cmd_flex_item.c 2025-12-24 13:18:07.000000000 +0000 @@ -134,6 +134,7 
@@ struct rte_flow_attr *attr; struct rte_flow_item *pattern; struct rte_flow_action *actions; + size_t sz; sprintf(flow_rule, "flow create 0 pattern %s / end actions drop / end", src); @@ -143,21 +144,38 @@ if (ret) return ret; item->type = pattern->type; + switch (item->type) { + case RTE_FLOW_ITEM_TYPE_IPV4: + sz = sizeof(struct rte_flow_item_ipv4); + break; + case RTE_FLOW_ITEM_TYPE_IPV6: + sz = sizeof(struct rte_flow_item_ipv6); + break; + case RTE_FLOW_ITEM_TYPE_UDP: + sz = sizeof(struct rte_flow_item_udp); + break; + case RTE_FLOW_ITEM_TYPE_TCP: + sz = sizeof(struct rte_flow_item_tcp); + break; + default: + printf("Unsupported item type in specified in link\n"); + return -EINVAL; + } if (pattern->spec) { ptr = (void *)(uintptr_t)item->spec; - memcpy(ptr, pattern->spec, FLEX_MAX_FLOW_PATTERN_LENGTH); + memcpy(ptr, pattern->spec, sz); } else { item->spec = NULL; } if (pattern->mask) { ptr = (void *)(uintptr_t)item->mask; - memcpy(ptr, pattern->mask, FLEX_MAX_FLOW_PATTERN_LENGTH); + memcpy(ptr, pattern->mask, sz); } else { item->mask = NULL; } if (pattern->last) { ptr = (void *)(uintptr_t)item->last; - memcpy(ptr, pattern->last, FLEX_MAX_FLOW_PATTERN_LENGTH); + memcpy(ptr, pattern->last, sz); } else { item->last = NULL; } diff -Nru dpdk-22.11.9/app/test-pmd/cmdline.c dpdk-22.11.11/app/test-pmd/cmdline.c --- dpdk-22.11.9/app/test-pmd/cmdline.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test-pmd/cmdline.c 2025-12-24 13:18:07.000000000 +0000 @@ -3491,7 +3491,7 @@ value = 0; nb_item = 0; value_ok = 0; - for (i = 0; i < strnlen(str, STR_TOKEN_SIZE); i++) { + for (i = 0; i < strnlen(str, STR_MULTI_TOKEN_SIZE); i++) { c = str[i]; if ((c >= '0') && (c <= '9')) { value = (unsigned int) (value * 10 + (c - '0')); @@ -3542,7 +3542,7 @@ struct cmd_set_list_result { cmdline_fixed_string_t cmd_keyword; cmdline_fixed_string_t list_name; - cmdline_fixed_string_t list_of_items; + cmdline_multi_string_t list_of_items; }; static void cmd_set_list_parsed(void *parsed_result, @@ -3591,7 +3591,7 @@ "corelist#portlist"); static cmdline_parse_token_string_t cmd_set_list_of_items = TOKEN_STRING_INITIALIZER(struct cmd_set_list_result, list_of_items, - NULL); + TOKEN_STRING_MULTI); static cmdline_parse_inst_t cmd_set_fwd_list = { .f = cmd_set_list_parsed, diff -Nru dpdk-22.11.9/app/test-pmd/cmdline_mtr.c dpdk-22.11.11/app/test-pmd/cmdline_mtr.c --- dpdk-22.11.9/app/test-pmd/cmdline_mtr.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test-pmd/cmdline_mtr.c 2025-12-24 13:18:07.000000000 +0000 @@ -86,14 +86,34 @@ } static int +validate_input_color_table_entries(char *str) +{ + char *saveptr; + char *token = strtok_r(str, PARSE_DELIMITER, &saveptr); + for (int i = 0; token != NULL; i++) { + if (i > ((MAX_DSCP_TABLE_ENTRIES + MAX_VLAN_TABLE_ENTRIES) - 1)) + return -1; + token = strtok_r(NULL, PARSE_DELIMITER, &saveptr); + } + return 0; +} + +static int parse_input_color_table_entries(char *str, enum rte_color **dscp_table, enum rte_color **vlan_table) { enum rte_color *vlan, *dscp; - char *token; + char *token, *saveptr; + char *temp_str = strdup(str); int i = 0; - token = strtok_r(str, PARSE_DELIMITER, &str); + if (validate_input_color_table_entries(temp_str)) { + free(temp_str); + return -1; + } + free(temp_str); + + token = strtok_r(str, PARSE_DELIMITER, &saveptr); if (token == NULL) return 0; @@ -117,7 +137,7 @@ if (i == MAX_DSCP_TABLE_ENTRIES) break; - token = strtok_r(str, PARSE_DELIMITER, &str); + token = strtok_r(NULL, PARSE_DELIMITER, &saveptr); if (token == NULL) { free(dscp); 
return -1; @@ -126,7 +146,7 @@ *dscp_table = dscp; - token = strtok_r(str, PARSE_DELIMITER, &str); + token = strtok_r(NULL, PARSE_DELIMITER, &saveptr); if (token == NULL) return 0; @@ -154,7 +174,7 @@ if (i == MAX_VLAN_TABLE_ENTRIES) break; - token = strtok_r(str, PARSE_DELIMITER, &str); + token = strtok_r(NULL, PARSE_DELIMITER, &saveptr); if (token == NULL) { free(vlan); free(*dscp_table); diff -Nru dpdk-22.11.9/app/test-pmd/config.c dpdk-22.11.11/app/test-pmd/config.c --- dpdk-22.11.9/app/test-pmd/config.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test-pmd/config.c 2025-12-24 13:18:07.000000000 +0000 @@ -2001,6 +2001,7 @@ switch (pia->type) { case RTE_FLOW_ACTION_TYPE_AGE: case RTE_FLOW_ACTION_TYPE_COUNT: + case RTE_FLOW_ACTION_TYPE_CONNTRACK: break; default: fprintf(stderr, @@ -4556,7 +4557,7 @@ /* reinitialize forwarding streams */ init_fwd_streams(); sm_id = 0; - txp = 1; + txp = fwd_topology_tx_port_get(rxp); /* get the dcb info on the first RX and TX ports */ (void)rte_eth_dev_get_dcb_info(fwd_ports_ids[rxp], &rxp_dcb_info); (void)rte_eth_dev_get_dcb_info(fwd_ports_ids[txp], &txp_dcb_info); @@ -4574,7 +4575,7 @@ fwd_lcores[lc_id]->stream_idx; rxq = rxp_dcb_info.tc_queue.tc_rxq[i][tc].base; txq = txp_dcb_info.tc_queue.tc_txq[i][tc].base; - nb_rx_queue = txp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue; + nb_rx_queue = rxp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue; nb_tx_queue = txp_dcb_info.tc_queue.tc_txq[i][tc].nb_queue; for (j = 0; j < nb_rx_queue; j++) { struct fwd_stream *fs; @@ -4604,11 +4605,8 @@ rxp++; if (rxp >= nb_fwd_ports) return; + txp = fwd_topology_tx_port_get(rxp); /* get the dcb information on next RX and TX ports */ - if ((rxp & 0x1) == 0) - txp = (portid_t) (rxp + 1); - else - txp = (portid_t) (rxp - 1); rte_eth_dev_get_dcb_info(fwd_ports_ids[rxp], &rxp_dcb_info); rte_eth_dev_get_dcb_info(fwd_ports_ids[txp], &txp_dcb_info); } diff -Nru dpdk-22.11.9/app/test-pmd/testpmd.c dpdk-22.11.11/app/test-pmd/testpmd.c --- dpdk-22.11.9/app/test-pmd/testpmd.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/app/test-pmd/testpmd.c 2025-12-24 13:18:07.000000000 +0000 @@ -101,12 +101,14 @@ uint16_t verbose_level = 0; /**< Silent by default. */ int testpmd_logtype; /**< Log type for testpmd logs */ +/* Maximum delay for exiting after primary process. */ +#define MONITOR_INTERVAL (500 * 1000) + /* use main core for command line ? */ uint8_t interactive = 0; uint8_t auto_start = 0; uint8_t tx_first; char cmdline_filename[PATH_MAX] = {0}; - /* * NUMA support configuration. 
* When set, the NUMA support attempts to dispatch the allocation of the @@ -3706,6 +3708,83 @@ rte_devargs_reset(&da); } +#ifndef RTE_EXEC_ENV_WINDOWS + +enum testpmd_req_type { + TESTPMD_REQ_TYPE_EXIT, +}; + +struct testpmd_mp_req { + enum testpmd_req_type t; +}; + +struct testpmd_mp_resp { + int result; +}; + +#define TESTPMD_MP "mp_testpmd" + +/* Send reply to this peer when testpmd exits */ +static const char *primary_name; + +static void +reply_to_primary(const char *peer, int result) +{ + struct rte_mp_msg reply = { }; + struct testpmd_mp_resp *resp = (struct testpmd_mp_resp *) &reply.param; + + strlcpy(reply.name, TESTPMD_MP, RTE_MP_MAX_NAME_LEN); + reply.len_param = sizeof(*resp); + resp->result = result; + + printf("Replying %d to primary\n", result); + fflush(stdout); + + if (rte_mp_reply(&reply, peer) < 0) + printf("Failed to send response to primary:%s", strerror(rte_errno)); +} + +/* Primary process is exiting, stop secondary process */ +static void +pmd_notify_secondary(void) +{ + struct testpmd_mp_req request = { + .t = TESTPMD_REQ_TYPE_EXIT, + }; + struct rte_mp_msg mp_req = { + .name = TESTPMD_MP, + .len_param = sizeof(request), + }; + struct rte_mp_reply reply; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + + printf("\nPrimary: Sending 'stop_req' request to secondary...\n"); + fflush(stdout); + + memcpy(mp_req.param, &request, sizeof(request)); + rte_mp_request_sync(&mp_req, &reply, &ts); +} + +static int +handle_testpmd_request(const struct rte_mp_msg *request, const void *peer) +{ + const struct testpmd_mp_req *req = (const struct testpmd_mp_req *)request->param; + + if (req->t == TESTPMD_REQ_TYPE_EXIT) { + printf("\nReceived notification of primary exiting\n"); + fflush(stdout); + + /* Response is sent after forwarding loop exits */ + primary_name = peer; + + kill(getpid(), SIGINT); + } else { + reply_to_primary(peer, -EINVAL); + } + return 0; +} +#endif + void pmd_test_exit(void) { @@ -3717,6 +3796,10 @@ stop_packet_forwarding(); #ifndef RTE_EXEC_ENV_WINDOWS + /* Tell secondary to exit */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + pmd_notify_secondary(); + for (i = 0 ; i < RTE_DIM(mempools) ; i++) { if (mempools[i]) { if (mp_alloc_type == MP_ALLOC_ANON) @@ -4431,6 +4514,38 @@ prompt_exit(); } +#ifndef RTE_EXEC_ENV_WINDOWS +/* Alarm signal handler, used to check that primary process */ +static void +monitor_primary(void *arg __rte_unused) +{ + if (rte_eal_primary_proc_alive(NULL)) { + rte_eal_alarm_set(MONITOR_INTERVAL, monitor_primary, NULL); + } else { + /* + * If primary process exits, then all the device information + * is no longer valid. Calling any cleanup code is going to + * run into use after free. + */ + fprintf(stderr, "\nPrimary process is no longer active, exiting...\n"); + exit(EXIT_FAILURE); + } +} + +/* Setup handler to check when primary exits. 
*/ +static int +enable_primary_monitor(void) +{ + return rte_eal_alarm_set(MONITOR_INTERVAL, monitor_primary, NULL); +} + +static void +disable_primary_monitor(void) +{ + rte_eal_alarm_cancel(monitor_primary, NULL); +} +#endif + int main(int argc, char** argv) { @@ -4462,6 +4577,15 @@ rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n", rte_strerror(rte_errno)); +#ifndef RTE_EXEC_ENV_WINDOWS + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + if (enable_primary_monitor() < 0) + rte_exit(EXIT_FAILURE, "Cannot setup primary monitor"); + if (rte_mp_action_register(TESTPMD_MP, handle_testpmd_request) < 0) + rte_exit(EXIT_FAILURE, "Failed to register message action\n"); + } +#endif + /* allocate port structures, and init them */ init_port(); @@ -4659,8 +4783,24 @@ } } +#ifndef RTE_EXEC_ENV_WINDOWS + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + disable_primary_monitor(); + rte_mp_action_unregister(TESTPMD_MP); + } +#endif + pmd_test_exit(); +#ifndef RTE_EXEC_ENV_WINDOWS + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + const char *peer = primary_name; + primary_name = NULL; + if (peer) + reply_to_primary(peer, 0); + } +#endif + #ifdef RTE_LIB_PDUMP /* uninitialize packet capture framework */ rte_pdump_uninit(); diff -Nru dpdk-22.11.9/buildtools/pmdinfogen.py dpdk-22.11.11/buildtools/pmdinfogen.py --- dpdk-22.11.9/buildtools/pmdinfogen.py 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/buildtools/pmdinfogen.py 2025-12-24 13:18:07.000000000 +0000 @@ -4,9 +4,9 @@ # Copyright (c) 2020 Dmitry Kozlyuk import argparse -import ctypes import json import re +import struct import sys import tempfile @@ -110,24 +110,6 @@ return None -def define_rte_pci_id(is_big_endian): - base_type = ctypes.LittleEndianStructure - if is_big_endian: - base_type = ctypes.BigEndianStructure - - class rte_pci_id(base_type): - _pack_ = True - _fields_ = [ - ("class_id", ctypes.c_uint32), - ("vendor_id", ctypes.c_uint16), - ("device_id", ctypes.c_uint16), - ("subsystem_vendor_id", ctypes.c_uint16), - ("subsystem_device_id", ctypes.c_uint16), - ] - - return rte_pci_id - - class Driver: OPTIONS = [ ("params", "_param_string_export"), @@ -166,26 +148,24 @@ if not table_symbol: raise Exception("PCI table declared but not defined: %d" % table_name) - rte_pci_id = define_rte_pci_id(image.is_big_endian) + if image.is_big_endian: + fmt = ">" + else: + fmt = "<" + fmt += "LHHHH" result = [] while True: - size = ctypes.sizeof(rte_pci_id) + size = struct.calcsize(fmt) offset = size * len(result) data = table_symbol.get_value(offset, size) if not data: break - pci_id = rte_pci_id.from_buffer_copy(data) - if not pci_id.device_id: + _, vendor, device, ss_vendor, ss_device = struct.unpack_from(fmt, data) + if not device: break - result.append( - [ - pci_id.vendor_id, - pci_id.device_id, - pci_id.subsystem_vendor_id, - pci_id.subsystem_device_id, - ] - ) + result.append((vendor, device, ss_vendor, ss_device)) + return result def dump(self, file): diff -Nru dpdk-22.11.9/debian/changelog dpdk-22.11.11/debian/changelog --- dpdk-22.11.9/debian/changelog 2025-08-19 18:45:57.000000000 +0000 +++ dpdk-22.11.11/debian/changelog 2025-12-24 14:49:39.000000000 +0000 @@ -1,3 +1,12 @@ +dpdk (22.11.11-0+deb12u1) bookworm; urgency=medium + + * New upstream release 22.11.11; for a full list of changes see: + http://doc.dpdk.org/guides-22.11/rel_notes/release_22_11.html + * Refresh 0001-eal-linux-force-iova-mode-va-with-no-huge-option.patch to + remove fuzz from 22.11.11 + + -- Luca Boccassi Wed, 24 Dec 2025 14:49:39 +0000 + dpdk 
(22.11.9-1~deb12u1) bookworm; urgency=medium * New upstream release 22.11.9; for a full list of changes see: diff -Nru dpdk-22.11.9/debian/patches/0001-eal-linux-force-iova-mode-va-with-no-huge-option.patch dpdk-22.11.11/debian/patches/0001-eal-linux-force-iova-mode-va-with-no-huge-option.patch --- dpdk-22.11.9/debian/patches/0001-eal-linux-force-iova-mode-va-with-no-huge-option.patch 2025-08-19 18:45:57.000000000 +0000 +++ dpdk-22.11.11/debian/patches/0001-eal-linux-force-iova-mode-va-with-no-huge-option.patch 2025-12-24 14:49:39.000000000 +0000 @@ -8,7 +8,7 @@ Origin: https://patchwork.dpdk.org/project/dpdk/patch/20200402171241.13258-2-dwilder@us.ibm.com/ --- a/lib/eal/linux/eal.c +++ b/lib/eal/linux/eal.c -@@ -1078,6 +1078,11 @@ +@@ -1065,6 +1065,11 @@ phys_addrs = rte_eal_using_phys_addrs() != 0; @@ -17,10 +17,10 @@ + RTE_LOG(INFO, EAL, "Physical addresses are unavailable, selecting IOVA as VA mode.\n"); + } + - /* if no EAL option "--iova-mode=", use bus IOVA scheme */ - if (internal_conf->iova_mode == RTE_IOVA_DC) { - /* autodetect the IOVA mapping mode */ -@@ -1086,18 +1091,7 @@ + /* Always call rte_bus_get_iommu_class() to trigger DMA mask detection and validation */ + enum rte_iova_mode bus_iova_mode = rte_bus_get_iommu_class(); + +@@ -1076,18 +1081,7 @@ if (iova_mode == RTE_IOVA_DC) { RTE_LOG(DEBUG, EAL, "Buses did not request a specific IOVA mode.\n"); @@ -40,7 +40,7 @@ /* we have an IOMMU, pick IOVA as VA mode */ iova_mode = RTE_IOVA_VA; RTE_LOG(DEBUG, EAL, "IOMMU is available, selecting IOVA as VA mode.\n"); -@@ -1108,6 +1102,12 @@ +@@ -1098,6 +1092,12 @@ iova_mode = RTE_IOVA_PA; RTE_LOG(DEBUG, EAL, "IOMMU is not available, selecting IOVA as PA mode.\n"); } diff -Nru dpdk-22.11.9/doc/guides/freebsd_gsg/build_dpdk.rst dpdk-22.11.11/doc/guides/freebsd_gsg/build_dpdk.rst --- dpdk-22.11.9/doc/guides/freebsd_gsg/build_dpdk.rst 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/doc/guides/freebsd_gsg/build_dpdk.rst 2025-12-24 13:18:07.000000000 +0000 @@ -18,7 +18,7 @@ * pkgconf * py38-pyelftools -.. note: +.. note:: The specific package for pyelftools is dependent on the version of python in use, Python 3.8 being the version at type of writing, hence the ``py38`` prefix. diff -Nru dpdk-22.11.9/doc/guides/linux_gsg/linux_drivers.rst dpdk-22.11.11/doc/guides/linux_gsg/linux_drivers.rst --- dpdk-22.11.9/doc/guides/linux_gsg/linux_drivers.rst 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/doc/guides/linux_gsg/linux_drivers.rst 2025-12-24 13:18:07.000000000 +0000 @@ -319,7 +319,7 @@ More about the bifurcated driver can be found in NVIDIA `bifurcated PMD -`_ presentation. +`_ presentation. .. _uio: diff -Nru dpdk-22.11.9/doc/guides/nics/features/iavf.ini dpdk-22.11.11/doc/guides/nics/features/iavf.ini --- dpdk-22.11.9/doc/guides/nics/features/iavf.ini 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/doc/guides/nics/features/iavf.ini 2025-12-24 13:18:07.000000000 +0000 @@ -4,10 +4,11 @@ ; Refer to default.ini for the full list of available PMD features. 
; [Features] -Speed capabilities = Y Link status = Y Rx interrupt = Y +Free Tx mbuf on demand = Y Queue start/stop = Y +Burst mode info = Y Power mgmt address monitor = Y MTU update = Y Scattered Rx = Y @@ -33,6 +34,7 @@ Rx descriptor status = Y Tx descriptor status = Y Basic stats = Y +Extended stats = Y Multiprocess aware = Y FreeBSD = Y Linux = Y diff -Nru dpdk-22.11.9/doc/guides/nics/features/ice.ini dpdk-22.11.11/doc/guides/nics/features/ice.ini --- dpdk-22.11.9/doc/guides/nics/features/ice.ini 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/doc/guides/nics/features/ice.ini 2025-12-24 13:18:07.000000000 +0000 @@ -13,6 +13,7 @@ Link status event = Y Rx interrupt = Y Fast mbuf free = P +Free Tx mbuf on demand = Y Queue start/stop = Y Burst mode info = Y Power mgmt address monitor = Y diff -Nru dpdk-22.11.9/doc/guides/nics/features/txgbe.ini dpdk-22.11.11/doc/guides/nics/features/txgbe.ini --- dpdk-22.11.9/doc/guides/nics/features/txgbe.ini 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/doc/guides/nics/features/txgbe.ini 2025-12-24 13:18:07.000000000 +0000 @@ -72,7 +72,6 @@ [rte_flow actions] drop = Y -mark = Y pf = Y queue = Y rss = Y diff -Nru dpdk-22.11.9/doc/guides/nics/ionic.rst dpdk-22.11.11/doc/guides/nics/ionic.rst --- dpdk-22.11.9/doc/guides/nics/ionic.rst 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/doc/guides/nics/ionic.rst 2025-12-24 13:18:07.000000000 +0000 @@ -1,5 +1,5 @@ .. SPDX-License-Identifier: BSD-3-Clause - Copyright 2018-2022 Advanced Micro Devices, Inc. + Copyright 2018-2025 Advanced Micro Devices, Inc. IONIC Driver ============ @@ -15,6 +15,8 @@ `(pdf) `__ - DSC3-400 dual-port 400G Distributed Services Card `(pdf) `__ +- Pollara 400 single-port 400G AI NIC + `(pdf) `__ Please visit the `AMD Pensando Networking diff -Nru dpdk-22.11.9/doc/guides/nics/mlx5.rst dpdk-22.11.11/doc/guides/nics/mlx5.rst --- dpdk-22.11.9/doc/guides/nics/mlx5.rst 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/doc/guides/nics/mlx5.rst 2025-12-24 13:18:07.000000000 +0000 @@ -466,7 +466,26 @@ - Modify Field flow: - - Supports the 'set' and 'add' operations for ``RTE_FLOW_ACTION_TYPE_MODIFY_FIELD`` action. + - Supports the 'set' operation for ``RTE_FLOW_ACTION_TYPE_MODIFY_FIELD`` in all flow engines. + - Supports the 'add' operation with 'src' field of type + ``RTE_FLOW_FIELD_VALUE`` or ``RTE_FLOW_FIELD_POINTER`` with both HW steering (``dv_flow_en=2``) + and DV flow engine (``dv_flow_en=1``). + - HW steering flow engine, starting with ConnectX-7 and BlueField-3, + supports packet header fields in 'src' field. + 'dst' field can be any of the following: + + - ``RTE_FLOW_FIELD_IPV4_TTL`` + - ``RTE_FLOW_FIELD_IPV6_HOPLIMIT`` + - ``RTE_FLOW_FIELD_TCP_SEQ_NUM`` + - ``RTE_FLOW_FIELD_TCP_ACK_NUM`` + - ``RTE_FLOW_FIELD_TAG`` + - ``RTE_FLOW_FIELD_META`` + - ``RTE_FLOW_FIELD_FLEX_ITEM`` + - ``RTE_FLOW_FIELD_TCP_DATA_OFFSET`` + - ``RTE_FLOW_FIELD_IPV4_IHL`` + - ``RTE_FLOW_FIELD_IPV4_TOTAL_LEN`` + - ``RTE_FLOW_FIELD_IPV6_PAYLOAD_LEN`` + - Modification of an arbitrary place in a packet via the special ``RTE_FLOW_FIELD_START`` Field ID is not supported. - Modification of the 802.1Q Tag, VXLAN Network or GENEVE Network ID's is not supported. - Encapsulation levels are not supported, can modify outermost header fields only. @@ -1700,6 +1719,10 @@ This option is supported only for Tx hairpin queues. +#. With strict queueing enabled + (``RTE_FLOW_PORT_FLAG_STRICT_QUEUE`` passed to ``rte_flow_configure()``), + indirect age actions can be created only through asynchronous flow API. 
+ Notes for testpmd ----------------- diff -Nru dpdk-22.11.9/doc/guides/nics/txgbe.rst dpdk-22.11.11/doc/guides/nics/txgbe.rst --- dpdk-22.11.9/doc/guides/nics/txgbe.rst 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/doc/guides/nics/txgbe.rst 2025-12-24 13:18:07.000000000 +0000 @@ -139,6 +139,16 @@ PHY parameter used for user debugging. Setting other values to take effect requires setting the ``ffe_set``. +- ``pkt-filter-size`` (default **0**) + + Memory allocation for the flow director filter. + Default 0 for 64K mode, set 1 for 128K mode, set 2 for 256K mode. + +- ``pkt-filter-drop-queue`` (default **127**) + + The drop queue number for packets that match the drop rule in flow director. + Valid values are from 0 to 127. + Driver compilation and testing ------------------------------ diff -Nru dpdk-22.11.9/doc/guides/rel_notes/release_22_11.rst dpdk-22.11.11/doc/guides/rel_notes/release_22_11.rst --- dpdk-22.11.9/doc/guides/rel_notes/release_22_11.rst 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/doc/guides/rel_notes/release_22_11.rst 2025-12-24 13:18:07.000000000 +0000 @@ -3277,3 +3277,290 @@ * Ubuntu 22.04 * Driver MLNX_OFED_LINUX-24.10-2.1.8.0 * fw 22.43.2566 + +22.11.10 Release Notes +---------------------- + + +22.11.10 Fixes +~~~~~~~~~~~~~~ + +* net/mlx5: fix out-of-order completions in ordinary Rx burst (CVE-2025-23259) + +22.11.10 Validation +~~~~~~~~~~~~~~~~~~~ + +* Validated by Nvidia(R) + +22.11.11 Release Notes +---------------------- + + +22.11.11 Fixes +~~~~~~~~~~~~~~ + +* app/crypto-perf: fix plaintext size exceeds buffer size +* app/flow-perf: fix rules array length +* app/pdump: remove hard-coded memory channels +* app/procinfo: fix const pointer in collectd format +* app/testpmd: fix conntrack action query +* app/testpmd: fix DCB Rx queues +* app/testpmd: fix DCB Tx port +* app/testpmd: fix flex item link parsing +* app/testpmd: increase size of set cores list command +* app/testpmd: monitor state of primary process +* app/testpmd: stop forwarding in secondary process +* app/testpmd: validate DSCP and VLAN for meter creation +* baseband/acc: fix exported header +* buildtools/pmdinfogen: fix warning with python 3.14 +* bus/fslmc: fix const pointer in device name parsing +* bus/ifpga: fix const pointer in device name parsing +* bus/pci: fix build with MinGW 13 +* cfgfile: fix section count with no name +* cmdline: fix highest bit port list parsing +* cmdline: fix port list parsing +* common/cnxk: fix async event handling +* common/cnxk: fix max number of SQB buffers in clean up +* common/mlx5: release unused mempool entries +* common/mlx5: remove useless constants +* crypto/caam_jr: fix const pointer in UIO filename parsing +* crypto/cnxk: refactor RSA verification +* crypto/ipsec_mb: fix QP release in secondary +* crypto/mlx5: remove unused constants +* "crypto/qat: fix backported patch +* crypto/qat: fix CCM request descriptor hash state size +* crypto/qat: fix source buffer alignment +* dmadev: fix debug build with tracepoints +* dma/hisilicon: fix stop with pending transfers +* doc: add conntrack state inspect command to testpmd guide +* doc: add device arguments in txgbe guide +* doc: add Pollara 400 device in ionic guide +* doc: fix feature list of iavf driver +* doc: fix feature list of ice driver +* doc: fix note in FreeBSD guide +* doc: fix NVIDIA bifurcated driver presentation link +* drivers: fix some exported headers +* eal: correct memory ordering in MCS lock +* eal: fix DMA mask validation with IOVA mode option +* eal: fix MP socket cleanup +* 
eal: fix plugin dir walk +* efd: fix AVX2 support +* ethdev: fix VLAN filter parameter description +* event/cnxk: fix Rx offload flags +* eventdev/crypto: fix build with clang 21 +* eventdev: fix listing timer adapters with telemetry +* examples/l3fwd-power: fix telemetry command registration +* examples/server_node_efd: fix format overflow +* examples/vdpa: fix format overflow +* fib6: fix tbl8 allocation check logic +* gpudev: fix driver header for Windows +* graph: fix unaligned access in stats +* graph: fix updating edge with active graph +* hash: fix unaligned access in predictable RSS +* lib: fix backticks matching in Doxygen comments +* net/af_packet: fix crash in secondary process +* net/ark: remove double mbuf free +* net/axgbe: fix build with GCC 16 +* net/bonding: fix MAC address propagation in 802.3ad mode +* net/cnxk: fix default meter pre-color +* net/dpaa2: fix duplicate call of close +* net/dpaa2: fix L3/L4 checksum results +* net/dpaa2: fix shaper rate +* net/dpaa2: fix uninitialized variable +* net/dpaa2: receive packets with additional parse errors +* net/dpaa2: remove ethdev pointer from bus device +* net/dpaa: fix resource leak +* net/e1000/base: fix crash on init with GCC 13 +* net/ena/base: fix unsafe memcpy on invalid memory +* net/ena: fix PCI BAR mapping on 64K page size +* net/enetfec: fix buffer descriptor size configuration +* net/enetfec: fix checksum flag handling and error return +* net/enetfec: fix const pointer in UIO filename parsing +* net/enetfec: fix file descriptor leak on read error +* net/enetfec: fix memory leak in Rx buffer cleanup +* net/enetfec: fix out-of-bounds access in UIO mapping +* net/enetfec: fix Tx queue free +* net/enetfec: reject multi-queue configuration +* net/enetfec: reject Tx deferred queue +* net: fix L2 length for GRE packets +* net/fm10k: fix build with GCC 16 +* net/gve: allocate Rx QPL pages using malloc +* net/hns3: fix inconsistent lock +* net/hns3: fix overwrite mbuf in vector path +* net/hns3: fix VLAN resources freeing +* net/hns3: fix VLAN tag loss for short tunnel frame +* net/i40e: fix symmetric Toeplitz hashing for SCTP +* net/iavf: fix build with clang 21 +* net/iavf: fix check for PF Rx timestamp support +* net/iavf: fix Rx timestamp validity check +* net/ice/base: fix adding special words +* net/ice/base: fix integer overflow on NVM init +* net/ice/base: fix memory leak in HW profile handling +* net/ice/base: fix memory leak in recipe handling +* net/ice: fix initialization with 8 ports +* net/ice: fix memory leak in raw pattern parse +* net/ice: fix path selection for QinQ Tx offload +* net/ice: fix statistics +* net/ice: fix vector Rx VLAN offload flags +* net/ice: remove indirection for FDIR filters +* net/intel: fix assumption about tag placement order +* net/memif: fix const pointer in socket check +* net/mlx4: fix unnecessary comma +* net/mlx5: fix build with MinGW 13 +* net/mlx5: fix connection tracking state item validation +* net/mlx5: fix control flow leakage for external SQ +* net/mlx5: fix device start error handling +* net/mlx5: fix Direct Verbs counter offset detection +* net/mlx5: fix error reporting on masked indirect actions +* net/mlx5: fix ESP header match after UDP for group 0 +* net/mlx5: fix external queues access +* net/mlx5: fix flow aging race condition +* net/mlx5: fix indirect flow age action handling +* net/mlx5: fix indirect meter index leak +* net/mlx5: fix indirect RSS action hash +* net/mlx5: fix interface name parameter definition +* net/mlx5: fix leak of flow indexed pools +* 
net/mlx5: fix min and max MTU reporting +* net/mlx5: fix modify field action restriction +* net/mlx5: fix MTU initialization +* net/mlx5: fix multicast +* net/mlx5: fix multi-process Tx default rules +* net/mlx5: fix non-template age rules flush +* net/mlx5: fix send to kernel action resources release +* net/mlx5: fix Tx metadata pattern template mismatch +* net/mlx5: fix uninitialized variable +* net/mlx5: fix unnecessary commas +* net/mlx5: fix unsupported flow rule port action +* net/mlx5/hws: fix buddy memory allocation +* net/mlx5: release representor interrupt handler +* net/mlx5: remove unused macros +* net/mlx5: remove useless constants +* net/mlx5: store MTU at Rx queue allocation time +* net/mlx5/windows: fix match criteria in flow creation +* net/ngbe: reduce memory size of ring descriptors +* net/tap: fix build with LTO +* net/tap: fix interrupt callback crash after failed start +* net/txgbe: add device arguments for FDIR +* net/txgbe: filter FDIR match flex bytes for tunnel +* net/txgbe: fix FDIR drop action for L4 match packets +* net/txgbe: fix FDIR filter for SCTP tunnel +* net/txgbe: fix FDIR input mask +* net/txgbe: fix FDIR mode clearing +* net/txgbe: fix FDIR rule raw relative for L3 packets +* net/txgbe: fix maximum number of FDIR filters +* net/txgbe: fix VF Rx buffer size in config register +* net/txgbe: reduce memory size of ring descriptors +* net/txgbe: remove unsupported flow action mark +* net/txgbe: switch to FDIR when ntuple filter is full +* net/vmxnet3: disable RSS for single queue for ESX8.0+ +* net/vmxnet3: fix mapping of mempools to queues +* pdump: handle primary process exit +* rawdev: fix build with clang 21 +* regex/mlx5: remove useless constants +* Revert "net/mlx5: align PF and VF/SF MAC address handling" +* ring: establish a safe partial order in hts-ring +* ring: establish safe partial order in default mode +* ring: establish safe partial order in RTS mode +* sched: fix WRR parameter data type +* tailq: fix lookup macro +* test/crypto: fix mbuf handling +* test/crypto: fix vector initialization +* test/debug: fix crash with mlx5 devices +* test/debug: fix IOVA mode on PPC64 without huge pages +* test/dma: fix failure condition +* test/hash: check memory allocation +* vdpa/mlx5: remove unused constant +* version: 22.11.11-rc1 +* vfio: fix custom containers in multiprocess +* vhost: fix double fetch when dequeue offloading + +22.11.11 Validation +~~~~~~~~~~~~~~~~~~~ + +* Red Hat(R) Testing + + * Platform + + * RHEL 9.6 + * Kernel 5.14 + * Qemu 9.1.0 + * libvirt 10.10 + * openvswitch 3.3 + * X540-AT2 NIC(ixgbe, 10G) + + * Functionality + + * Guest with device assignment(PF) throughput testing(1G hugepage size) + * Guest with device assignment(PF) throughput testing(2M hugepage size) + * Guest with device assignment(VF) throughput testing + * PVP (host dpdk testpmd as vswitch) 1Q: throughput testing + * PVP vhost-user 2Q throughput testing + * PVP vhost-user 1Q cross numa node throughput testing + * Guest with vhost-user 2 queues throughput testing + * vhost-user reconnect with dpdk-client, qemu-server: qemu reconnect + * vhost-user reconnect with dpdk-client, qemu-server: ovs reconnect + * PVP reconnect with dpdk-client, qemu-server: PASS + * PVP 1Q live migration testing + * PVP 1Q cross numa node live migration testing + * Guest with ovs+dpdk+vhost-user 1Q live migration testing + * Guest with ovs+dpdk+vhost-user 1Q live migration testing (2M) + * Guest with ovs+dpdk+vhost-user 2Q live migration testing + * Guest with ovs+dpdk+vhost-user 4Q live 
migration testing + * Host PF + DPDK testing + * Host VF + DPDK testing + + +* Intel(R) Testing + + * Basic Intel(R) NIC testing + * Build & CFLAG compile: cover the build test combination with latest GCC/Clang version and the popular OS revision such as Ubuntu24.04, Ubuntu25.04, Fedora42, RHEL10, RHEL9.6, FreeBSD14.2, SUSE15, AzureLinux3.0, OpenAnolis8.10 etc. + * PF(i40e, ixgbe, igc): test scenarios including RTE_FLOW/TSO/Jumboframe/checksum offload/VLAN/VXLAN, etc. + * VF(i40e, ixgbe): test scenarios including VF-RTE_FLOW/TSO/Jumboframe/checksum offload/VLAN/VXLAN, etc. + * PF/VF(ice): test scenarios including Switch features/Package Management/Flow Director/Advanced Tx/Advanced RSS/ACL/DCF/Flexible Descriptor, etc. + * Intel NIC single core/NIC performance: test scenarios including PF/VF single core performance test, etc. + * IPsec: test scenarios including ipsec/ipsec-gw/ipsec library basic test - QAT&SW/FIB library, etc. + + * Basic cryptodev and virtio testing + * Virtio: both function and performance test are covered. Such as PVP/Virtio_loopback/virtio-user loopback/virtio-net VM2VM perf testing/VMware ESXi 8.0, etc. + * Cryptodev: + * Function test: test scenarios including Cryptodev API testing/CompressDev ISA-L/QAT/ZLIB PMD Testing/FIPS, etc. + * Performance test: test scenarios including Throughput Performance/Cryptodev Latency, etc. + + +* Nvidia(R) Testing + + * Basic functionality via testpmd/example applications + + * Tx/Rx + * xstats + * Timestamps + * Link status + * RTE flow and flow_director + * RSS + * VLAN filtering, stripping and insertion + * Checksum/TSO + * ptype + * link_status_interrupt example application + * l3fwd-power example application + * Multi-process example applications + * Hardware LRO tests + * Buffer Split tests + * Tx scheduling tests + + * Build tests + + * Debian 12 with MLNX_OFED_LINUX-24.10-3.2.5.0. + * Ubuntu 22.04 with MLNX_OFED_LINUX-24.10-3.2.5.0. + * Ubuntu 24.04 with MLNX_OFED_LINUX-24.10-3.2.5.0. + * Ubuntu 22.04 with rdma-core master (091ddb5). + * Ubuntu 24.04 with rdma-core v50.0. + * Fedora 42 with rdma-core v55.0. + * Fedora 43 (Rawhide) with rdma-core v58.0. + * OpenSUSE Leap 15.6 with rdma-core v49.1. + * Windows Server 2022 with Clang 18.1.8. 
+ + * ConnectX-6 Dx + + * Ubuntu 22.04 + * Driver MLNX_OFED_LINUX-5.8-7.0.6.1 + * fw 22.35.8002 diff -Nru dpdk-22.11.9/doc/guides/testpmd_app_ug/testpmd_funcs.rst dpdk-22.11.11/doc/guides/testpmd_app_ug/testpmd_funcs.rst --- dpdk-22.11.9/doc/guides/testpmd_app_ug/testpmd_funcs.rst 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/doc/guides/testpmd_app_ug/testpmd_funcs.rst 2025-12-24 13:18:07.000000000 +0000 @@ -5062,6 +5062,10 @@ testpmd> flow indirect_action 0 update 0 action conntrack_update dir / end +Inspect the conntrack action state through the following command:: + + testpmd> flow indirect_action 0 query + Sample meter with policy rules ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff -Nru dpdk-22.11.9/drivers/baseband/acc/meson.build dpdk-22.11.11/drivers/baseband/acc/meson.build --- dpdk-22.11.9/drivers/baseband/acc/meson.build 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/baseband/acc/meson.build 2025-12-24 13:18:07.000000000 +0000 @@ -5,4 +5,4 @@ sources = files('acc_common.c', 'rte_acc100_pmd.c', 'rte_acc200_pmd.c') -headers = files('rte_acc_cfg.h') +headers = files('rte_acc_cfg.h', 'rte_acc_common_cfg.h') diff -Nru dpdk-22.11.9/drivers/bus/fslmc/bus_fslmc_driver.h dpdk-22.11.11/drivers/bus/fslmc/bus_fslmc_driver.h --- dpdk-22.11.9/drivers/bus/fslmc/bus_fslmc_driver.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/bus/fslmc/bus_fslmc_driver.h 2025-12-24 13:18:07.000000000 +0000 @@ -115,7 +115,6 @@ TAILQ_ENTRY(rte_dpaa2_device) next; /**< Next probed DPAA2 device. */ struct rte_device device; /**< Inherit core device */ union { - struct rte_eth_dev *eth_dev; /**< ethernet device */ struct rte_cryptodev *cryptodev; /**< Crypto Device */ struct rte_dma_dev *dmadev; /**< DMA Device */ struct rte_rawdev *rawdev; /**< Raw Device */ diff -Nru dpdk-22.11.9/drivers/bus/fslmc/fslmc_bus.c dpdk-22.11.11/drivers/bus/fslmc/fslmc_bus.c --- dpdk-22.11.9/drivers/bus/fslmc/fslmc_bus.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/bus/fslmc/fslmc_bus.c 2025-12-24 13:18:07.000000000 +0000 @@ -238,7 +238,7 @@ rte_fslmc_parse(const char *name, void *addr) { uint16_t dev_id; - char *t_ptr; + const char *t_ptr; const char *sep; uint8_t sep_exists = 0; int ret = -1; diff -Nru dpdk-22.11.9/drivers/bus/ifpga/ifpga_bus.c dpdk-22.11.11/drivers/bus/ifpga/ifpga_bus.c --- dpdk-22.11.9/drivers/bus/ifpga/ifpga_bus.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/bus/ifpga/ifpga_bus.c 2025-12-24 13:18:07.000000000 +0000 @@ -429,8 +429,8 @@ int *out = addr; struct rte_rawdev *rawdev = NULL; char rawdev_name[RTE_RAWDEV_NAME_MAX_LEN]; - char *c1 = NULL; - char *c2 = NULL; + const char *c1 = NULL; + const char *c2 = NULL; int port = IFPGA_BUS_DEV_PORT_MAX; char str_port[8]; int str_port_len = 0; diff -Nru dpdk-22.11.9/drivers/bus/pci/windows/pci.c dpdk-22.11.11/drivers/bus/pci/windows/pci.c --- dpdk-22.11.9/drivers/bus/pci/windows/pci.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/bus/pci/windows/pci.c 2025-12-24 13:18:07.000000000 +0000 @@ -11,18 +11,19 @@ #include #include -#include "private.h" -#include "pci_netuio.h" - +/* DEVPKEY_Device_Numa_Node should be defined in devpkey.h */ #include -#include - #if defined RTE_TOOLCHAIN_GCC && (__MINGW64_VERSION_MAJOR < 8) #include DEFINE_DEVPROPKEY(DEVPKEY_Device_Numa_Node, 0x540b947e, 0x8b40, 0x45bc, 0xa8, 0xa2, 0x6a, 0x0b, 0x89, 0x4c, 0xbd, 0xa2, 3); #endif +#include + +#include "private.h" +#include "pci_netuio.h" + /* * This code is used to simulate a PCI probe by parsing information in * the 
registry hive for PCI devices. diff -Nru dpdk-22.11.9/drivers/bus/pci/windows/pci_netuio.h dpdk-22.11.11/drivers/bus/pci/windows/pci_netuio.h --- dpdk-22.11.9/drivers/bus/pci/windows/pci_netuio.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/bus/pci/windows/pci_netuio.h 2025-12-24 13:18:07.000000000 +0000 @@ -5,12 +5,12 @@ #ifndef _PCI_NETUIO_H_ #define _PCI_NETUIO_H_ -#if !defined(NTDDI_WIN10_FE) || NTDDI_VERSION < NTDDI_WIN10_FE -/* GUID definition for device class netUIO */ +#if !defined(NTDDI_WIN10_FE) || NTDDI_VERSION < NTDDI_WIN10_FE || defined(__MINGW64__) +/* GUID_DEVCLASS_NETUIO should be defined in devguid.h */ DEFINE_GUID(GUID_DEVCLASS_NETUIO, 0x78912bc1, 0xcb8e, 0x4b28, 0xa3, 0x29, 0xf3, 0x22, 0xeb, 0xad, 0xbe, 0x0f); -/* GUID definition for the netuio device interface */ +/* GUID_DEVINTERFACE_NETUIO should be defined in ndisguid.h */ DEFINE_GUID(GUID_DEVINTERFACE_NETUIO, 0x08336f60, 0x0679, 0x4c6c, 0x85, 0xd2, 0xae, 0x7c, 0xed, 0x65, 0xff, 0xf7); #endif diff -Nru dpdk-22.11.9/drivers/bus/vmbus/rte_vmbus_reg.h dpdk-22.11.11/drivers/bus/vmbus/rte_vmbus_reg.h --- dpdk-22.11.9/drivers/bus/vmbus/rte_vmbus_reg.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/bus/vmbus/rte_vmbus_reg.h 2025-12-24 13:18:07.000000000 +0000 @@ -6,6 +6,11 @@ #ifndef _VMBUS_REG_H_ #define _VMBUS_REG_H_ +#include + +#include +#include + /* * Hyper-V SynIC message format. */ diff -Nru dpdk-22.11.9/drivers/common/cnxk/roc_bphy_cgx.c dpdk-22.11.11/drivers/common/cnxk/roc_bphy_cgx.c --- dpdk-22.11.9/drivers/common/cnxk/roc_bphy_cgx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/common/cnxk/roc_bphy_cgx.c 2025-12-24 13:18:07.000000000 +0000 @@ -65,8 +65,7 @@ } static int -roc_bphy_cgx_wait_for_ownership(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, - uint64_t *scr0) +roc_bphy_cgx_wait_ack(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, uint64_t *scr0, bool ack) { int tries = 5000; uint64_t scr1; @@ -75,16 +74,18 @@ *scr0 = roc_bphy_cgx_read(roc_cgx, lmac, CGX_CMRX_SCRATCH0); scr1 = roc_bphy_cgx_read(roc_cgx, lmac, CGX_CMRX_SCRATCH1); - if (FIELD_GET(SCR1_OWN_STATUS, scr1) == ETH_OWN_NON_SECURE_SW && - FIELD_GET(SCR0_ETH_EVT_STS_S_ACK, *scr0) == 0) - break; - /* clear async events if any */ - if (FIELD_GET(SCR0_ETH_EVT_STS_S_EVT_TYPE, *scr0) == - ETH_EVT_ASYNC && - FIELD_GET(SCR0_ETH_EVT_STS_S_ACK, *scr0)) + if (FIELD_GET(SCR0_ETH_EVT_STS_S_EVT_TYPE, *scr0) == ETH_EVT_ASYNC && + FIELD_GET(SCR0_ETH_EVT_STS_S_ACK, *scr0)) { roc_bphy_cgx_ack(roc_cgx, lmac, scr0); + goto skip; + } + if (FIELD_GET(SCR1_OWN_STATUS, scr1) == ETH_OWN_NON_SECURE_SW && + FIELD_GET(SCR0_ETH_EVT_STS_S_ACK, *scr0) == ack) + break; + +skip: plt_delay_ms(1); } while (--tries); @@ -92,29 +93,20 @@ } static int -roc_bphy_cgx_wait_for_ack(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, - uint64_t *scr0) +roc_bphy_cgx_wait_for_ownership(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, uint64_t *scr0) { - int tries = 5000; - uint64_t scr1; - - do { - *scr0 = roc_bphy_cgx_read(roc_cgx, lmac, CGX_CMRX_SCRATCH0); - scr1 = roc_bphy_cgx_read(roc_cgx, lmac, CGX_CMRX_SCRATCH1); - - if (FIELD_GET(SCR1_OWN_STATUS, scr1) == ETH_OWN_NON_SECURE_SW && - FIELD_GET(SCR0_ETH_EVT_STS_S_ACK, *scr0)) - break; - - plt_delay_ms(1); - } while (--tries); + return roc_bphy_cgx_wait_ack(roc_cgx, lmac, scr0, false); +} - return tries ? 
0 : -ETIMEDOUT; +static int +roc_bphy_cgx_wait_for_ack(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, uint64_t *scr0) +{ + return roc_bphy_cgx_wait_ack(roc_cgx, lmac, scr0, true); } static int -roc_bphy_cgx_intf_req(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, - uint64_t scr1, uint64_t *scr0) +roc_bphy_cgx_intf_req(struct roc_bphy_cgx *roc_cgx, unsigned int lmac, uint64_t scr1, + uint64_t *scr0) { uint8_t cmd_id = FIELD_GET(SCR1_ETH_CMD_ID, scr1); int ret; @@ -142,12 +134,6 @@ if (cmd_id == ETH_CMD_INTF_SHUTDOWN) goto out; - if (FIELD_GET(SCR0_ETH_EVT_STS_S_EVT_TYPE, *scr0) != ETH_EVT_CMD_RESP) { - plt_err("received async event instead of cmd resp event"); - ret = -EIO; - goto out; - } - if (FIELD_GET(SCR0_ETH_EVT_STS_S_ID, *scr0) != cmd_id) { plt_err("received resp for cmd %d expected for cmd %d", (int)FIELD_GET(SCR0_ETH_EVT_STS_S_ID, *scr0), cmd_id); diff -Nru dpdk-22.11.9/drivers/common/cnxk/roc_nix_queue.c dpdk-22.11.11/drivers/common/cnxk/roc_nix_queue.c --- dpdk-22.11.9/drivers/common/cnxk/roc_nix_queue.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/common/cnxk/roc_nix_queue.c 2025-12-24 13:18:07.000000000 +0000 @@ -1332,7 +1332,7 @@ /* Restore limit to max SQB count that the pool was created * for aura drain to succeed. */ - roc_npa_aura_limit_modify(sq->aura_handle, NIX_MAX_SQB); + roc_npa_aura_limit_modify(sq->aura_handle, sq->aura_sqb_bufs); rc |= roc_npa_pool_destroy(sq->aura_handle); plt_free(sq->fc); plt_free(sq->sqe_mem); diff -Nru dpdk-22.11.9/drivers/common/mlx5/linux/mlx5_nl.c dpdk-22.11.11/drivers/common/mlx5/linux/mlx5_nl.c --- dpdk-22.11.9/drivers/common/mlx5/linux/mlx5_nl.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/common/mlx5/linux/mlx5_nl.c 2025-12-24 13:18:07.000000000 +0000 @@ -27,8 +27,6 @@ #endif -/* Size of the buffer to receive kernel messages */ -#define MLX5_NL_BUF_SIZE (32 * 1024) /* Send buffer size for the Netlink socket */ #define MLX5_SEND_BUF_SIZE 32768 /* Receive buffer size for the Netlink socket */ @@ -1962,3 +1960,111 @@ } return 0; } + +struct mlx5_mtu { + uint32_t min_mtu; + bool min_mtu_set; + uint32_t max_mtu; + bool max_mtu_set; +}; + +static int +mlx5_nl_get_mtu_bounds_cb(struct nlmsghdr *nh, void *arg) +{ + size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + struct mlx5_mtu *out = arg; + + while (off < nh->nlmsg_len) { + struct rtattr *ra = RTE_PTR_ADD(nh, off); + uint32_t *payload; + + switch (ra->rta_type) { + case IFLA_MIN_MTU: + payload = RTA_DATA(ra); + out->min_mtu = *payload; + out->min_mtu_set = true; + break; + case IFLA_MAX_MTU: + payload = RTA_DATA(ra); + out->max_mtu = *payload; + out->max_mtu_set = true; + break; + default: + /* Nothing to do for other attributes. */ + break; + } + off += RTA_ALIGN(ra->rta_len); + } + + return 0; +} + +/** + * Query minimum and maximum allowed MTU values for given Linux network interface. + * + * This function queries the following interface attributes exposed in netlink since Linux 4.18: + * + * - IFLA_MIN_MTU - minimum allowed MTU + * - IFLA_MAX_MTU - maximum allowed MTU + * + * @param[in] nl + * Netlink socket of the ROUTE kind (NETLINK_ROUTE). + * @param[in] ifindex + * Linux network device index. + * @param[out] min_mtu + * Pointer to minimum allowed MTU. Populated only if both minimum and maximum MTU was queried. + * @param[out] max_mtu + * Pointer to maximum allowed MTU. Populated only if both minimum and maximum MTU was queried. + * + * @return + * 0 on success, negative on error and rte_errno is set. 
+ * + * Known errors: + * + * - (-EINVAL) - either @p min_mtu or @p max_mtu is NULL. + * - (-ENOENT) - either minimum or maximum allowed MTU was not found in interface attributes. + */ +int +mlx5_nl_get_mtu_bounds(int nl, unsigned int ifindex, uint16_t *min_mtu, uint16_t *max_mtu) +{ + struct mlx5_mtu out = { 0 }; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + } req = { + .nh = { + .nlmsg_len = NLMSG_LENGTH(sizeof(req.info)), + .nlmsg_type = RTM_GETLINK, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + }, + .info = { + .ifi_family = AF_UNSPEC, + .ifi_index = ifindex, + }, + }; + uint32_t sn = MLX5_NL_SN_GENERATE; + int ret; + + if (min_mtu == NULL || max_mtu == NULL) { + rte_errno = EINVAL; + return -rte_errno; + } + + ret = mlx5_nl_send(nl, &req.nh, sn); + if (ret < 0) + return ret; + + ret = mlx5_nl_recv(nl, sn, mlx5_nl_get_mtu_bounds_cb, &out); + if (ret < 0) + return ret; + + if (!out.min_mtu_set || !out.max_mtu_set) { + rte_errno = ENOENT; + return -rte_errno; + } + + *min_mtu = out.min_mtu; + *max_mtu = out.max_mtu; + + return ret; +} diff -Nru dpdk-22.11.9/drivers/common/mlx5/linux/mlx5_nl.h dpdk-22.11.11/drivers/common/mlx5/linux/mlx5_nl.h --- dpdk-22.11.9/drivers/common/mlx5/linux/mlx5_nl.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/common/mlx5/linux/mlx5_nl.h 2025-12-24 13:18:07.000000000 +0000 @@ -82,4 +82,7 @@ __rte_internal int mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex); +__rte_internal +int mlx5_nl_get_mtu_bounds(int nl, unsigned int ifindex, uint16_t *min_mtu, uint16_t *max_mtu); + #endif /* RTE_PMD_MLX5_NL_H_ */ diff -Nru dpdk-22.11.9/drivers/common/mlx5/mlx5_common.h dpdk-22.11.11/drivers/common/mlx5/mlx5_common.h --- dpdk-22.11.9/drivers/common/mlx5/mlx5_common.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/common/mlx5/mlx5_common.h 2025-12-24 13:18:07.000000000 +0000 @@ -502,9 +502,6 @@ */ typedef int (mlx5_class_driver_remove_t)(struct mlx5_common_device *cdev); -/** Device already probed can be probed again to check for new ports. */ -#define MLX5_DRV_PROBE_AGAIN 0x0004 - /** * A structure describing a mlx5 common class driver. */ diff -Nru dpdk-22.11.9/drivers/common/mlx5/mlx5_common_mr.c dpdk-22.11.11/drivers/common/mlx5/mlx5_common_mr.c --- dpdk-22.11.9/drivers/common/mlx5/mlx5_common_mr.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/common/mlx5/mlx5_common_mr.c 2025-12-24 13:18:07.000000000 +0000 @@ -1710,18 +1710,24 @@ * hugepage can be shared across mempools that also fit in it. 
*/ if (share_hugepage) { + struct mlx5_mempool_mr *gc_mrs = NULL; + rte_rwlock_write_lock(&share_cache->rwlock); LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next) { if (mpr->mrs[0].pmd_mr.addr == (void *)ranges[0].start) break; } if (mpr != NULL) { + /* Releasing MRs here can create a dead-lock on share_cache->rwlock */ + gc_mrs = new_mpr->mrs; new_mpr->mrs = mpr->mrs; mlx5_mempool_reg_attach(new_mpr); LIST_INSERT_HEAD(&share_cache->mempool_reg_list, new_mpr, next); } rte_rwlock_write_unlock(&share_cache->rwlock); + if (gc_mrs != NULL) + mlx5_free(gc_mrs); if (mpr != NULL) { DRV_LOG(DEBUG, "Shared MR %#x in PD %p for mempool %s with mempool %s", mpr->mrs[0].pmd_mr.lkey, pd, mp->name, diff -Nru dpdk-22.11.9/drivers/common/mlx5/version.map dpdk-22.11.11/drivers/common/mlx5/version.map --- dpdk-22.11.9/drivers/common/mlx5/version.map 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/common/mlx5/version.map 2025-12-24 13:18:07.000000000 +0000 @@ -123,6 +123,7 @@ mlx5_mr_mb2mr_bh; mlx5_nl_allmulti; # WINDOWS_NO_EXPORT + mlx5_nl_get_mtu_bounds; # WINDOWS_NO_EXPORT mlx5_nl_ifindex; # WINDOWS_NO_EXPORT mlx5_nl_init; # WINDOWS_NO_EXPORT mlx5_nl_mac_addr_add; # WINDOWS_NO_EXPORT diff -Nru dpdk-22.11.9/drivers/common/mlx5/windows/mlx5_win_defs.h dpdk-22.11.11/drivers/common/mlx5/windows/mlx5_win_defs.h --- dpdk-22.11.9/drivers/common/mlx5/windows/mlx5_win_defs.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/common/mlx5/windows/mlx5_win_defs.h 2025-12-24 13:18:07.000000000 +0000 @@ -184,7 +184,6 @@ #define MLX5DV_FLOW_TABLE_TYPE_NIC_RX MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX #define MLX5DV_FLOW_TABLE_TYPE_NIC_TX MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX #define MLX5DV_FLOW_TABLE_TYPE_FDB MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB -#define MLX5DV_FLOW_TABLE_TYPE_RDMA_RX MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX struct mlx5dv_flow_match_parameters { size_t match_sz; diff -Nru dpdk-22.11.9/drivers/crypto/caam_jr/caam_jr_uio.c dpdk-22.11.11/drivers/crypto/caam_jr/caam_jr_uio.c --- dpdk-22.11.9/drivers/crypto/caam_jr/caam_jr_uio.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/crypto/caam_jr/caam_jr_uio.c 2025-12-24 13:18:07.000000000 +0000 @@ -106,19 +106,11 @@ static bool file_name_match_extract(const char filename[], const char match[], int *number) { - char *substr = NULL; - - substr = strstr(filename, match); - if (substr == NULL) - return false; - /* substring was found in * read number following substring in */ - if (sscanf(filename + strlen(match), "%d", number) <= 0) - return false; - - return true; + return strstr(filename, match) != NULL && + sscanf(filename + strlen(match), "%d", number) > 0; } /** @brief Reads first line from a file. diff -Nru dpdk-22.11.9/drivers/crypto/cnxk/cnxk_ae.h dpdk-22.11.11/drivers/crypto/cnxk/cnxk_ae.h --- dpdk-22.11.9/drivers/crypto/cnxk/cnxk_ae.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/crypto/cnxk/cnxk_ae.h 2025-12-24 13:18:07.000000000 +0000 @@ -841,20 +841,17 @@ case RTE_CRYPTO_ASYM_OP_VERIFY: if (rsa->padding.type == RTE_CRYPTO_RSA_PADDING_NONE) { rsa->sign.length = rsa_ctx->n.length; - memcpy(rsa->sign.data, rptr, rsa->sign.length); + if (memcmp(rptr, rsa->message.data, rsa->message.length)) + cop->status = RTE_CRYPTO_OP_STATUS_ERROR; } else { /* Get length of signed output */ - rsa->sign.length = - rte_cpu_to_be_16(*((uint16_t *)rptr)); + rsa->sign.length = rte_cpu_to_be_16(*((uint16_t *)rptr)); /* * Offset output data pointer by length field - * (2 bytes) and copy signed data. 
+ * (2 bytes) and compare signed data. */ - memcpy(rsa->sign.data, rptr + 2, rsa->sign.length); - } - if (memcmp(rsa->sign.data, rsa->message.data, - rsa->message.length)) { - cop->status = RTE_CRYPTO_OP_STATUS_ERROR; + if (memcmp(rptr + 2, rsa->message.data, rsa->message.length)) + cop->status = RTE_CRYPTO_OP_STATUS_ERROR; } break; default: diff -Nru dpdk-22.11.9/drivers/crypto/ipsec_mb/ipsec_mb_ops.c dpdk-22.11.11/drivers/crypto/ipsec_mb/ipsec_mb_ops.c --- dpdk-22.11.9/drivers/crypto/ipsec_mb/ipsec_mb_ops.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/crypto/ipsec_mb/ipsec_mb_ops.c 2025-12-24 13:18:07.000000000 +0000 @@ -139,6 +139,7 @@ ipsec_mb_qp_release(struct rte_cryptodev *dev, uint16_t qp_id) { struct ipsec_mb_qp *qp = dev->data->queue_pairs[qp_id]; + uint16_t process_id = (uint16_t)getpid(); if (!qp) return 0; @@ -158,8 +159,10 @@ rte_free(qp); dev->data->queue_pairs[qp_id] = NULL; } else { /* secondary process */ - return ipsec_mb_secondary_qp_op(dev->data->dev_id, qp_id, - NULL, 0, RTE_IPSEC_MB_MP_REQ_QP_FREE); + if (qp->qp_used_by_pid == process_id) + return ipsec_mb_secondary_qp_op(dev->data->dev_id, + qp_id, NULL, 0, + RTE_IPSEC_MB_MP_REQ_QP_FREE); } return 0; } diff -Nru dpdk-22.11.9/drivers/crypto/mlx5/mlx5_crypto.c dpdk-22.11.11/drivers/crypto/mlx5/mlx5_crypto.c --- dpdk-22.11.9/drivers/crypto/mlx5/mlx5_crypto.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/crypto/mlx5/mlx5_crypto.c 2025-12-24 13:18:07.000000000 +0000 @@ -19,9 +19,7 @@ #include "mlx5_crypto.h" #define MLX5_CRYPTO_DRIVER_NAME crypto_mlx5 -#define MLX5_CRYPTO_LOG_NAME pmd.crypto.mlx5 #define MLX5_CRYPTO_MAX_QPS 128 -#define MLX5_CRYPTO_MAX_SEGS 56 #define MLX5_CRYPTO_FEATURE_FLAGS(wrapped_mode) \ (RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO | RTE_CRYPTODEV_FF_HW_ACCELERATED | \ diff -Nru dpdk-22.11.9/drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c dpdk-22.11.11/drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c --- dpdk-22.11.9/drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c 2025-12-24 13:18:07.000000000 +0000 @@ -368,7 +368,7 @@ } total_len = qat_sym_build_req_set_data(req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -414,7 +414,7 @@ } total_len = qat_sym_build_req_set_data(req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -503,7 +503,7 @@ rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = qat_sym_build_req_set_data(req, user_data, cookie, - data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -546,24 +546,27 @@ for (i = 0; i < n; i++) { struct qat_sym_op_cookie *cookie = qp->op_cookies[tail >> tx_queue->trailz]; + int error = 0; req = (struct icp_qat_fw_la_bulk_req *)( (uint8_t *)tx_queue->base_addr + tail); rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); if (vec->dest_sgl) { - data_len = qat_sym_build_req_set_data(req, - user_data[i], cookie, - vec->src_sgl[i].vec, vec->src_sgl[i].num, - vec->dest_sgl[i].vec, vec->dest_sgl[i].num); + data_len = qat_reqs_mid_set(&error, req, cookie, 
user_data[i], + &vec->src_sgl[i], &vec->dest_sgl[i], ofs); + /* In oop there is no offset, src/dst addresses are moved + * to avoid overwriting the dst header + */ + ofs.ofs.cipher.head = 0; } else { data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } - if (unlikely(data_len < 0)) + if (unlikely(data_len < 0) || error) break; enqueue_one_aead_job_gen3(ctx, req, &vec->iv[i], @@ -616,7 +619,7 @@ rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = qat_sym_build_req_set_data(req, user_data, cookie, - data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -662,27 +665,24 @@ for (i = 0; i < n; i++) { struct qat_sym_op_cookie *cookie = qp->op_cookies[tail >> tx_queue->trailz]; - int error = 0; req = (struct icp_qat_fw_la_bulk_req *)( (uint8_t *)tx_queue->base_addr + tail); rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); if (vec->dest_sgl) { - data_len = qat_reqs_mid_set(&error, req, cookie, user_data[i], - &vec->src_sgl[i], &vec->dest_sgl[i], ofs); - /* In oop there is no offset, src/dst addresses are moved - * to avoid overwriting the dst header - */ - ofs.ofs.cipher.head = 0; + data_len = qat_sym_build_req_set_data(req, + user_data[i], cookie, + vec->src_sgl[i].vec, vec->src_sgl[i].num, + vec->dest_sgl[i].vec, vec->dest_sgl[i].num, NULL, NULL); } else { data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } - if (unlikely(data_len < 0) || error) + if (unlikely(data_len < 0)) break; if (ctx->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_NULL) { null_digest.iova = cookie->digest_null_phys_addr; diff -Nru dpdk-22.11.9/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c dpdk-22.11.11/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c --- dpdk-22.11.9/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c 2025-12-24 13:18:07.000000000 +0000 @@ -207,7 +207,7 @@ } total_len = qat_sym_build_req_set_data(qat_req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -366,7 +366,7 @@ rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = qat_sym_build_req_set_data(req, user_data, cookie, - data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -426,7 +426,7 @@ data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } if (unlikely(data_len < 0) || error) diff -Nru dpdk-22.11.9/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h dpdk-22.11.11/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h --- dpdk-22.11.9/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h 2025-12-24 13:18:07.000000000 +0000 @@ -411,7 +411,8 @@ qat_sym_build_req_set_data(struct icp_qat_fw_la_bulk_req *req, void *opaque, struct qat_sym_op_cookie *cookie, struct rte_crypto_vec *src_vec, uint16_t n_src, - struct rte_crypto_vec 
*dst_vec, uint16_t n_dst) + struct rte_crypto_vec *dst_vec, uint16_t n_dst, + union rte_crypto_sym_ofs *ofs, struct rte_crypto_op *op) { struct qat_sgl *list; uint32_t i; @@ -483,6 +484,24 @@ dst_data_start = src_data_start; } + /* For crypto API only try to align the in-place buffers*/ + if (op != NULL && likely(n_dst == 0)) { + uint16_t offset = src_data_start & RTE_CACHE_LINE_MASK; + if (offset) { + rte_iova_t buff_addr = rte_mbuf_iova_get(op->sym->m_src); + /* make sure src_data_start is still within the buffer */ + if (src_data_start - offset >= buff_addr) { + src_data_start -= offset; + dst_data_start = src_data_start; + ofs->ofs.auth.head += offset; + ofs->ofs.cipher.head += offset; + tl_src += offset; + total_len_src = tl_src; + total_len_dst = tl_src; + } + } + } + req->comn_mid.src_data_addr = src_data_start; req->comn_mid.dest_data_addr = dst_data_start; req->comn_mid.src_length = total_len_src; diff -Nru dpdk-22.11.9/drivers/crypto/qat/dev/qat_sym_pmd_gen1.c dpdk-22.11.11/drivers/crypto/qat/dev/qat_sym_pmd_gen1.c --- dpdk-22.11.9/drivers/crypto/qat/dev/qat_sym_pmd_gen1.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/crypto/qat/dev/qat_sym_pmd_gen1.c 2025-12-24 13:18:07.000000000 +0000 @@ -236,7 +236,7 @@ } total_len = qat_sym_build_req_set_data(req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -281,7 +281,7 @@ } total_len = qat_sym_build_req_set_data(req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -328,7 +328,7 @@ } total_len = qat_sym_build_req_set_data(req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -375,7 +375,7 @@ } total_len = qat_sym_build_req_set_data(req, in_op, cookie, - in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num); + in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op); if (unlikely(total_len < 0)) { op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; return -EINVAL; @@ -508,7 +508,7 @@ rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = qat_sym_build_req_set_data(req, user_data, cookie, - data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -569,7 +569,7 @@ data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } if (unlikely(data_len < 0 || error)) @@ -622,7 +622,7 @@ rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = qat_sym_build_req_set_data(req, user_data, cookie, - data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -690,7 +690,7 @@ data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } if (unlikely(data_len < 0 || error)) @@ -749,7 +749,7 @@ rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = 
qat_sym_build_req_set_data(req, user_data, cookie, - data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -818,7 +818,7 @@ data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } if (unlikely(data_len < 0 || error)) @@ -882,7 +882,7 @@ rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req)); rte_prefetch0((uint8_t *)tx_queue->base_addr + tail); data_len = qat_sym_build_req_set_data(req, user_data, cookie, - data, n_data_vecs, NULL, 0); + data, n_data_vecs, NULL, 0, NULL, NULL); if (unlikely(data_len < 0)) return -1; @@ -942,7 +942,7 @@ data_len = qat_sym_build_req_set_data(req, user_data[i], cookie, vec->src_sgl[i].vec, - vec->src_sgl[i].num, NULL, 0); + vec->src_sgl[i].num, NULL, 0, NULL, NULL); } if (unlikely(data_len < 0) || error) diff -Nru dpdk-22.11.9/drivers/crypto/qat/qat_sym_session.c dpdk-22.11.11/drivers/crypto/qat/qat_sym_session.c --- dpdk-22.11.9/drivers/crypto/qat/qat_sym_session.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/crypto/qat/qat_sym_session.c 2025-12-24 13:18:07.000000000 +0000 @@ -2384,7 +2384,8 @@ hash->auth_counter.counter = 0; hash_cd_ctrl->outer_prefix_sz = digestsize; - auth_param->hash_state_sz = digestsize; + auth_param->hash_state_sz = (RTE_ALIGN_CEIL(auth_param->u2.aad_sz, + ICP_QAT_HW_CCM_AAD_ALIGNMENT) >> 3); memcpy(cdesc->cd_cur_ptr + state1_size, authkey, authkeylen); break; diff -Nru dpdk-22.11.9/drivers/dma/hisilicon/hisi_dmadev.c dpdk-22.11.11/drivers/dma/hisilicon/hisi_dmadev.c --- dpdk-22.11.9/drivers/dma/hisilicon/hisi_dmadev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/dma/hisilicon/hisi_dmadev.c 2025-12-24 13:18:07.000000000 +0000 @@ -378,6 +378,7 @@ hw->cq_head = 0; hw->cqs_completed = 0; hw->cqe_vld = 1; + hw->stop_proc = 0; hw->submitted = 0; hw->completed = 0; hw->errors = 0; @@ -390,12 +391,6 @@ } static int -hisi_dma_stop(struct rte_dma_dev *dev) -{ - return hisi_dma_reset_hw(dev->data->dev_private); -} - -static int hisi_dma_close(struct rte_dma_dev *dev) { if (rte_eal_process_type() == RTE_PROC_PRIMARY) { @@ -456,6 +451,37 @@ return 0; } +static int +hisi_dma_stop(struct rte_dma_dev *dev) +{ +#define MAX_WAIT_MSEC 10 + struct hisi_dma_dev *hw = dev->data->dev_private; + enum rte_dma_vchan_status status; + uint32_t i; + + /* Flag stop processing new requests. */ + hw->stop_proc = 1; + rte_delay_ms(1); + + /* Force set drop flag so that the hardware can quickly complete. */ + for (i = 0; i <= hw->sq_depth_mask; i++) + hw->sqe[i].dw0 |= SQE_DROP_FLAG; + + i = 0; + do { + hisi_dma_vchan_status(dev, 0, &status); + if (status != RTE_DMA_VCHAN_ACTIVE) + break; + rte_delay_ms(1); + } while (i++ < MAX_WAIT_MSEC); + if (status == RTE_DMA_VCHAN_ACTIVE) { + HISI_DMA_ERR(hw, "dev is still active!"); + return -EBUSY; + } + + return hisi_dma_reset_hw(dev->data->dev_private); +} + static void hisi_dma_dump_range(struct hisi_dma_dev *hw, FILE *f, uint32_t start, uint32_t end) @@ -550,14 +576,14 @@ " revision: 0x%x queue_id: %u ring_size: %u\n" " ridx: %u cridx: %u\n" " sq_head: %u sq_tail: %u cq_sq_head: %u\n" - " cq_head: %u cqs_completed: %u cqe_vld: %u\n" + " cq_head: %u cqs_completed: %u cqe_vld: %u stop_proc: %u\n" " submitted: %" PRIu64 " completed: %" PRIu64 " errors: %" PRIu64 " qfulls: %" PRIu64 "\n", hw->revision, hw->queue_id, hw->sq_depth_mask > 0 ? 
hw->sq_depth_mask + 1 : 0, hw->ridx, hw->cridx, hw->sq_head, hw->sq_tail, hw->cq_sq_head, - hw->cq_head, hw->cqs_completed, hw->cqe_vld, + hw->cq_head, hw->cqs_completed, hw->cqe_vld, hw->stop_proc, hw->submitted, hw->completed, hw->errors, hw->qfulls); hisi_dma_dump_queue(hw, f); hisi_dma_dump_common(hw, f); @@ -575,6 +601,9 @@ RTE_SET_USED(vchan); + if (unlikely(hw->stop_proc > 0)) + return -EPERM; + if (((hw->sq_tail + 1) & hw->sq_depth_mask) == hw->sq_head) { hw->qfulls++; return -ENOSPC; diff -Nru dpdk-22.11.9/drivers/dma/hisilicon/hisi_dmadev.h dpdk-22.11.11/drivers/dma/hisilicon/hisi_dmadev.h --- dpdk-22.11.9/drivers/dma/hisilicon/hisi_dmadev.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/dma/hisilicon/hisi_dmadev.h 2025-12-24 13:18:07.000000000 +0000 @@ -141,6 +141,7 @@ struct hisi_dma_sqe { uint32_t dw0; +#define SQE_DROP_FLAG BIT(4) #define SQE_FENCE_FLAG BIT(10) #define SQE_OPCODE_M2M 0x4 uint32_t dw1; @@ -211,6 +212,7 @@ */ uint16_t cqs_completed; uint8_t cqe_vld; /**< valid bit for CQE, will change for every round. */ + volatile uint8_t stop_proc; /**< whether stop processing new requests. */ uint64_t submitted; uint64_t completed; diff -Nru dpdk-22.11.9/drivers/event/cnxk/cn10k_eventdev.c dpdk-22.11.11/drivers/event/cnxk/cn10k_eventdev.c --- dpdk-22.11.9/drivers/event/cnxk/cn10k_eventdev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/event/cnxk/cn10k_eventdev.c 2025-12-24 13:18:07.000000000 +0000 @@ -113,6 +113,7 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cnxk_handle_event_t fn, void *arg) { + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(arg); uint64_t retry = CNXK_SSO_FLUSH_RETRY_MAX; struct cn10k_sso_hws *ws = hws; uint64_t cq_ds_cnt = 1; @@ -135,10 +136,7 @@ while (aq_cnt || cq_ds_cnt || ds_cnt) { plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0); - cn10k_sso_hws_get_work_empty( - ws, &ev, - (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | - NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F); + cn10k_sso_hws_get_work_empty(ws, &ev, dev->rx_offloads); if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) @@ -710,9 +708,7 @@ } while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54))); - cn10k_sso_hws_get_work_empty(ws, &ev, - (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | - NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F); + cn10k_sso_hws_get_work_empty(ws, &ev, dev->rx_offloads); if (is_pend && ev.u64) if (flush_cb) flush_cb(event_dev->data->dev_id, ev, args); @@ -741,10 +737,7 @@ SSO_TT_EMPTY) { plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0); - cn10k_sso_hws_get_work_empty( - ws, &ev, - (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | - NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F); + cn10k_sso_hws_get_work_empty(ws, &ev, dev->rx_offloads); if (ev.u64) { if (flush_cb) flush_cb(event_dev->data->dev_id, ev, args); diff -Nru dpdk-22.11.9/drivers/event/cnxk/cn10k_worker.h dpdk-22.11.11/drivers/event/cnxk/cn10k_worker.h --- dpdk-22.11.9/drivers/event/cnxk/cn10k_worker.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/event/cnxk/cn10k_worker.h 2025-12-24 13:18:07.000000000 +0000 @@ -129,15 +129,15 @@ uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM; struct cnxk_timesync_info *tstamp = ws->tstamp[port_id]; void *lookup_mem = ws->lookup_mem; + uint64_t meta_aura = 0, laddr = 0; + uint16_t lmt_id = 0, d_off = 0; uintptr_t lbase = ws->lmt_base; struct rte_event_vector *vec; - uint64_t meta_aura, laddr; uint16_t nb_mbufs, non_vec; - uint16_t lmt_id, d_off; struct rte_mbuf **wqe; struct 
rte_mbuf *mbuf; uint8_t loff = 0; - uint64_t sa_base; + uint64_t sa_base = 0; int i; mbuf_init |= ((uint64_t)port_id) << 48; diff -Nru dpdk-22.11.9/drivers/net/af_packet/rte_eth_af_packet.c dpdk-22.11.11/drivers/net/af_packet/rte_eth_af_packet.c --- dpdk-22.11.9/drivers/net/af_packet/rte_eth_af_packet.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/af_packet/rte_eth_af_packet.c 2025-12-24 13:18:07.000000000 +0000 @@ -462,7 +462,7 @@ rte_free(internals->rx_queue[q].rd); rte_free(internals->tx_queue[q].rd); } - free(internals->if_name); + rte_free(internals->if_name); rte_free(internals->rx_queue); rte_free(internals->tx_queue); @@ -752,9 +752,10 @@ PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFINDEX)", name); goto free_internals; } - (*internals)->if_name = strdup(pair->value); + (*internals)->if_name = rte_malloc_socket(name, ifnamelen + 1, 0, numa_node); if ((*internals)->if_name == NULL) goto free_internals; + strlcpy((*internals)->if_name, pair->value, ifnamelen + 1); (*internals)->if_index = ifr.ifr_ifindex; if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) { @@ -941,7 +942,7 @@ free_internals: rte_free((*internals)->rx_queue); rte_free((*internals)->tx_queue); - free((*internals)->if_name); + rte_free((*internals)->if_name); rte_free(*internals); return -1; } diff -Nru dpdk-22.11.9/drivers/net/ark/ark_ethdev_rx.c dpdk-22.11.11/drivers/net/ark/ark_ethdev_rx.c --- dpdk-22.11.9/drivers/net/ark/ark_ethdev_rx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/ark/ark_ethdev_rx.c 2025-12-24 13:18:07.000000000 +0000 @@ -530,7 +530,6 @@ eth_ark_dev_rx_queue_release(void *vqueue) { struct ark_rx_queue *queue; - uint32_t i; queue = (struct ark_rx_queue *)vqueue; if (queue == 0) @@ -543,9 +542,6 @@ /* Need to clear out mbufs here, dropping packets along the way */ eth_ark_rx_queue_drain(queue); - for (i = 0; i < queue->queue_size; ++i) - rte_pktmbuf_free(queue->reserve_q[i]); - rte_free(queue->reserve_q); rte_free(queue->paddress_q); rte_free(queue); diff -Nru dpdk-22.11.9/drivers/net/axgbe/axgbe_ethdev.c dpdk-22.11.11/drivers/net/axgbe/axgbe_ethdev.c --- dpdk-22.11.9/drivers/net/axgbe/axgbe_ethdev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/axgbe/axgbe_ethdev.c 2025-12-24 13:18:07.000000000 +0000 @@ -1735,6 +1735,7 @@ { struct axgbe_port *pdata = dev->data->dev_private; unsigned int mac_tscr = 0; + unsigned int value = 0; /*disable timestamp for all pkts*/ AXGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENALL, 0); @@ -1744,6 +1745,11 @@ AXGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSCFUPDT, 0); /*disable time stamp*/ AXGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 0); + + value = AXGMAC_IOREAD(pdata, MAC_TSCR); + value |= mac_tscr; + AXGMAC_IOWRITE(pdata, MAC_TSCR, value); + return 0; } diff -Nru dpdk-22.11.9/drivers/net/bonding/rte_eth_bond_8023ad.c dpdk-22.11.11/drivers/net/bonding/rte_eth_bond_8023ad.c --- dpdk-22.11.9/drivers/net/bonding/rte_eth_bond_8023ad.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/bonding/rte_eth_bond_8023ad.c 2025-12-24 13:18:07.000000000 +0000 @@ -1183,6 +1183,14 @@ continue; rte_ether_addr_copy(&internals->mode4.mac_addr, &slave->actor.system); + + /* Update physical NIC hardware MAC address to match bonding device. */ + if (rte_eth_dev_default_mac_addr_set(slave_id, &internals->mode4.mac_addr) != 0) { + RTE_BOND_LOG(ERR, + "Failed to update MAC address on member port %u", + slave_id); + } + /* Do nothing if this port is not an aggregator. In other case * Set NTT flag on every port that use this aggregator. 
*/ if (slave->aggregator_port_id != slave_id) diff -Nru dpdk-22.11.9/drivers/net/cnxk/cn10k_rx.h dpdk-22.11.11/drivers/net/cnxk/cn10k_rx.h --- dpdk-22.11.9/drivers/net/cnxk/cn10k_rx.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/cnxk/cn10k_rx.h 2025-12-24 13:18:07.000000000 +0000 @@ -580,7 +580,7 @@ const struct cpt_parse_hdr_s *hdr = (const struct cpt_parse_hdr_s *)cpth; uint64_t mbuf_init = vgetq_lane_u64(*rearm, 0); - struct cn10k_inb_priv_data *inb_priv; + struct cn10k_inb_priv_data *inb_priv = NULL; /* Clear checksum flags */ *ol_flags &= ~(RTE_MBUF_F_RX_L4_CKSUM_MASK | @@ -1022,9 +1022,9 @@ uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer); struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3; uint8_t loff = 0, lnum = 0, shft = 0; + uint16_t lmt_id = 0, d_off = 0; + uint64_t lbase = 0, laddr = 0; uint8x16_t f0, f1, f2, f3; - uint16_t lmt_id, d_off; - uint64_t lbase, laddr; uintptr_t sa_base = 0; uint16_t packets = 0; uint16_t pkts_left; diff -Nru dpdk-22.11.9/drivers/net/cnxk/cnxk_ethdev_mtr.c dpdk-22.11.11/drivers/net/cnxk/cnxk_ethdev_mtr.c --- dpdk-22.11.9/drivers/net/cnxk/cnxk_ethdev_mtr.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/cnxk/cnxk_ethdev_mtr.c 2025-12-24 13:18:07.000000000 +0000 @@ -1245,7 +1245,13 @@ cfg->alg = alg_map[profile->profile.alg]; cfg->lmode = profile->profile.packet_mode; - cfg->icolor = color_map[mtr->params.default_input_color]; + int idx = mtr->params.default_input_color; + + /* Index validation */ + if (idx >= RTE_COLORS) + cfg->icolor = ROC_NIX_BPF_COLOR_GREEN; + else + cfg->icolor = color_map[idx]; switch (RTE_MTR_COLOR_IN_PROTO_OUTER_IP) { case RTE_MTR_COLOR_IN_PROTO_OUTER_IP: diff -Nru dpdk-22.11.9/drivers/net/dpaa/dpaa_flow.c dpdk-22.11.11/drivers/net/dpaa/dpaa_flow.c --- dpdk-22.11.9/drivers/net/dpaa/dpaa_flow.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/dpaa/dpaa_flow.c 2025-12-24 13:18:07.000000000 +0000 @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2017-2019,2021 NXP + * Copyright 2017-2019,2021-2025 NXP */ /* System headers */ @@ -889,9 +889,9 @@ /* FM PCD Enable */ ret = fm_pcd_enable(pcd_handle); if (ret) { - fm_close(fman_handle); - fm_pcd_close(pcd_handle); DPAA_PMD_ERR("fm_pcd_enable: Failed"); + fm_pcd_close(pcd_handle); + fm_close(fman_handle); return -1; } diff -Nru dpdk-22.11.9/drivers/net/dpaa/rte_pmd_dpaa.h dpdk-22.11.11/drivers/net/dpaa/rte_pmd_dpaa.h --- dpdk-22.11.9/drivers/net/dpaa/rte_pmd_dpaa.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/dpaa/rte_pmd_dpaa.h 2025-12-24 13:18:07.000000000 +0000 @@ -5,6 +5,8 @@ #ifndef _PMD_DPAA_H_ #define _PMD_DPAA_H_ +#include + /** * @file rte_pmd_dpaa.h * diff -Nru dpdk-22.11.9/drivers/net/dpaa2/base/dpaa2_hw_dpni.c dpdk-22.11.11/drivers/net/dpaa2/base/dpaa2_hw_dpni.c --- dpdk-22.11.9/drivers/net/dpaa2/base/dpaa2_hw_dpni.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/dpaa2/base/dpaa2_hw_dpni.c 2025-12-24 13:18:07.000000000 +0000 @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved. 
- * Copyright 2016-2021 NXP + * Copyright 2016-2021,2023-2025 NXP * */ @@ -59,6 +59,7 @@ return -ENOMEM; } + memset(&kg_cfg, 0, sizeof(struct dpkg_profile_cfg)); kg_cfg.extracts[0].type = DPKG_EXTRACT_FROM_DATA; kg_cfg.extracts[0].extract.from_data.offset = offset; kg_cfg.extracts[0].extract.from_data.size = size; diff -Nru dpdk-22.11.9/drivers/net/dpaa2/base/dpaa2_hw_dpni_annot.h dpdk-22.11.11/drivers/net/dpaa2/base/dpaa2_hw_dpni_annot.h --- dpdk-22.11.9/drivers/net/dpaa2/base/dpaa2_hw_dpni_annot.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/dpaa2/base/dpaa2_hw_dpni_annot.h 2025-12-24 13:18:07.000000000 +0000 @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved. - * Copyright 2016,2019 NXP + * Copyright 2016,2019,2022,2024 NXP * */ @@ -298,13 +298,13 @@ #define DPAA2_ETH_FAS_PHE 0x00000020 #define DPAA2_ETH_FAS_BLE 0x00000010 /* L3 csum validation performed */ -#define DPAA2_ETH_FAS_L3CV 0x00000008 +#define DPAA2_ETH_FAS_L3CV 0x0000000800000000 /* L3 csum error */ -#define DPAA2_ETH_FAS_L3CE 0x00000004 +#define DPAA2_ETH_FAS_L3CE 0x0000000400000000 /* L4 csum validation performed */ -#define DPAA2_ETH_FAS_L4CV 0x00000002 +#define DPAA2_ETH_FAS_L4CV 0x0000000200000000 /* L4 csum error */ -#define DPAA2_ETH_FAS_L4CE 0x00000001 +#define DPAA2_ETH_FAS_L4CE 0x0000000100000000 #ifdef __cplusplus } diff -Nru dpdk-22.11.9/drivers/net/dpaa2/dpaa2_ethdev.c dpdk-22.11.11/drivers/net/dpaa2/dpaa2_ethdev.c --- dpdk-22.11.9/drivers/net/dpaa2/dpaa2_ethdev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/dpaa2/dpaa2_ethdev.c 2025-12-24 13:18:07.000000000 +0000 @@ -1242,7 +1242,7 @@ err_cfg.errors = DPNI_ERROR_L3CE | DPNI_ERROR_L4CE; /* if packet with parse error are not to be dropped */ - err_cfg.errors |= DPNI_ERROR_PHE; + err_cfg.errors |= DPNI_ERROR_PHE | DPNI_ERROR_BLE; err_cfg.error_action = DPNI_ERROR_ACTION_CONTINUE; } @@ -2909,7 +2909,6 @@ eth_dev->device = &dpaa2_dev->device; - dpaa2_dev->eth_dev = eth_dev; eth_dev->data->rx_mbuf_alloc_failed = 0; if (dpaa2_drv->drv_flags & RTE_DPAA2_DRV_INTR_LSC) @@ -2945,14 +2944,22 @@ rte_dpaa2_remove(struct rte_dpaa2_device *dpaa2_dev) { struct rte_eth_dev *eth_dev; - int ret; + int ret = 0; + + eth_dev = rte_eth_dev_allocated(dpaa2_dev->device.name); + if (eth_dev) { + ret = dpaa2_dev_close(eth_dev); + if (ret) + DPAA2_PMD_ERR("dpaa2_dev_close ret= %d", ret); + + ret = rte_eth_dev_release_port(eth_dev); + } - eth_dev = dpaa2_dev->eth_dev; - dpaa2_dev_close(eth_dev); dpaa2_valid_dev--; - if (!dpaa2_valid_dev) + if (!dpaa2_valid_dev) { rte_mempool_free(dpaa2_tx_sg_pool); - ret = rte_eth_dev_release_port(eth_dev); + dpaa2_tx_sg_pool = NULL; + } return ret; } diff -Nru dpdk-22.11.9/drivers/net/dpaa2/dpaa2_ethdev.h dpdk-22.11.11/drivers/net/dpaa2/dpaa2_ethdev.h --- dpdk-22.11.9/drivers/net/dpaa2/dpaa2_ethdev.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/dpaa2/dpaa2_ethdev.h 2025-12-24 13:18:07.000000000 +0000 @@ -306,10 +306,5 @@ int dpaa2_dev_recycle_config(struct rte_eth_dev *eth_dev); int dpaa2_dev_recycle_deconfig(struct rte_eth_dev *eth_dev); -int dpaa2_dev_recycle_qp_setup(struct rte_dpaa2_device *dpaa2_dev, - uint16_t qidx, uint64_t cntx, - eth_rx_burst_t tx_lpbk, eth_tx_burst_t rx_lpbk, - struct dpaa2_queue **txq, - struct dpaa2_queue **rxq); #endif /* _DPAA2_ETHDEV_H */ diff -Nru dpdk-22.11.9/drivers/net/dpaa2/dpaa2_recycle.c dpdk-22.11.11/drivers/net/dpaa2/dpaa2_recycle.c --- 
dpdk-22.11.9/drivers/net/dpaa2/dpaa2_recycle.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/dpaa2/dpaa2_recycle.c 2025-12-24 13:18:07.000000000 +0000 @@ -730,53 +730,3 @@ return ret; } - -int -dpaa2_dev_recycle_qp_setup(struct rte_dpaa2_device *dpaa2_dev, - uint16_t qidx, uint64_t cntx, - eth_rx_burst_t tx_lpbk, eth_tx_burst_t rx_lpbk, - struct dpaa2_queue **txq, - struct dpaa2_queue **rxq) -{ - struct rte_eth_dev *dev; - struct rte_eth_dev_data *data; - struct dpaa2_queue *txq_tmp; - struct dpaa2_queue *rxq_tmp; - struct dpaa2_dev_priv *priv; - - dev = dpaa2_dev->eth_dev; - data = dev->data; - priv = data->dev_private; - - if (!(priv->flags & DPAA2_TX_LOOPBACK_MODE) && - (tx_lpbk || rx_lpbk)) { - DPAA2_PMD_ERR("%s is NOT recycle device!", data->name); - - return -EINVAL; - } - - if (qidx >= data->nb_rx_queues || qidx >= data->nb_tx_queues) - return -EINVAL; - - rte_spinlock_lock(&priv->lpbk_qp_lock); - - if (tx_lpbk) - dev->tx_pkt_burst = tx_lpbk; - - if (rx_lpbk) - dev->rx_pkt_burst = rx_lpbk; - - txq_tmp = data->tx_queues[qidx]; - txq_tmp->lpbk_cntx = cntx; - rxq_tmp = data->rx_queues[qidx]; - rxq_tmp->lpbk_cntx = cntx; - - if (txq) - *txq = txq_tmp; - if (rxq) - *rxq = rxq_tmp; - - rte_spinlock_unlock(&priv->lpbk_qp_lock); - - return 0; -} diff -Nru dpdk-22.11.9/drivers/net/dpaa2/dpaa2_rxtx.c dpdk-22.11.11/drivers/net/dpaa2/dpaa2_rxtx.c --- dpdk-22.11.9/drivers/net/dpaa2/dpaa2_rxtx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/dpaa2/dpaa2_rxtx.c 2025-12-24 13:18:07.000000000 +0000 @@ -196,14 +196,10 @@ goto parse_done; } - if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L3CE)) + if (BIT_ISSET_AT_POS(annotation->word1, DPAA2_ETH_FAS_L3CE)) mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; - else - mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; - if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L4CE)) + else if (BIT_ISSET_AT_POS(annotation->word1, DPAA2_ETH_FAS_L4CE)) mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; - else - mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; if (BIT_ISSET_AT_POS(annotation->word4, L3_IP_1_FIRST_FRAGMENT | L3_IP_1_MORE_FRAGMENT | @@ -243,14 +239,10 @@ DPAA2_PMD_DP_DEBUG("(fast parse) Annotation = 0x%" PRIx64 "\t", annotation->word4); - if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L3CE)) + if (BIT_ISSET_AT_POS(annotation->word1, DPAA2_ETH_FAS_L3CE)) mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; - else - mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; - if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L4CE)) + else if (BIT_ISSET_AT_POS(annotation->word1, DPAA2_ETH_FAS_L4CE)) mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; - else - mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; if (dpaa2_enable_ts[mbuf->port]) { *dpaa2_timestamp_dynfield(mbuf) = annotation->word2; diff -Nru dpdk-22.11.9/drivers/net/dpaa2/dpaa2_tm.c dpdk-22.11.11/drivers/net/dpaa2/dpaa2_tm.c --- dpdk-22.11.9/drivers/net/dpaa2/dpaa2_tm.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/dpaa2/dpaa2_tm.c 2025-12-24 13:18:07.000000000 +0000 @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2020-2021 NXP + * Copyright 2020-2024 NXP */ #include @@ -726,12 +726,12 @@ tx_cr_shaper.max_burst_size = node->profile->params.committed.size; tx_cr_shaper.rate_limit = - node->profile->params.committed.rate / - (1024 * 1024); + (node->profile->params.committed.rate / + (1024 * 1024)) * 8; tx_er_shaper.max_burst_size = node->profile->params.peak.size; tx_er_shaper.rate_limit = - node->profile->params.peak.rate / (1024 * 
1024); + (node->profile->params.peak.rate / (1024 * 1024)) * 8; /* root node */ if (node->parent == NULL) { DPAA2_PMD_DEBUG("LNI S.rate = %u, burst =%u\n", diff -Nru dpdk-22.11.9/drivers/net/e1000/base/e1000_mac.c dpdk-22.11.11/drivers/net/e1000/base/e1000_mac.c --- dpdk-22.11.9/drivers/net/e1000/base/e1000_mac.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/e1000/base/e1000_mac.c 2025-12-24 13:18:07.000000000 +0000 @@ -1826,6 +1826,7 @@ return ret_val; mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL); + rte_compiler_barrier(); mac->ledctl_mode1 = mac->ledctl_default; mac->ledctl_mode2 = mac->ledctl_default; diff -Nru dpdk-22.11.9/drivers/net/ena/base/ena_com.c dpdk-22.11.11/drivers/net/ena/base/ena_com.c --- dpdk-22.11.9/drivers/net/ena/base/ena_com.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/ena/base/ena_com.c 2025-12-24 13:18:07.000000000 +0000 @@ -2014,13 +2014,13 @@ } else { rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_MAX_QUEUES_NUM, 0); + if (rc) + return rc; + memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue, sizeof(get_resp.u.max_queue)); ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size; - - if (rc) - return rc; } rc = ena_com_get_feature(ena_dev, &get_resp, diff -Nru dpdk-22.11.9/drivers/net/ena/ena_ethdev.c dpdk-22.11.11/drivers/net/ena/ena_ethdev.c --- dpdk-22.11.9/drivers/net/ena/ena_ethdev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/ena/ena_ethdev.c 2025-12-24 13:18:07.000000000 +0000 @@ -8,6 +8,7 @@ #include #include #include +#include #include "ena_ethdev.h" #include "ena_logs.h" @@ -2084,6 +2085,24 @@ return 0; } +/* + * Returns PCI BAR virtual address. + * If the physical address is not page-aligned, + * adjusts the virtual address by the page offset. + * Assumes page size is a power of 2. + */ +static void *pci_bar_addr(struct rte_pci_device *dev, uint32_t bar) +{ + const struct rte_mem_resource *res = &dev->mem_resource[bar]; + size_t offset = res->phys_addr % rte_mem_page_size(); + void *vaddr = RTE_PTR_ADD(res->addr, offset); + + PMD_INIT_LOG(INFO, "PCI BAR [%u]: phys_addr=0x%" PRIx64 ", addr=%p, offset=0x%zx, adjusted_addr=%p\n", + bar, res->phys_addr, res->addr, offset, vaddr); + + return vaddr; +} + static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) { struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; @@ -2128,16 +2147,17 @@ intr_handle = pci_dev->intr_handle; - adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr; - adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr; - + adapter->regs = pci_bar_addr(pci_dev, ENA_REGS_BAR); if (!adapter->regs) { PMD_INIT_LOG(CRIT, "Failed to access registers BAR(%d)\n", ENA_REGS_BAR); return -ENXIO; } - ena_dev->reg_bar = adapter->regs; + + /* Memory BAR may be NULL on non LLQ supported devices */ + adapter->dev_mem_base = pci_bar_addr(pci_dev, ENA_MEM_BAR); + /* Pass device data as a pointer which can be passed to the IO functions * by the ena_com (for example - the memory allocation). 
*/ diff -Nru dpdk-22.11.9/drivers/net/enetfec/enet_ethdev.c dpdk-22.11.11/drivers/net/enetfec/enet_ethdev.c --- dpdk-22.11.9/drivers/net/enetfec/enet_ethdev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/enetfec/enet_ethdev.c 2025-12-24 13:18:07.000000000 +0000 @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2020-2021 NXP + * Copyright 2020-2021,2023-2024 NXP */ #include @@ -171,8 +171,10 @@ bdp = rxq->bd.base; for (i = 0; i < rxq->bd.ring_size; i++) { mbuf = rxq->rx_mbuf[i]; - rxq->rx_mbuf[i] = NULL; - rte_pktmbuf_free(mbuf); + if (mbuf) { + rxq->rx_mbuf[i] = NULL; + rte_pktmbuf_free(mbuf); + } bdp = enet_get_nextdesc(bdp, &rxq->bd); } } @@ -349,7 +351,7 @@ for (i = 0; i < dev->data->nb_rx_queues; i++) rte_free(fep->rx_queues[i]); for (i = 0; i < dev->data->nb_tx_queues; i++) - rte_free(fep->rx_queues[i]); + rte_free(fep->tx_queues[i]); } static const unsigned short offset_des_active_rxq[] = { @@ -382,6 +384,17 @@ return -EINVAL; } + if (queue_idx > 0) { + ENETFEC_PMD_ERR("Multi queue not supported"); + return -EINVAL; + } + + /* Tx deferred start is not supported */ + if (tx_conf->tx_deferred_start) { + ENETFEC_PMD_ERR("Tx deferred start not supported"); + return -EINVAL; + } + /* allocate transmit queue */ txq = rte_zmalloc(NULL, sizeof(*txq), RTE_CACHE_LINE_SIZE); if (txq == NULL) { @@ -389,7 +402,7 @@ return -ENOMEM; } - if (nb_desc > MAX_TX_BD_RING_SIZE) { + if (nb_desc != MAX_TX_BD_RING_SIZE) { nb_desc = MAX_TX_BD_RING_SIZE; ENETFEC_PMD_WARN("modified the nb_desc to MAX_TX_BD_RING_SIZE"); } @@ -473,7 +486,7 @@ return -ENOMEM; } - if (nb_rx_desc > MAX_RX_BD_RING_SIZE) { + if (nb_rx_desc != MAX_RX_BD_RING_SIZE) { nb_rx_desc = MAX_RX_BD_RING_SIZE; ENETFEC_PMD_WARN("modified the nb_desc to MAX_RX_BD_RING_SIZE"); } @@ -553,7 +566,7 @@ } } rte_free(rxq); - return errno; + return -ENOMEM; } static const struct eth_dev_ops enetfec_ops = { diff -Nru dpdk-22.11.9/drivers/net/enetfec/enet_rxtx.c dpdk-22.11.11/drivers/net/enetfec/enet_rxtx.c --- dpdk-22.11.9/drivers/net/enetfec/enet_rxtx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/enetfec/enet_rxtx.c 2025-12-24 13:18:07.000000000 +0000 @@ -121,10 +121,11 @@ (rxq->fep->flag_csum & RX_FLAG_CSUM_EN)) { if ((rte_read32(&ebdp->bd_esc) & rte_cpu_to_le_32(RX_FLAG_CSUM_ERR)) == 0) { - /* don't check it */ - mbuf->ol_flags = RTE_MBUF_F_RX_IP_CKSUM_BAD; - } else { + /* No checksum error - checksum is good */ mbuf->ol_flags = RTE_MBUF_F_RX_IP_CKSUM_GOOD; + } else { + /* Checksum error detected */ + mbuf->ol_flags = RTE_MBUF_F_RX_IP_CKSUM_BAD; } } @@ -238,7 +239,8 @@ if (txq->fep->bufdesc_ex) { struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; - if (mbuf->ol_flags == RTE_MBUF_F_RX_IP_CKSUM_GOOD) + if (mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM | + RTE_MBUF_F_TX_UDP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM)) estatus |= TX_BD_PINS | TX_BD_IINS; rte_write32(0, &ebdp->bd_bdu); diff -Nru dpdk-22.11.9/drivers/net/enetfec/enet_uio.c dpdk-22.11.11/drivers/net/enetfec/enet_uio.c --- dpdk-22.11.9/drivers/net/enetfec/enet_uio.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/enetfec/enet_uio.c 2025-12-24 13:18:07.000000000 +0000 @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2021 NXP + * Copyright 2021,2024-2025 NXP */ #include @@ -32,13 +32,7 @@ static bool file_name_match_extract(const char filename[], const char match[]) { - char *substr = NULL; - - substr = strstr(filename, match); - if (substr == NULL) - return false; - - return true; + 
return strstr(filename, match) != NULL; } /* @@ -66,13 +60,16 @@ "%s/%s/%s", root, subdir, filename); fd = open(absolute_file_name, O_RDONLY); - if (fd <= 0) + if (fd < 0) { ENETFEC_PMD_ERR("Error opening file %s", absolute_file_name); + return fd; + } /* read UIO device name from first line in file */ ret = read(fd, line, FEC_UIO_MAX_DEVICE_FILE_NAME_LENGTH); if (ret <= 0) { ENETFEC_PMD_ERR("Error reading file %s", absolute_file_name); + close(fd); return ret; } close(fd); @@ -139,6 +136,10 @@ } /* Read mapping size and physical address expressed in hexa(base 16) */ uio_map_size = strtol(uio_map_size_str, NULL, 16); + if (uio_map_size <= 0 || uio_map_size > INT_MAX) { + ENETFEC_PMD_ERR("Invalid mapping size: %u.", uio_map_size); + return NULL; + } uio_map_p_addr = strtol(uio_map_p_addr_str, NULL, 16); if (uio_map_id == 0) { diff -Nru dpdk-22.11.9/drivers/net/fm10k/base/fm10k_common.c dpdk-22.11.11/drivers/net/fm10k/base/fm10k_common.c --- dpdk-22.11.9/drivers/net/fm10k/base/fm10k_common.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/fm10k/base/fm10k_common.c 2025-12-24 13:18:07.000000000 +0000 @@ -477,11 +477,11 @@ * Function invalidates the index values for the queues so any updates that * may have happened are ignored and the base for the queue stats is reset. **/ -void fm10k_unbind_hw_stats_q(struct fm10k_hw_stats_q *q, u32 idx, u32 count) +void fm10k_unbind_hw_stats_q(struct fm10k_hw_stats_q *q, u32 idx __rte_unused, u32 count) { u32 i; - for (i = 0; i < count; i++, idx++, q++) { + for (i = 0; i < count; i++, q++) { q->rx_stats_idx = 0; q->tx_stats_idx = 0; } diff -Nru dpdk-22.11.9/drivers/net/gve/gve_ethdev.c dpdk-22.11.11/drivers/net/gve/gve_ethdev.c --- dpdk-22.11.9/drivers/net/gve/gve_ethdev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/gve/gve_ethdev.c 2025-12-24 13:18:07.000000000 +0000 @@ -28,13 +28,45 @@ writeb('\n', driver_version_register); } +static const struct rte_memzone * +gve_alloc_using_mz(const char *name, uint32_t num_pages) +{ + const struct rte_memzone *mz; + mz = rte_memzone_reserve_aligned(name, num_pages * PAGE_SIZE, + rte_socket_id(), + RTE_MEMZONE_IOVA_CONTIG, PAGE_SIZE); + if (mz == NULL) + PMD_DRV_LOG(ERR, "Failed to alloc memzone %s.", name); + return mz; +} + static int -gve_alloc_queue_page_list(struct gve_priv *priv, uint32_t id, uint32_t pages) +gve_alloc_using_malloc(void **bufs, uint32_t num_entries) +{ + uint32_t i; + + for (i = 0; i < num_entries; i++) { + bufs[i] = rte_malloc_socket(NULL, PAGE_SIZE, PAGE_SIZE, rte_socket_id()); + if (bufs[i] == NULL) { + PMD_DRV_LOG(ERR, "Failed to malloc"); + goto free_bufs; + } + } + return 0; + +free_bufs: + while (i > 0) + rte_free(bufs[--i]); + + return -ENOMEM; +} + +static int +gve_alloc_queue_page_list(struct gve_priv *priv, uint32_t id, uint32_t pages, + bool is_rx) { - char z_name[RTE_MEMZONE_NAMESIZE]; struct gve_queue_page_list *qpl; - const struct rte_memzone *mz; - dma_addr_t page_bus; + int err = 0; uint32_t i; if (priv->num_registered_pages + pages > @@ -45,31 +77,79 @@ return -EINVAL; } qpl = &priv->qpl[id]; - snprintf(z_name, sizeof(z_name), "gve_%s_qpl%d", priv->pci_dev->device.name, id); - mz = rte_memzone_reserve_aligned(z_name, pages * PAGE_SIZE, - rte_socket_id(), - RTE_MEMZONE_IOVA_CONTIG, PAGE_SIZE); - if (mz == NULL) { - PMD_DRV_LOG(ERR, "Failed to alloc %s.", z_name); - return -ENOMEM; - } + qpl->page_buses = rte_zmalloc("qpl page buses", pages * sizeof(dma_addr_t), 0); if (qpl->page_buses == NULL) { PMD_DRV_LOG(ERR, "Failed to alloc qpl %u page 
buses", id); return -ENOMEM; } - page_bus = mz->iova; - for (i = 0; i < pages; i++) { - qpl->page_buses[i] = page_bus; - page_bus += PAGE_SIZE; + + if (is_rx) { + /* RX QPL need not be IOVA contiguous. + * Allocate 4K size buffers using malloc + */ + qpl->qpl_bufs = rte_zmalloc("qpl bufs", + pages * sizeof(void *), 0); + if (qpl->qpl_bufs == NULL) { + PMD_DRV_LOG(ERR, "Failed to alloc qpl bufs"); + err = -ENOMEM; + goto free_qpl_page_buses; + } + + err = gve_alloc_using_malloc(qpl->qpl_bufs, pages); + if (err) + goto free_qpl_page_bufs; + + /* Populate the IOVA addresses */ + for (i = 0; i < pages; i++) + qpl->page_buses[i] = + rte_malloc_virt2iova(qpl->qpl_bufs[i]); + } else { + char z_name[RTE_MEMZONE_NAMESIZE]; + + snprintf(z_name, sizeof(z_name), "gve_%s_qpl%d", priv->pci_dev->device.name, id); + + /* TX QPL needs to be IOVA contiguous + * Allocate QPL using memzone + */ + qpl->mz = gve_alloc_using_mz(z_name, pages); + if (!qpl->mz) { + err = -ENOMEM; + goto free_qpl_page_buses; + } + + /* Populate the IOVA addresses */ + for (i = 0; i < pages; i++) + qpl->page_buses[i] = qpl->mz->iova + i * PAGE_SIZE; } + qpl->id = id; - qpl->mz = mz; qpl->num_entries = pages; priv->num_registered_pages += pages; return 0; + +free_qpl_page_bufs: + rte_free(qpl->qpl_bufs); +free_qpl_page_buses: + rte_free(qpl->page_buses); + return err; +} + +/* + * Free QPL bufs in RX QPLs. Should not be used on TX QPLs. + **/ +static void +gve_free_qpl_bufs(struct gve_queue_page_list *qpl) +{ + uint32_t i; + + for (i = 0; i < qpl->num_entries; i++) + rte_free(qpl->qpl_bufs[i]); + + rte_free(qpl->qpl_bufs); + qpl->qpl_bufs = NULL; } static void @@ -79,9 +159,22 @@ uint16_t nb_rxqs = priv->max_nb_rxq; uint32_t i; - for (i = 0; i < nb_txqs + nb_rxqs; i++) { - if (priv->qpl[i].mz != NULL) + if (priv->queue_format != GVE_GQI_QPL_FORMAT) + return; + + /* Free TX QPLs. */ + for (i = 0; i < nb_txqs; i++) { + if (priv->qpl[i].mz) { rte_memzone_free(priv->qpl[i].mz); + priv->qpl[i].mz = NULL; + } + rte_free(priv->qpl[i].page_buses); + } + + /* Free RX QPLs. 
*/ + for (; i < nb_rxqs; i++) { + if (priv->qpl[i].qpl_bufs) + gve_free_qpl_bufs(&priv->qpl[i]); rte_free(priv->qpl[i].page_buses); } @@ -562,11 +655,16 @@ } for (i = 0; i < priv->max_nb_txq + priv->max_nb_rxq; i++) { - if (i < priv->max_nb_txq) + bool is_rx; + + if (i < priv->max_nb_txq) { pages = priv->tx_pages_per_qpl; - else + is_rx = false; + } else { pages = priv->rx_data_slot_cnt; - err = gve_alloc_queue_page_list(priv, i, pages); + is_rx = true; + } + err = gve_alloc_queue_page_list(priv, i, pages, is_rx); if (err != 0) { PMD_DRV_LOG(ERR, "Failed to alloc qpl %u.", i); goto err_qpl; diff -Nru dpdk-22.11.9/drivers/net/gve/gve_ethdev.h dpdk-22.11.11/drivers/net/gve/gve_ethdev.h --- dpdk-22.11.9/drivers/net/gve/gve_ethdev.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/gve/gve_ethdev.h 2025-12-24 13:18:07.000000000 +0000 @@ -39,7 +39,10 @@ uint32_t id; /* unique id */ uint32_t num_entries; dma_addr_t *page_buses; /* the dma addrs of the pages */ - const struct rte_memzone *mz; + union { + const struct rte_memzone *mz; /* memzone allocated for TX queue */ + void **qpl_bufs; /* RX qpl-buffer list allocated using malloc*/ + }; }; /* A TX desc ring entry */ diff -Nru dpdk-22.11.9/drivers/net/gve/gve_rx.c dpdk-22.11.11/drivers/net/gve/gve_rx.c --- dpdk-22.11.9/drivers/net/gve/gve_rx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/gve/gve_rx.c 2025-12-24 13:18:07.000000000 +0000 @@ -105,9 +105,9 @@ len = rte_be_to_cpu_16(rxd->len) - GVE_RX_PAD; rxe = rxq->sw_ring[rx_id]; if (rxq->is_gqi_qpl) { - addr = (uint64_t)(rxq->qpl->mz->addr) + rx_id * PAGE_SIZE + GVE_RX_PAD; + addr = (uint64_t)rxq->qpl->qpl_bufs[rx_id] + GVE_RX_PAD; rte_memcpy((void *)((size_t)rxe->buf_addr + rxe->data_off), - (void *)(size_t)addr, len); + (void *)(size_t)addr, len); } rxe->pkt_len = len; rxe->data_len = len; diff -Nru dpdk-22.11.9/drivers/net/hns3/hns3_ethdev.c dpdk-22.11.11/drivers/net/hns3/hns3_ethdev.c --- dpdk-22.11.9/drivers/net/hns3/hns3_ethdev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/hns3/hns3_ethdev.c 2025-12-24 13:18:07.000000000 +0000 @@ -4422,25 +4422,25 @@ ret = hns3_dcb_init(hw); if (ret) { PMD_INIT_LOG(ERR, "Failed to init dcb: %d", ret); - goto err_mac_init; + goto rm_vlan_table; } ret = hns3_init_fd_config(hns); if (ret) { PMD_INIT_LOG(ERR, "Failed to init flow director: %d", ret); - goto err_mac_init; + goto rm_vlan_table; } ret = hns3_config_tso(hw, HNS3_TSO_MSS_MIN, HNS3_TSO_MSS_MAX); if (ret) { PMD_INIT_LOG(ERR, "Failed to config tso: %d", ret); - goto err_mac_init; + goto rm_vlan_table; } ret = hns3_config_gro(hw, false); if (ret) { PMD_INIT_LOG(ERR, "Failed to config gro: %d", ret); - goto err_mac_init; + goto rm_vlan_table; } /* @@ -4452,22 +4452,33 @@ ret = hns3_init_ring_with_vector(hw); if (ret) { PMD_INIT_LOG(ERR, "Failed to init ring intr vector: %d", ret); - goto err_mac_init; + goto rm_vlan_table; } ret = hns3_ptp_init(hw); if (ret) { PMD_INIT_LOG(ERR, "Failed to init PTP, ret = %d", ret); - goto err_mac_init; + goto rm_vlan_table; } return 0; - +rm_vlan_table: + hns3_rm_all_vlan_table(hns, true); err_mac_init: hns3_uninit_umv_space(hw); return ret; } +static void +hns3_uninit_hardware(struct hns3_hw *hw) +{ + struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); + + (void)hns3_uninit_umv_space(hw); + hns3_ptp_uninit(hw); + hns3_rm_all_vlan_table(hns, true); +} + static int hns3_clear_hw(struct hns3_hw *hw) { @@ -4699,8 +4710,7 @@ err_enable_intr: hns3_fdir_filter_uninit(hns); err_fdir: - hns3_uninit_umv_space(hw); - 
hns3_ptp_uninit(hw); + hns3_uninit_hardware(hw); err_init_hw: hns3_stats_uninit(hw); err_get_config: @@ -4735,8 +4745,7 @@ hns3_promisc_uninit(hw); hns3_flow_uninit(eth_dev); hns3_fdir_filter_uninit(hns); - hns3_uninit_umv_space(hw); - hns3_ptp_uninit(hw); + hns3_uninit_hardware(hw); hns3_stats_uninit(hw); hns3_config_mac_tnl_int(hw, false); hns3_pf_disable_irq0(hw); diff -Nru dpdk-22.11.9/drivers/net/hns3/hns3_ethdev.h dpdk-22.11.11/drivers/net/hns3/hns3_ethdev.h --- dpdk-22.11.9/drivers/net/hns3/hns3_ethdev.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/hns3/hns3_ethdev.h 2025-12-24 13:18:07.000000000 +0000 @@ -5,7 +5,6 @@ #ifndef HNS3_ETHDEV_H #define HNS3_ETHDEV_H -#include #include #include #include @@ -75,6 +74,7 @@ #define HNS3_DEFAULT_MTU 1500UL #define HNS3_DEFAULT_FRAME_LEN (HNS3_DEFAULT_MTU + HNS3_ETH_OVERHEAD) #define HNS3_HIP08_MIN_TX_PKT_LEN 33 +#define HNS3_MIN_TUN_PKT_LEN 65 #define HNS3_BITS_PER_BYTE 8 @@ -655,7 +655,6 @@ struct hns3_port_base_vlan_config port_base_vlan_cfg; - pthread_mutex_t flows_lock; /* rte_flow ops lock */ struct hns3_fdir_rule_list flow_fdir_list; /* flow fdir rule list */ struct hns3_rss_filter_list flow_rss_list; /* flow RSS rule list */ struct hns3_flow_mem_list flow_list; diff -Nru dpdk-22.11.9/drivers/net/hns3/hns3_fdir.c dpdk-22.11.11/drivers/net/hns3/hns3_fdir.c --- dpdk-22.11.9/drivers/net/hns3/hns3_fdir.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/hns3/hns3_fdir.c 2025-12-24 13:18:07.000000000 +0000 @@ -1072,17 +1072,6 @@ if (hns->is_vf) return 0; - /* - * This API is called in the reset recovery process, the parent function - * must hold hw->lock. - * There maybe deadlock if acquire hw->flows_lock directly because rte - * flow driver ops first acquire hw->flows_lock and then may acquire - * hw->lock. - * So here first release the hw->lock and then acquire the - * hw->flows_lock to avoid deadlock. 
- */ - rte_spinlock_unlock(&hw->lock); - pthread_mutex_lock(&hw->flows_lock); TAILQ_FOREACH(fdir_filter, &fdir_info->fdir_list, entries) { ret = hns3_config_action(hw, &fdir_filter->fdir_conf); if (!ret) @@ -1093,8 +1082,6 @@ break; } } - pthread_mutex_unlock(&hw->flows_lock); - rte_spinlock_lock(&hw->lock); if (err) { hns3_err(hw, "Fail to restore FDIR filter, ret = %d", ret); diff -Nru dpdk-22.11.9/drivers/net/hns3/hns3_flow.c dpdk-22.11.11/drivers/net/hns3/hns3_flow.c --- dpdk-22.11.9/drivers/net/hns3/hns3_flow.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/hns3/hns3_flow.c 2025-12-24 13:18:07.000000000 +0000 @@ -2032,18 +2032,6 @@ return 0; } -static int -hns3_restore_rss_filter(struct hns3_hw *hw) -{ - int ret; - - pthread_mutex_lock(&hw->flows_lock); - ret = hns3_reconfig_all_rss_filter(hw); - pthread_mutex_unlock(&hw->flows_lock); - - return ret; -} - int hns3_restore_filter(struct hns3_adapter *hns) { @@ -2054,7 +2042,7 @@ if (ret != 0) return ret; - return hns3_restore_rss_filter(hw); + return hns3_reconfig_all_rss_filter(hw); } static int @@ -2446,10 +2434,10 @@ struct hns3_filter_info filter_info = {0}; int ret; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); ret = hns3_flow_validate(dev, attr, pattern, actions, error, &filter_info); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return ret; } @@ -2463,9 +2451,9 @@ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct rte_flow *flow; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); flow = hns3_flow_create(dev, attr, pattern, actions, error); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return flow; } @@ -2477,9 +2465,9 @@ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); int ret; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); ret = hns3_flow_destroy(dev, flow, error); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return ret; } @@ -2490,9 +2478,9 @@ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); int ret; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); ret = hns3_flow_flush(dev, error); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return ret; } @@ -2505,9 +2493,9 @@ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); int ret; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); ret = hns3_flow_query(dev, flow, actions, data, error); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return ret; } @@ -2555,7 +2543,7 @@ if (hns3_check_indir_action(conf, action, error)) return NULL; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); act_count = (const struct rte_flow_action_count *)action->conf; if (act_count->id >= pf->fdir.fd_cfg.cnt_num[HNS3_FD_STAGE_1]) { @@ -2580,11 +2568,11 @@ handle.indirect_type = HNS3_INDIRECT_ACTION_TYPE_COUNT; handle.counter_id = counter->id; - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return (struct rte_flow_action_handle *)handle.val64; err_exit: - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return NULL; } @@ -2597,11 +2585,11 @@ struct rte_flow_action_handle indir; struct hns3_flow_counter *counter; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); indir.val64 = (uint64_t)handle; if (indir.indirect_type != HNS3_INDIRECT_ACTION_TYPE_COUNT) { - 
pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, handle, "Invalid indirect type"); @@ -2609,14 +2597,14 @@ counter = hns3_counter_lookup(dev, indir.counter_id); if (counter == NULL) { - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, handle, "Counter id not exist"); } if (counter->ref_cnt > 1) { - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return rte_flow_error_set(error, EBUSY, RTE_FLOW_ERROR_TYPE_HANDLE, handle, "Counter id in use"); @@ -2624,7 +2612,7 @@ (void)hns3_counter_release(dev, indir.counter_id); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return 0; } @@ -2639,11 +2627,11 @@ struct rte_flow flow; int ret; - pthread_mutex_lock(&hw->flows_lock); + rte_spinlock_lock(&hw->lock); indir.val64 = (uint64_t)handle; if (indir.indirect_type != HNS3_INDIRECT_ACTION_TYPE_COUNT) { - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, handle, "Invalid indirect type"); @@ -2653,7 +2641,7 @@ flow.counter_id = indir.counter_id; ret = hns3_counter_query(dev, &flow, (struct rte_flow_query_count *)data, error); - pthread_mutex_unlock(&hw->flows_lock); + rte_spinlock_unlock(&hw->lock); return ret; } @@ -2687,14 +2675,10 @@ hns3_flow_init(struct rte_eth_dev *dev) { struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); - pthread_mutexattr_t attr; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return; - pthread_mutexattr_init(&attr); - pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); - pthread_mutex_init(&hw->flows_lock, &attr); dev->data->dev_flags |= RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE; TAILQ_INIT(&hw->flow_fdir_list); diff -Nru dpdk-22.11.9/drivers/net/hns3/hns3_rxtx.c dpdk-22.11.11/drivers/net/hns3/hns3_rxtx.c --- dpdk-22.11.9/drivers/net/hns3/hns3_rxtx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/hns3/hns3_rxtx.c 2025-12-24 13:18:07.000000000 +0000 @@ -4124,6 +4124,37 @@ } } +static bool +hns3_tx_pktmbuf_append(struct hns3_tx_queue *txq, + struct rte_mbuf *tx_pkt) +{ + uint16_t add_len = 0; + uint32_t ptype; + char *appended; + + if (unlikely(tx_pkt->ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ) && + rte_pktmbuf_pkt_len(tx_pkt) < HNS3_MIN_TUN_PKT_LEN)) { + ptype = rte_net_get_ptype(tx_pkt, NULL, RTE_PTYPE_L2_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK | + RTE_PTYPE_TUNNEL_MASK); + if (ptype & RTE_PTYPE_TUNNEL_MASK) + add_len = HNS3_MIN_TUN_PKT_LEN - rte_pktmbuf_pkt_len(tx_pkt); + } else if (unlikely(rte_pktmbuf_pkt_len(tx_pkt) < txq->min_tx_pkt_len)) { + add_len = txq->min_tx_pkt_len - rte_pktmbuf_pkt_len(tx_pkt); + } + + if (unlikely(add_len > 0)) { + appended = rte_pktmbuf_append(tx_pkt, add_len); + if (appended == NULL) { + txq->dfx_stats.pkt_padding_fail_cnt++; + return false; + } + memset(appended, 0, add_len); + } + + return true; +} + uint16_t hns3_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, @@ -4201,21 +4232,8 @@ * by hardware in Tx direction, driver need to pad it to avoid * error. 
*/ - if (unlikely(rte_pktmbuf_pkt_len(tx_pkt) < - txq->min_tx_pkt_len)) { - uint16_t add_len; - char *appended; - - add_len = txq->min_tx_pkt_len - - rte_pktmbuf_pkt_len(tx_pkt); - appended = rte_pktmbuf_append(tx_pkt, add_len); - if (appended == NULL) { - txq->dfx_stats.pkt_padding_fail_cnt++; - break; - } - - memset(appended, 0, add_len); - } + if (!hns3_tx_pktmbuf_append(txq, tx_pkt)) + break; m_seg = tx_pkt; diff -Nru dpdk-22.11.9/drivers/net/hns3/hns3_rxtx_vec.c dpdk-22.11.11/drivers/net/hns3/hns3_rxtx_vec.c --- dpdk-22.11.9/drivers/net/hns3/hns3_rxtx_vec.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/hns3/hns3_rxtx_vec.c 2025-12-24 13:18:07.000000000 +0000 @@ -69,8 +69,12 @@ /* * Clear VLD bit for the first descriptor rearmed in case * of going to receive packets later. + * And also point mbufs to fake_mbuf to prevent modification + * of the mbuf field during vector packet receiving. */ rxdp[0].rx.bd_base_info = 0; + for (i = 0; i < HNS3_VECTOR_RX_OFFSET_TABLE_LEN; i++) + rxep[i].mbuf = &rxq->fake_mbuf; rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++; return; } diff -Nru dpdk-22.11.9/drivers/net/i40e/i40e_hash.c dpdk-22.11.11/drivers/net/i40e/i40e_hash.c --- dpdk-22.11.9/drivers/net/i40e/i40e_hash.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/i40e/i40e_hash.c 2025-12-24 13:18:07.000000000 +0000 @@ -561,7 +561,7 @@ } static uint64_t -i40e_hash_get_inset(uint64_t rss_types) +i40e_hash_get_inset(uint64_t rss_types, bool symmetric_enable) { uint64_t mask, inset = 0; int i; @@ -608,6 +608,17 @@ I40E_INSET_IPV4_SRC | I40E_INSET_IPV6_SRC); } + /* SCTP Verification Tag is not required in hash computation for SYMMETRIC_TOEPLITZ */ + if (symmetric_enable) { + mask = rss_types & RTE_ETH_RSS_NONFRAG_IPV4_SCTP; + if (mask == RTE_ETH_RSS_NONFRAG_IPV4_SCTP) + inset &= ~I40E_INSET_SCTP_VT; + + mask = rss_types & RTE_ETH_RSS_NONFRAG_IPV6_SCTP; + if (mask == RTE_ETH_RSS_NONFRAG_IPV6_SCTP) + inset &= ~I40E_INSET_SCTP_VT; + } + return inset; } @@ -1113,6 +1124,7 @@ RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, "RSS Queues not supported when pattern specified"); + rss_conf->symmetric_enable = false; /* by default, symmetric is disabled */ switch (rss_act->func) { case RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ: @@ -1140,7 +1152,7 @@ rss_conf->conf.func = rss_act->func; rss_conf->conf.types = rss_act->types; - rss_conf->inset = i40e_hash_get_inset(rss_act->types); + rss_conf->inset = i40e_hash_get_inset(rss_act->types, rss_conf->symmetric_enable); return i40e_hash_get_pattern_pctypes(dev, pattern, rss_act, rss_conf, error); diff -Nru dpdk-22.11.9/drivers/net/i40e/i40e_rxtx.c dpdk-22.11.11/drivers/net/i40e/i40e_rxtx.c --- dpdk-22.11.9/drivers/net/i40e/i40e_rxtx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/i40e/i40e_rxtx.c 2025-12-24 13:18:07.000000000 +0000 @@ -128,9 +128,13 @@ #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC if (rte_le_to_cpu_16(rxdp->wb.qword2.ext_status) & (1 << I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) { - mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ | - RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_VLAN; - mb->vlan_tci_outer = mb->vlan_tci; + if ((mb->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED) == 0) { + mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED; + } else { + /* if two tags, move Tag1 to outer tag field */ + mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ; + mb->vlan_tci_outer = mb->vlan_tci; + } mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_2); PMD_RX_LOG(DEBUG, 
"Descriptor l2tag2_1: %u, l2tag2_2: %u", rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_1), diff -Nru dpdk-22.11.9/drivers/net/iavf/iavf_rxtx.c dpdk-22.11.11/drivers/net/iavf/iavf_rxtx.c --- dpdk-22.11.9/drivers/net/iavf/iavf_rxtx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/iavf/iavf_rxtx.c 2025-12-24 13:18:07.000000000 +0000 @@ -1131,11 +1131,13 @@ #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC if (rte_le_to_cpu_16(rxdp->wb.status_error1) & (1 << IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_S)) { - mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | - RTE_MBUF_F_RX_QINQ | - RTE_MBUF_F_RX_VLAN_STRIPPED | - RTE_MBUF_F_RX_VLAN; - mb->vlan_tci_outer = mb->vlan_tci; + if ((mb->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED) == 0) { + mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED; + } else { + /* if two tags, move Tag1 to outer tag field */ + mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ; + mb->vlan_tci_outer = mb->vlan_tci; + } mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag2_2nd); PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u", rte_le_to_cpu_16(rxdp->wb.l2tag2_1st), @@ -1551,7 +1553,8 @@ rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd); pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0); - if (iavf_timestamp_dynflag > 0) { + if (iavf_timestamp_dynflag > 0 && + rxd.wb.time_stamp_low & IAVF_RX_FLX_DESC_TS_VALID) { ts_ns = iavf_tstamp_convert_32b_64b(rxq->phc_time, rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high)); @@ -1720,7 +1723,8 @@ rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd); pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0); - if (iavf_timestamp_dynflag > 0) { + if (iavf_timestamp_dynflag > 0 && + rxd.wb.time_stamp_low & IAVF_RX_FLX_DESC_TS_VALID) { ts_ns = iavf_tstamp_convert_32b_64b(rxq->phc_time, rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high)); @@ -2005,7 +2009,8 @@ stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0); pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0); - if (iavf_timestamp_dynflag > 0) { + if (iavf_timestamp_dynflag > 0 && + rxdp[j].wb.time_stamp_low & IAVF_RX_FLX_DESC_TS_VALID) { ts_ns = iavf_tstamp_convert_32b_64b(rxq->phc_time, rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high)); diff -Nru dpdk-22.11.9/drivers/net/iavf/iavf_rxtx.h dpdk-22.11.11/drivers/net/iavf/iavf_rxtx.h --- dpdk-22.11.9/drivers/net/iavf/iavf_rxtx.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/iavf/iavf_rxtx.h 2025-12-24 13:18:07.000000000 +0000 @@ -611,6 +611,9 @@ /* for iavf_32b_rx_flex_desc.pkt_len member */ #define IAVF_RX_FLX_DESC_PKT_LEN_M (0x3FFF) /* 14-bits */ +/* Valid indicator bit for the time_stamp_low field */ +#define IAVF_RX_FLX_DESC_TS_VALID (0x1UL) + int iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, diff -Nru dpdk-22.11.9/drivers/net/iavf/iavf_vchnl.c dpdk-22.11.11/drivers/net/iavf/iavf_vchnl.c --- dpdk-22.11.9/drivers/net/iavf/iavf_vchnl.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/iavf/iavf_vchnl.c 2025-12-24 13:18:07.000000000 +0000 @@ -93,7 +93,7 @@ void *param, size_t param_alloc_size) { struct iavf_event_handler *handler = &event_handler; - char notify_byte; + char notify_byte = 0; struct iavf_event_element *elem = rte_malloc(NULL, sizeof(*elem) + param_alloc_size, 0); if (!elem) return; diff -Nru dpdk-22.11.9/drivers/net/iavf/rte_pmd_iavf.h dpdk-22.11.11/drivers/net/iavf/rte_pmd_iavf.h --- dpdk-22.11.9/drivers/net/iavf/rte_pmd_iavf.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/iavf/rte_pmd_iavf.h 2025-12-24 
13:18:07.000000000 +0000 @@ -15,6 +15,7 @@ */ #include + #include #include #include @@ -184,6 +185,7 @@ static inline void rte_pmd_ifd_dump_proto_xtr_metadata(struct rte_mbuf *m) { +#ifdef ALLOW_EXPERIMENTAL_API union rte_pmd_ifd_proto_xtr_metadata data; if (!rte_pmd_ifd_dynf_proto_xtr_metadata_avail()) @@ -243,6 +245,10 @@ else if (m->ol_flags & RTE_IAVF_PKT_RX_DYNF_PROTO_XTR_IP_OFFSET) printf(" - Flexible descriptor's Extraction: ip_offset=%u", data.ip_ofs); +#else + RTE_SET_USED(m); + RTE_VERIFY(false); +#endif } #ifdef __cplusplus diff -Nru dpdk-22.11.9/drivers/net/ice/base/ice_flow.c dpdk-22.11.11/drivers/net/ice/base/ice_flow.c --- dpdk-22.11.9/drivers/net/ice/base/ice_flow.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/ice/base/ice_flow.c 2025-12-24 13:18:07.000000000 +0000 @@ -2629,10 +2629,6 @@ status = ice_flow_assoc_hw_prof(hw, blk, dest_vsi_handle, fdir_vsi_handle, id); - if (status) - goto free_params; - - return ICE_SUCCESS; free_params: ice_free(hw, params); diff -Nru dpdk-22.11.9/drivers/net/ice/base/ice_switch.c dpdk-22.11.11/drivers/net/ice/base/ice_switch.c --- dpdk-22.11.9/drivers/net/ice/base/ice_switch.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/ice/base/ice_switch.c 2025-12-24 13:18:07.000000000 +0000 @@ -2209,6 +2209,7 @@ bool *refresh_required) { ice_declare_bitmap(result_bm, ICE_MAX_FV_WORDS); + struct ice_recp_grp_entry *rg, *tmprg_entry; struct ice_aqc_recipe_data_elem *tmp; u16 num_recps = ICE_MAX_NUM_RECIPES; struct ice_prot_lkup_ext *lkup_exts; @@ -2250,6 +2251,15 @@ */ lkup_exts = &recps[rid].lkup_exts; + /* Remove duplicate entries */ + LIST_FOR_EACH_ENTRY_SAFE(rg, tmprg_entry, &recps[rid].rg_list, + ice_recp_grp_entry, l_entry) { + if (rg->rid == rid) { + LIST_DEL(&rg->l_entry); + ice_free(hw, rg); + } + } + for (sub_recps = 0; sub_recps < num_recps; sub_recps++) { struct ice_aqc_recipe_data_elem root_bufs = tmp[sub_recps]; struct ice_recp_grp_entry *rg_entry; @@ -7868,10 +7878,6 @@ */ ice_get_compat_fv_bitmap(hw, rinfo, fv_bitmap); - status = ice_get_fv(hw, lkup_exts, fv_bitmap, &rm->fv_list); - if (status) - goto err_unroll; - /* Create any special protocol/offset pairs, such as looking at tunnel * bits by extracting metadata */ @@ -7879,6 +7885,10 @@ if (status) goto err_free_lkup_exts; + status = ice_get_fv(hw, lkup_exts, fv_bitmap, &rm->fv_list); + if (status) + goto err_unroll; + /* Group match words into recipes using preferred recipe grouping * criteria. 
*/ diff -Nru dpdk-22.11.9/drivers/net/ice/base/ice_type.h dpdk-22.11.11/drivers/net/ice/base/ice_type.h --- dpdk-22.11.9/drivers/net/ice/base/ice_type.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/ice/base/ice_type.h 2025-12-24 13:18:07.000000000 +0000 @@ -832,7 +832,7 @@ struct ice_orom_info orom; /* Option ROM version info */ struct ice_nvm_info nvm; /* NVM version information */ struct ice_bank_info banks; /* Flash Bank information */ - u16 sr_words; /* Shadow RAM size in words */ + u32 sr_words; /* Shadow RAM size in words */ u32 flash_size; /* Size of available flash in bytes */ u8 blank_nvm_mode; /* is NVM empty (no FW present) */ }; diff -Nru dpdk-22.11.9/drivers/net/ice/ice_acl_filter.c dpdk-22.11.11/drivers/net/ice/ice_acl_filter.c --- dpdk-22.11.9/drivers/net/ice/ice_acl_filter.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/ice/ice_acl_filter.c 2025-12-24 13:18:07.000000000 +0000 @@ -115,7 +115,10 @@ else params.width = ICE_AQC_ACL_KEY_WIDTH_BYTES * 3; - params.depth = ICE_AQC_ACL_TCAM_DEPTH; + if (pf_num > 4) + params.depth = ICE_AQC_ACL_TCAM_DEPTH / 2; + else + params.depth = ICE_AQC_ACL_TCAM_DEPTH; params.entry_act_pairs = 1; params.concurr = false; diff -Nru dpdk-22.11.9/drivers/net/ice/ice_ethdev.c dpdk-22.11.11/drivers/net/ice/ice_ethdev.c --- dpdk-22.11.9/drivers/net/ice/ice_ethdev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/ice/ice_ethdev.c 2025-12-24 13:18:07.000000000 +0000 @@ -5299,10 +5299,16 @@ uint64_t *stat) { uint64_t new_data; + uint32_t lo_old, hi, lo; - new_data = (uint64_t)ICE_READ_REG(hw, loreg); - new_data |= (uint64_t)(ICE_READ_REG(hw, hireg) & ICE_8_BIT_MASK) << - ICE_32_BIT_WIDTH; + do { + lo_old = ICE_READ_REG(hw, loreg); + hi = ICE_READ_REG(hw, hireg); + lo = ICE_READ_REG(hw, loreg); + } while (lo_old > lo); + + new_data = (uint64_t)lo; + new_data |= (uint64_t)(hi & ICE_8_BIT_MASK) << ICE_32_BIT_WIDTH; if (!offset_loaded) *offset = new_data; diff -Nru dpdk-22.11.9/drivers/net/ice/ice_ethdev.h dpdk-22.11.11/drivers/net/ice/ice_ethdev.h --- dpdk-22.11.9/drivers/net/ice/ice_ethdev.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/ice/ice_ethdev.h 2025-12-24 13:18:07.000000000 +0000 @@ -345,7 +345,7 @@ uint64_t input_set_i; /* only for tunnel inner fields */ uint32_t mark_flag; - struct ice_parser_profile *prof; + struct ice_parser_profile prof; bool parser_ena; u8 *pkt_buf; u8 pkt_len; diff -Nru dpdk-22.11.9/drivers/net/ice/ice_fdir_filter.c dpdk-22.11.11/drivers/net/ice/ice_fdir_filter.c --- dpdk-22.11.9/drivers/net/ice/ice_fdir_filter.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/ice/ice_fdir_filter.c 2025-12-24 13:18:07.000000000 +0000 @@ -1330,7 +1330,7 @@ if (filter->parser_ena) { struct ice_hw *hw = ICE_PF_TO_HW(pf); - int id = ice_find_first_bit(filter->prof->ptypes, UINT16_MAX); + int id = ice_find_first_bit(filter->prof.ptypes, UINT16_MAX); int ptg = hw->blk[ICE_BLK_FD].xlt1.t[id]; u16 ctrl_vsi = pf->fdir.fdir_vsi->idx; u16 main_vsi = pf->main_vsi->idx; @@ -1340,11 +1340,11 @@ if (pi->fdir_actived_cnt != 0) { for (i = 0; i < ICE_MAX_FV_WORDS; i++) if (pi->prof.fv[i].proto_id != - filter->prof->fv[i].proto_id || + filter->prof.fv[i].proto_id || pi->prof.fv[i].offset != - filter->prof->fv[i].offset || + filter->prof.fv[i].offset || pi->prof.fv[i].msk != - filter->prof->fv[i].msk) + filter->prof.fv[i].msk) break; if (i == ICE_MAX_FV_WORDS) { fv_found = true; @@ -1354,7 +1354,7 @@ if (!fv_found) { ret = ice_flow_set_hw_prof(hw, main_vsi, 
ctrl_vsi, - filter->prof, ICE_BLK_FD); + &filter->prof, ICE_BLK_FD); if (ret) goto error; } @@ -1364,12 +1364,12 @@ goto error; if (!fv_found) { - for (i = 0; i < filter->prof->fv_num; i++) { + for (i = 0; i < filter->prof.fv_num; i++) { pi->prof.fv[i].proto_id = - filter->prof->fv[i].proto_id; + filter->prof.fv[i].proto_id; pi->prof.fv[i].offset = - filter->prof->fv[i].offset; - pi->prof.fv[i].msk = filter->prof->fv[i].msk; + filter->prof.fv[i].offset; + pi->prof.fv[i].msk = filter->prof.fv[i].msk; } pi->fdir_actived_cnt = 1; } @@ -1467,7 +1467,6 @@ return -rte_errno; error: - rte_free(filter->prof); rte_free(filter->pkt_buf); return -rte_errno; } @@ -1489,7 +1488,7 @@ if (filter->parser_ena) { struct ice_hw *hw = ICE_PF_TO_HW(pf); - int id = ice_find_first_bit(filter->prof->ptypes, UINT16_MAX); + int id = ice_find_first_bit(filter->prof.ptypes, UINT16_MAX); int ptg = hw->blk[ICE_BLK_FD].xlt1.t[id]; u16 ctrl_vsi = pf->fdir.fdir_vsi->idx; u16 main_vsi = pf->main_vsi->idx; @@ -1517,7 +1516,6 @@ flow->rule = NULL; - rte_free(filter->prof); rte_free(filter->pkt_buf); rte_free(filter); @@ -1883,7 +1881,7 @@ uint16_t tmp_val = 0; uint16_t pkt_len = 0; uint8_t tmp = 0; - int i, j; + int i, j, ret_val; pkt_len = strlen((char *)(uintptr_t)raw_spec->pattern); if (strlen((char *)(uintptr_t)raw_mask->pattern) != @@ -1938,24 +1936,22 @@ pkt_len /= 2; - if (ice_parser_run(ad->psr, tmp_spec, pkt_len, &rslt)) - return -rte_errno; - - if (!tmp_mask) - return -rte_errno; - - filter->prof = (struct ice_parser_profile *) - ice_malloc(&ad->hw, sizeof(*filter->prof)); - if (!filter->prof) - return -ENOMEM; + if (ice_parser_run(ad->psr, tmp_spec, pkt_len, &rslt)) { + ret_val = -rte_errno; + goto raw_error; + } if (ice_parser_profile_init(&rslt, tmp_spec, tmp_mask, - pkt_len, ICE_BLK_FD, true, filter->prof)) - return -rte_errno; + pkt_len, ICE_BLK_FD, true, &filter->prof)) { + ret_val = -rte_errno; + goto raw_error; + } u8 *pkt_buf = (u8 *)ice_malloc(&ad->hw, pkt_len + 1); - if (!pkt_buf) - return -ENOMEM; + if (!pkt_buf) { + ret_val = -ENOMEM; + goto raw_error; + } rte_memcpy(pkt_buf, tmp_spec, pkt_len); filter->pkt_buf = pkt_buf; @@ -1966,6 +1962,11 @@ rte_free(tmp_spec); rte_free(tmp_mask); break; + +raw_error: + rte_free(tmp_spec); + rte_free(tmp_mask); + return ret_val; } case RTE_FLOW_ITEM_TYPE_ETH: @@ -2509,7 +2510,6 @@ rte_free(item); return ret; error: - rte_free(filter->prof); rte_free(filter->pkt_buf); rte_free(item); return ret; diff -Nru dpdk-22.11.9/drivers/net/ice/ice_rxtx.c dpdk-22.11.11/drivers/net/ice/ice_rxtx.c --- dpdk-22.11.9/drivers/net/ice/ice_rxtx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/ice/ice_rxtx.c 2025-12-24 13:18:07.000000000 +0000 @@ -1658,9 +1658,13 @@ #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC if (rte_le_to_cpu_16(rxdp->wb.status_error1) & (1 << ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S)) { - mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ | - RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_VLAN; - mb->vlan_tci_outer = mb->vlan_tci; + if ((mb->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED) == 0) { + mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED; + } else { + /* if two tags, move Tag1 to outer tag field */ + mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ; + mb->vlan_tci_outer = mb->vlan_tci; + } mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag2_2nd); PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u", rte_le_to_cpu_16(rxdp->wb.l2tag2_1st), diff -Nru dpdk-22.11.9/drivers/net/ice/ice_rxtx_vec_common.h 
dpdk-22.11.11/drivers/net/ice/ice_rxtx_vec_common.h --- dpdk-22.11.9/drivers/net/ice/ice_rxtx_vec_common.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/ice/ice_rxtx_vec_common.h 2025-12-24 13:18:07.000000000 +0000 @@ -249,6 +249,7 @@ #define ICE_TX_NO_VECTOR_FLAGS ( \ RTE_ETH_TX_OFFLOAD_MULTI_SEGS | \ + RTE_ETH_TX_OFFLOAD_QINQ_INSERT | \ RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM | \ RTE_ETH_TX_OFFLOAD_TCP_TSO | \ RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO | \ @@ -259,7 +260,6 @@ #define ICE_TX_VECTOR_OFFLOAD ( \ RTE_ETH_TX_OFFLOAD_VLAN_INSERT | \ - RTE_ETH_TX_OFFLOAD_QINQ_INSERT | \ RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \ RTE_ETH_TX_OFFLOAD_SCTP_CKSUM | \ RTE_ETH_TX_OFFLOAD_UDP_CKSUM | \ @@ -268,7 +268,8 @@ #define ICE_RX_VECTOR_OFFLOAD ( \ RTE_ETH_RX_OFFLOAD_CHECKSUM | \ RTE_ETH_RX_OFFLOAD_SCTP_CKSUM | \ - RTE_ETH_RX_OFFLOAD_VLAN | \ + RTE_ETH_RX_OFFLOAD_VLAN_STRIP | \ + RTE_ETH_RX_OFFLOAD_VLAN_FILTER | \ RTE_ETH_RX_OFFLOAD_RSS_HASH) #define ICE_VECTOR_PATH 0 @@ -414,8 +415,8 @@ *txd_hi |= ((uint64_t)td_offset) << ICE_TXD_QW1_OFFSET_S; - /* Tx VLAN/QINQ insertion Offload */ - if (ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) { + /* Tx VLAN insertion Offload */ + if (ol_flags & RTE_MBUF_F_TX_VLAN) { td_cmd |= ICE_TX_DESC_CMD_IL2TAG1; *txd_hi |= ((uint64_t)tx_pkt->vlan_tci << ICE_TXD_QW1_L2TAG1_S); diff -Nru dpdk-22.11.9/drivers/net/memif/rte_eth_memif.c dpdk-22.11.11/drivers/net/memif/rte_eth_memif.c --- dpdk-22.11.9/drivers/net/memif/rte_eth_memif.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/memif/rte_eth_memif.c 2025-12-24 13:18:07.000000000 +0000 @@ -1701,7 +1701,8 @@ static int memif_check_socket_filename(const char *filename) { - char *dir = NULL, *tmp; + char *dir = NULL; + const char *tmp; uint32_t idx; int ret = 0; diff -Nru dpdk-22.11.9/drivers/net/mlx4/mlx4_rxtx.c dpdk-22.11.11/drivers/net/mlx4/mlx4_rxtx.c --- dpdk-22.11.9/drivers/net/mlx4/mlx4_rxtx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx4/mlx4_rxtx.c 2025-12-24 13:18:07.000000000 +0000 @@ -638,7 +638,7 @@ thdr.vto = sq->buf; /* New TXBB, stash the first 32bits for later use. 
*/ pv[*pv_counter].dst = (volatile uint32_t *)thdr.to; - pv[(*pv_counter)++].val = *(uint32_t *)from, + pv[(*pv_counter)++].val = *(uint32_t *)from; from += sizeof(uint32_t); thdr.to += sizeof(uint32_t); remain_size -= txbb_avail_space + sizeof(uint32_t); diff -Nru dpdk-22.11.9/drivers/net/mlx5/hws/mlx5dr_buddy.c dpdk-22.11.11/drivers/net/mlx5/hws/mlx5dr_buddy.c --- dpdk-22.11.9/drivers/net/mlx5/hws/mlx5dr_buddy.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/hws/mlx5dr_buddy.c 2025-12-24 13:18:07.000000000 +0000 @@ -147,6 +147,7 @@ simple_free(buddy->num_free); simple_free(buddy->bits); + simple_free(buddy); } int mlx5dr_buddy_alloc_mem(struct mlx5dr_buddy_mem *buddy, int order) diff -Nru dpdk-22.11.9/drivers/net/mlx5/hws/mlx5dr_definer.c dpdk-22.11.11/drivers/net/mlx5/hws/mlx5dr_definer.c --- dpdk-22.11.9/drivers/net/mlx5/hws/mlx5dr_definer.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/hws/mlx5dr_definer.c 2025-12-24 13:18:07.000000000 +0000 @@ -6,8 +6,6 @@ #define GTP_PDU_SC 0x85 #define BAD_PORT 0xBAD -#define ETH_TYPE_IPV4_VXLAN 0x0800 -#define ETH_TYPE_IPV6_VXLAN 0x86DD #define UDP_VXLAN_PORT 4789 #define STE_NO_VLAN 0x0 diff -Nru dpdk-22.11.9/drivers/net/mlx5/hws/mlx5dr_pool.c dpdk-22.11.11/drivers/net/mlx5/hws/mlx5dr_pool.c --- dpdk-22.11.9/drivers/net/mlx5/hws/mlx5dr_pool.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/hws/mlx5dr_pool.c 2025-12-24 13:18:07.000000000 +0000 @@ -167,7 +167,7 @@ mlx5dr_pool_buddy_get_next_buddy(struct mlx5dr_pool *pool, int idx, uint32_t order, bool *is_new_buddy) { - static struct mlx5dr_buddy_mem *buddy; + struct mlx5dr_buddy_mem *buddy; uint32_t new_buddy_size; buddy = pool->db.buddy_manager->buddies[idx]; @@ -271,7 +271,6 @@ buddy = pool->db.buddy_manager->buddies[i]; if (buddy) { mlx5dr_buddy_cleanup(buddy); - simple_free(buddy); pool->db.buddy_manager->buddies[i] = NULL; } } diff -Nru dpdk-22.11.9/drivers/net/mlx5/linux/mlx5_ethdev_os.c dpdk-22.11.11/drivers/net/mlx5/linux/mlx5_ethdev_os.c --- dpdk-22.11.9/drivers/net/mlx5/linux/mlx5_ethdev_os.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/linux/mlx5_ethdev_os.c 2025-12-24 13:18:07.000000000 +0000 @@ -156,7 +156,7 @@ * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[MLX5_NAMESIZE]) +mlx5_get_ifname(const struct rte_eth_dev *dev, char ifname[MLX5_NAMESIZE]) { struct mlx5_priv *priv = dev->data->dev_private; unsigned int ifindex; @@ -170,12 +170,11 @@ ifindex = mlx5_ifindex(dev); if (!ifindex) { if (!priv->representor) - return mlx5_get_ifname_sysfs(priv->sh->ibdev_path, - *ifname); + return mlx5_get_ifname_sysfs(priv->sh->ibdev_path, ifname); rte_errno = ENXIO; return -rte_errno; } - if (if_indextoname(ifindex, &(*ifname)[0])) + if (if_indextoname(ifindex, ifname)) return 0; rte_errno = errno; return -rte_errno; @@ -233,16 +232,46 @@ static int mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) { - char ifname[sizeof(ifr->ifr_name)]; + char ifname[MLX5_NAMESIZE]; int ret; - ret = mlx5_get_ifname(dev, &ifname); + ret = mlx5_get_ifname(dev, ifname); if (ret) return -rte_errno; return mlx5_ifreq_by_ifname(ifname, req, ifr); } /** + * Get device minimum and maximum allowed MTU values. + * + * @param dev + * Pointer to Ethernet device. + * @param[out] min_mtu + * Minimum MTU value output buffer. + * @param[out] max_mtu + * Maximum MTU value output buffer. 
+ * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_os_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu) +{ + struct mlx5_priv *priv = dev->data->dev_private; + int nl_route; + int ret; + + nl_route = mlx5_nl_init(NETLINK_ROUTE, 0); + if (nl_route < 0) + return nl_route; + + ret = mlx5_nl_get_mtu_bounds(nl_route, priv->if_index, min_mtu, max_mtu); + + close(nl_route); + return ret; +} + +/** * Get device MTU. * * @param dev @@ -618,7 +647,7 @@ mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) { int ret; - struct rte_eth_link dev_link; + struct rte_eth_link dev_link = { 0 }; time_t start_time = time(NULL); int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT; @@ -1981,4 +2010,3 @@ rte_mem_unmap(base, MLX5_ST_SZ_BYTES(initial_seg)); return 0; } - diff -Nru dpdk-22.11.9/drivers/net/mlx5/linux/mlx5_flow_os.c dpdk-22.11.11/drivers/net/mlx5/linux/mlx5_flow_os.c --- dpdk-22.11.9/drivers/net/mlx5/linux/mlx5_flow_os.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/linux/mlx5_flow_os.c 2025-12-24 13:18:07.000000000 +0000 @@ -23,18 +23,12 @@ const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : MLX5_FLOW_LAYER_OUTER_L3; - const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : - MLX5_FLOW_LAYER_OUTER_L4; int ret; if (!(item_flags & l3m)) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, "L3 is mandatory to filter on L4"); - if (item_flags & l4m) - return rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ITEM, item, - "multiple L4 layers not supported"); if (target_protocol != 0xff && target_protocol != IPPROTO_ESP) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, diff -Nru dpdk-22.11.9/drivers/net/mlx5/linux/mlx5_os.c dpdk-22.11.11/drivers/net/mlx5/linux/mlx5_os.c --- dpdk-22.11.9/drivers/net/mlx5/linux/mlx5_os.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/linux/mlx5_os.c 2025-12-24 13:18:07.000000000 +0000 @@ -419,8 +419,8 @@ DRV_LOG(INFO, "No SW steering support"); return; } - dv_attr.type = IBV_FLOW_ATTR_NORMAL, - dv_attr.match_mask = (void *)&matcher_mask, + dv_attr.type = IBV_FLOW_ATTR_NORMAL; + dv_attr.match_mask = (void *)&matcher_mask; dv_attr.match_criteria_enable = (1 << MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT) | (1 << MLX5_MATCH_CRITERIA_ENABLE_MISC5_BIT); @@ -705,6 +705,26 @@ return err; } +#ifdef HAVE_MLX5DV_DR +static void +mlx5_destroy_send_to_kernel_action(struct mlx5_dev_ctx_shared *sh) +{ + if (sh->send_to_kernel_action.action) { + void *action = sh->send_to_kernel_action.action; + + mlx5_glue->destroy_flow_action(action); + sh->send_to_kernel_action.action = NULL; + } + if (sh->send_to_kernel_action.tbl) { + struct mlx5_flow_tbl_resource *tbl = + sh->send_to_kernel_action.tbl; + + flow_dv_tbl_resource_release(sh, tbl); + sh->send_to_kernel_action.tbl = NULL; + } +} +#endif /* HAVE_MLX5DV_DR */ + /** * Destroy DR related data within private structure. 
* @@ -721,6 +741,7 @@ return; MLX5_ASSERT(LIST_EMPTY(&sh->shared_rxqs)); #ifdef HAVE_MLX5DV_DR + mlx5_destroy_send_to_kernel_action(sh); if (sh->rx_domain) { mlx5_glue->dr_destroy_domain(sh->rx_domain); sh->rx_domain = NULL; @@ -743,19 +764,6 @@ mlx5_glue->destroy_flow_action(sh->pop_vlan_action); sh->pop_vlan_action = NULL; } - if (sh->send_to_kernel_action.action) { - void *action = sh->send_to_kernel_action.action; - - mlx5_glue->destroy_flow_action(action); - sh->send_to_kernel_action.action = NULL; - } - if (sh->send_to_kernel_action.tbl) { - struct mlx5_flow_tbl_resource *tbl = - sh->send_to_kernel_action.tbl; - - flow_dv_tbl_resource_release(sh, tbl); - sh->send_to_kernel_action.tbl = NULL; - } #endif /* HAVE_MLX5DV_DR */ if (sh->default_miss_action) mlx5_glue->destroy_flow_action @@ -1484,6 +1492,8 @@ eth_dev->data->mac_addrs = priv->mac; eth_dev->device = dpdk_dev; eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + /* Fetch minimum and maximum allowed MTU from the device. */ + mlx5_get_mtu_bounds(eth_dev, &priv->min_mtu, &priv->max_mtu); /* Configure the first MAC address by default. */ if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) { DRV_LOG(ERR, @@ -1500,7 +1510,7 @@ { char ifname[MLX5_NAMESIZE]; - if (mlx5_get_ifname(eth_dev, &ifname) == 0) + if (mlx5_get_ifname(eth_dev, ifname) == 0) DRV_LOG(DEBUG, "port %u ifname is \"%s\"", eth_dev->data->port_id, ifname); else @@ -1514,6 +1524,7 @@ err = rte_errno; goto error; } + eth_dev->data->mtu = priv->mtu; DRV_LOG(DEBUG, "port %u MTU is %u", eth_dev->data->port_id, priv->mtu); /* Initialize burst functions to prevent crashes before link-up. */ @@ -1525,8 +1536,7 @@ eth_dev->rx_queue_count = mlx5_rx_queue_count; /* Register MAC address. */ claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0)); - /* Sync mac addresses for PF or VF/SF if vf_nl_en is true */ - if ((!sh->dev_cap.vf && !sh->dev_cap.sf) || sh->config.vf_nl_en) + if (sh->dev_cap.vf && sh->config.vf_nl_en) mlx5_nl_mac_addr_sync(priv->nl_socket_route, mlx5_ifindex(eth_dev), eth_dev->data->mac_addrs, diff -Nru dpdk-22.11.9/drivers/net/mlx5/linux/mlx5_verbs.c dpdk-22.11.11/drivers/net/mlx5/linux/mlx5_verbs.c --- dpdk-22.11.9/drivers/net/mlx5/linux/mlx5_verbs.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/linux/mlx5_verbs.c 2025-12-24 13:18:07.000000000 +0000 @@ -397,7 +397,13 @@ rxq_data->wqes = rwq.buf; rxq_data->rq_db = rwq.dbrec; rxq_data->cq_arm_sn = 0; - mlx5_rxq_initialize(rxq_data); + ret = mlx5_rxq_initialize(rxq_data); + if (ret) { + DRV_LOG(ERR, "Port %u Rx queue %u RQ initialization failure.", + priv->dev_data->port_id, rxq->idx); + rte_errno = ENOMEM; + goto error; + } rxq_data->cq_ci = 0; priv->dev_data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; rxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num; @@ -877,7 +883,7 @@ * dev_cap.max_sge limit and will still work properly. */ qp_attr.cap.max_send_sge = 1; - qp_attr.qp_type = IBV_QPT_RAW_PACKET, + qp_attr.qp_type = IBV_QPT_RAW_PACKET; /* Do *NOT* enable this, completions events are managed per Tx burst. 
*/ qp_attr.sq_sig_all = 0; qp_attr.pd = priv->sh->cdev->pd; diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5.c dpdk-22.11.11/drivers/net/mlx5/mlx5.c --- dpdk-22.11.9/drivers/net/mlx5/mlx5.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5.c 2025-12-24 13:18:07.000000000 +0000 @@ -384,9 +384,6 @@ }, }; -#define MLX5_FLOW_MIN_ID_POOL_SIZE 512 -#define MLX5_ID_GENERATION_ARRAY_FACTOR 16 - #define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 1024 /** @@ -2007,6 +2004,18 @@ dev->process_private = NULL; } +static void +mlx5_flow_pools_destroy(struct mlx5_priv *priv) +{ + int i; + + for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) { + if (!priv->flows[i]) + continue; + mlx5_ipool_destroy(priv->flows[i]); + } +} + /** * DPDK callback to close the device. * @@ -2060,6 +2069,11 @@ /* Free the eCPRI flex parser resource. */ mlx5_flex_parser_ecpri_release(dev); mlx5_flex_item_port_cleanup(dev); + if (priv->representor) { + /* Each representor has a dedicated interrupts handler */ + rte_intr_instance_free(dev->intr_handle); + dev->intr_handle = NULL; + } #ifdef HAVE_MLX5_HWS_SUPPORT flow_hw_destroy_vport_action(dev); /* dr context will be closed after mlx5_os_free_shared_dr. */ @@ -2180,6 +2194,7 @@ if (!c) claim_zero(rte_eth_switch_domain_free(priv->domain_id)); } + mlx5_flow_pools_destroy(priv); memset(priv, 0, sizeof(*priv)); priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; /* diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5.h dpdk-22.11.11/drivers/net/mlx5/mlx5.h --- dpdk-22.11.9/drivers/net/mlx5/mlx5.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5.h 2025-12-24 13:18:07.000000000 +0000 @@ -65,6 +65,15 @@ /* Maximal number of field/field parts to map into sample registers .*/ #define MLX5_FLEX_ITEM_MAPPING_NUM 32 +/* Number of bytes not included in MTU. */ +#define MLX5_ETH_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_VLAN_HLEN + RTE_ETHER_CRC_LEN) + +/* Minimum allowed MTU to be reported whenever PMD cannot query it from OS. */ +#define MLX5_ETH_MIN_MTU (RTE_ETHER_MIN_MTU) + +/* Maximum allowed MTU to be reported whenever PMD cannot query it from OS. */ +#define MLX5_ETH_MAX_MTU (9978) + enum mlx5_ipool_index { #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H) MLX5_IPOOL_DECAP_ENCAP = 0, /* Pool for encap/decap resource. */ @@ -404,8 +413,6 @@ #define MLX5_MAX_PENDING_QUERIES 4 #define MLX5_CNT_MR_ALLOC_BULK 64 #define MLX5_CNT_SHARED_OFFSET 0x80000000 -#define IS_BATCH_CNT(cnt) (((cnt) & (MLX5_CNT_SHARED_OFFSET - 1)) >= \ - MLX5_CNT_BATCH_OFFSET) #define MLX5_CNT_SIZE (sizeof(struct mlx5_flow_counter)) #define MLX5_AGE_SIZE (sizeof(struct mlx5_age_param)) @@ -1140,7 +1147,6 @@ #define MLX5_FLOW_TABLE_LEVEL_METER (MLX5_MAX_TABLES - 3) #define MLX5_FLOW_TABLE_LEVEL_POLICY (MLX5_MAX_TABLES - 4) #define MLX5_MAX_TABLES_EXTERNAL MLX5_FLOW_TABLE_LEVEL_POLICY -#define MLX5_FLOW_TABLE_HWS_POLICY (MLX5_MAX_TABLES - 10) #define MLX5_MAX_TABLES_FDB UINT16_MAX #define MLX5_FLOW_TABLE_FACTOR 10 @@ -1652,8 +1658,6 @@ void (*lb_dummy_queue_release)(struct rte_eth_dev *dev); }; -#define MLX5_RSS_HASH_FIELDS_LEN RTE_DIM(mlx5_rss_hash_fields) - enum mlx5_hw_ctrl_flow_type { MLX5_HW_CTRL_FLOW_TYPE_GENERAL, MLX5_HW_CTRL_FLOW_TYPE_SQ_MISS_ROOT, @@ -1714,6 +1718,8 @@ unsigned int vlan_filter_n; /* Number of configured VLAN filters. */ /* Device properties. */ uint16_t mtu; /* Configured MTU. */ + uint16_t min_mtu; /* Minimum MTU allowed on the NIC. */ + uint16_t max_mtu; /* Maximum MTU allowed on the NIC. */ unsigned int isolated:1; /* Whether isolated mode is enabled. 
*/ unsigned int representor:1; /* Device is a port representor. */ unsigned int master:1; /* Device is a E-Switch master. */ @@ -1952,11 +1958,11 @@ struct mlx5_priv *mlx5_port_to_eswitch_info(uint16_t port, bool valid); struct mlx5_priv *mlx5_dev_to_eswitch_info(struct rte_eth_dev *dev); int mlx5_dev_configure_rss_reta(struct rte_eth_dev *dev); +void mlx5_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu); /* mlx5_ethdev_os.c */ -int mlx5_get_ifname(const struct rte_eth_dev *dev, - char (*ifname)[MLX5_NAMESIZE]); +int mlx5_get_ifname(const struct rte_eth_dev *dev, char ifname[MLX5_NAMESIZE]); unsigned int mlx5_ifindex(const struct rte_eth_dev *dev); int mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[RTE_ETHER_ADDR_LEN]); int mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu); @@ -1990,6 +1996,7 @@ uint16_t *n_stats, uint16_t *n_stats_sec); void mlx5_os_stats_init(struct rte_eth_dev *dev); int mlx5_get_flag_dropless_rq(struct rte_eth_dev *dev); +int mlx5_os_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu); /* mlx5_mac.c */ diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_defs.h dpdk-22.11.11/drivers/net/mlx5/mlx5_defs.h --- dpdk-22.11.9/drivers/net/mlx5/mlx5_defs.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_defs.h 2025-12-24 13:18:07.000000000 +0000 @@ -46,9 +46,6 @@ /* Maximum number of DCS created per port. */ #define MLX5_HWS_CNT_DCS_NUM 4 -/* Alarm timeout. */ -#define MLX5_ALARM_TIMEOUT_US 100000 - /* Maximum number of extended statistics counters. */ #define MLX5_MAX_XSTATS 64 @@ -170,9 +167,6 @@ /* Size of the hash table for tag table. */ #define MLX5_TAGS_HLIST_ARRAY_SIZE (1 << 15) -/* Size fo the hash table for SFT table. */ -#define MLX5_FLOW_SFT_HLIST_ARRAY_SIZE 4096 - /* Hairpin TX/RX queue configuration parameters. */ #define MLX5_HAIRPIN_QUEUE_STRIDE 6 #define MLX5_HAIRPIN_JUMBO_LOG_SIZE (14 + 2) diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_devx.c dpdk-22.11.11/drivers/net/mlx5/mlx5_devx.c --- dpdk-22.11.9/drivers/net/mlx5/mlx5_devx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_devx.c 2025-12-24 13:18:07.000000000 +0000 @@ -614,7 +614,12 @@ (uint32_t *)(uintptr_t)tmpl->devx_rmp.wq.db_rec; } if (!rxq_ctrl->started) { - mlx5_rxq_initialize(rxq_data); + if (mlx5_rxq_initialize(rxq_data)) { + DRV_LOG(ERR, "Port %u Rx queue %u RQ initialization failure.", + priv->dev_data->port_id, rxq->idx); + rte_errno = ENOMEM; + goto error; + } rxq_ctrl->wqn = rxq->devx_rq.rq->id; } priv->dev_data->rx_queue_state[rxq->idx] = RTE_ETH_QUEUE_STATE_STARTED; @@ -676,6 +681,11 @@ struct mlx5_external_rxq *ext_rxq = mlx5_ext_rxq_get(dev, queues[i]); + if (ext_rxq == NULL) { + rte_errno = EINVAL; + mlx5_free(rqt_attr); + return NULL; + } rqt_attr->rq_list[i] = ext_rxq->hw_id; } else { struct mlx5_rxq_priv *rxq = diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_ethdev.c dpdk-22.11.11/drivers/net/mlx5/mlx5_ethdev.c --- dpdk-22.11.9/drivers/net/mlx5/mlx5_ethdev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_ethdev.c 2025-12-24 13:18:07.000000000 +0000 @@ -352,9 +352,11 @@ unsigned int max; uint16_t max_wqe; + info->min_mtu = priv->min_mtu; + info->max_mtu = priv->max_mtu; + info->max_rx_pktlen = info->max_mtu + MLX5_ETH_OVERHEAD; /* FIXME: we should ask the device for these values. 
*/ info->min_rx_bufsize = 32; - info->max_rx_pktlen = 65536; info->max_lro_pkt_size = MLX5_MAX_LRO_SIZE; /* * Since we need one CQ per QP, the limit is the minimum number @@ -606,6 +608,7 @@ }; if (dev->rx_pkt_burst == mlx5_rx_burst || + dev->rx_pkt_burst == mlx5_rx_burst_out_of_order || dev->rx_pkt_burst == mlx5_rx_burst_mprq || dev->rx_pkt_burst == mlx5_rx_burst_vec || dev->rx_pkt_burst == mlx5_rx_burst_mprq_vec) @@ -674,7 +677,12 @@ eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; MLX5_ASSERT(dev != NULL); - if (mlx5_check_vec_rx_support(dev) > 0) { + if (mlx5_shared_rq_enabled(dev)) { + rx_pkt_burst = mlx5_rx_burst_out_of_order; + DRV_LOG(DEBUG, "port %u forced to use SPRQ" + " Rx function with Out-of-Order completions", + dev->data->port_id); + } else if (mlx5_check_vec_rx_support(dev) > 0) { if (mlx5_mprq_enabled(dev)) { rx_pkt_burst = mlx5_rx_burst_mprq_vec; DRV_LOG(DEBUG, "port %u selected vectorized" @@ -795,3 +803,41 @@ cap->tx_cap.rte_memory = hca_attr->hairpin_sq_wq_in_host_mem; return 0; } + +/** + * Query minimum and maximum allowed MTU value on the device. + * + * This functions will always return valid MTU bounds. + * In case platform-specific implementation fails or current platform does not support it, + * the fallback default values will be used. + * + * @param[in] dev + * Pointer to Ethernet device + * @param[out] min_mtu + * Minimum MTU value output buffer. + * @param[out] max_mtu + * Maximum MTU value output buffer. + */ +void +mlx5_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu) +{ + int ret; + + MLX5_ASSERT(min_mtu != NULL); + MLX5_ASSERT(max_mtu != NULL); + + ret = mlx5_os_get_mtu_bounds(dev, min_mtu, max_mtu); + if (ret < 0) { + if (ret != -ENOTSUP) + DRV_LOG(INFO, "port %u failed to query MTU bounds, using fallback values", + dev->data->port_id); + *min_mtu = MLX5_ETH_MIN_MTU; + *max_mtu = MLX5_ETH_MAX_MTU; + + /* This function does not fail. Clear rte_errno. */ + rte_errno = 0; + } + + DRV_LOG(INFO, "port %u minimum MTU is %u", dev->data->port_id, *min_mtu); + DRV_LOG(INFO, "port %u maximum MTU is %u", dev->data->port_id, *max_mtu); +} diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_flow.c dpdk-22.11.11/drivers/net/mlx5/mlx5_flow.c --- dpdk-22.11.9/drivers/net/mlx5/mlx5_flow.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_flow.c 2025-12-24 13:18:07.000000000 +0000 @@ -33,6 +33,21 @@ #include "mlx5_common_os.h" #include "rte_pmd_mlx5.h" +const uint64_t mlx5_rss_hash_fields[] = { + [MLX5_RSS_HASH_IDX_IPV4] = MLX5_RSS_HASH_IPV4, + [MLX5_RSS_HASH_IDX_IPV4_TCP] = MLX5_RSS_HASH_IPV4_TCP, + [MLX5_RSS_HASH_IDX_IPV4_UDP] = MLX5_RSS_HASH_IPV4_UDP, + [MLX5_RSS_HASH_IDX_IPV4_ESP] = MLX5_RSS_HASH_IPV4_ESP, + [MLX5_RSS_HASH_IDX_IPV6] = MLX5_RSS_HASH_IPV6, + [MLX5_RSS_HASH_IDX_IPV6_TCP] = MLX5_RSS_HASH_IPV6_TCP, + [MLX5_RSS_HASH_IDX_IPV6_UDP] = MLX5_RSS_HASH_IPV6_UDP, + [MLX5_RSS_HASH_IDX_IPV6_ESP] = MLX5_RSS_HASH_IPV6_ESP, + [MLX5_RSS_HASH_IDX_TCP] = MLX5_TCP_IBV_RX_HASH, + [MLX5_RSS_HASH_IDX_UDP] = MLX5_UDP_IBV_RX_HASH, + [MLX5_RSS_HASH_IDX_ESP_SPI] = MLX5_RSS_HASH_ESP_SPI, + [MLX5_RSS_HASH_IDX_NONE] = MLX5_RSS_HASH_NONE, +}; + /* * Shared array for quick translation between port_id and vport mask/values * used for HWS rules. 
@@ -10426,12 +10441,12 @@ (error, ENOMEM, RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, "invalid default miss RSS"); - ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT, - ctx->action_rss.level = 0, - ctx->action_rss.types = priv->rss_conf.rss_hf, - ctx->action_rss.key_len = priv->rss_conf.rss_key_len, - ctx->action_rss.queue_num = priv->reta_idx_n, - ctx->action_rss.key = priv->rss_conf.rss_key, + ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT; + ctx->action_rss.level = 0; + ctx->action_rss.types = priv->rss_conf.rss_hf; + ctx->action_rss.key_len = priv->rss_conf.rss_key_len; + ctx->action_rss.queue_num = priv->reta_idx_n; + ctx->action_rss.key = priv->rss_conf.rss_key; ctx->action_rss.queue = ctx->queue; if (!priv->reta_idx_n || !priv->rxqs_n) return rte_flow_error_set diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_flow.h dpdk-22.11.11/drivers/net/mlx5/mlx5_flow.h --- dpdk-22.11.9/drivers/net/mlx5/mlx5_flow.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_flow.h 2025-12-24 13:18:07.000000000 +0000 @@ -95,6 +95,11 @@ #define MLX5_ACTION_CTX_CT_GET_OWNER MLX5_INDIRECT_ACT_CT_GET_OWNER #define MLX5_ACTION_CTX_CT_GEN_IDX MLX5_INDIRECT_ACT_CT_GEN_IDX +#define MLX5_FLOW_CONNTRACK_PKT_STATE_ALL \ + (RTE_FLOW_CONNTRACK_PKT_STATE_VALID | RTE_FLOW_CONNTRACK_PKT_STATE_CHANGED | \ + RTE_FLOW_CONNTRACK_PKT_STATE_INVALID | RTE_FLOW_CONNTRACK_PKT_STATE_DISABLED | \ + RTE_FLOW_CONNTRACK_PKT_STATE_BAD) + /* Matches on selected register. */ struct mlx5_rte_flow_item_tag { enum modify_reg id; @@ -139,9 +144,6 @@ MLX5_SAMPLE_ID, }; -/* Default queue number. */ -#define MLX5_RSSQ_DEFAULT_NUM 16 - #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0) #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1) #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2) @@ -348,10 +350,6 @@ #define MLX5_FLOW_XCAP_ACTIONS (MLX5_FLOW_ACTION_ENCAP | MLX5_FLOW_ACTION_DECAP) -#ifndef IPPROTO_MPLS -#define IPPROTO_MPLS 137 -#endif - /* UDP port number for MPLS */ #define MLX5_UDP_PORT_MPLS 6635 @@ -362,6 +360,9 @@ /* UDP port numbers for GENEVE. */ #define MLX5_UDP_PORT_GENEVE 6081 +/* UDP port numbers for ESP. */ +#define MLX5_UDP_PORT_ESP 4500 + /* Lowest priority indicator. */ #define MLX5_FLOW_LOWEST_PRIO_INDICATOR ((uint32_t)-1) @@ -1397,27 +1398,31 @@ #define MLX5_RSS_HASH_NONE 0ULL -/* extract next protocol type from Ethernet & VLAN headers */ -#define MLX5_ETHER_TYPE_FROM_HEADER(_s, _m, _itm, _prt) do { \ - (_prt) = ((const struct _s *)(_itm)->mask)->_m; \ - (_prt) &= ((const struct _s *)(_itm)->spec)->_m; \ - (_prt) = rte_be_to_cpu_16((_prt)); \ -} while (0) - -/* array of valid combinations of RX Hash fields for RSS */ -static const uint64_t mlx5_rss_hash_fields[] = { - MLX5_RSS_HASH_IPV4, - MLX5_RSS_HASH_IPV4_TCP, - MLX5_RSS_HASH_IPV4_UDP, - MLX5_RSS_HASH_IPV4_ESP, - MLX5_RSS_HASH_IPV6, - MLX5_RSS_HASH_IPV6_TCP, - MLX5_RSS_HASH_IPV6_UDP, - MLX5_RSS_HASH_IPV6_ESP, - MLX5_RSS_HASH_ESP_SPI, - MLX5_RSS_HASH_NONE, + +/** + * Each enum variant corresponds to a single valid protocols combination for hrxq configuration + * Each variant serves as an index into #mlx5_rss_hash_fields array containing default + * bitmaps of ibv_rx_hash_fields flags for given protocols combination. 
+ */ +enum { + MLX5_RSS_HASH_IDX_IPV4, + MLX5_RSS_HASH_IDX_IPV4_TCP, + MLX5_RSS_HASH_IDX_IPV4_UDP, + MLX5_RSS_HASH_IDX_IPV4_ESP, + MLX5_RSS_HASH_IDX_IPV6, + MLX5_RSS_HASH_IDX_IPV6_TCP, + MLX5_RSS_HASH_IDX_IPV6_UDP, + MLX5_RSS_HASH_IDX_IPV6_ESP, + MLX5_RSS_HASH_IDX_TCP, + MLX5_RSS_HASH_IDX_UDP, + MLX5_RSS_HASH_IDX_ESP_SPI, + MLX5_RSS_HASH_IDX_NONE, + MLX5_RSS_HASH_IDX_MAX, }; +/** Array of valid combinations of RX Hash fields for RSS. */ +extern const uint64_t mlx5_rss_hash_fields[]; + /* Shared RSS action structure */ struct mlx5_shared_action_rss { ILIST_ENTRY(uint32_t)next; /**< Index to the next RSS structure. */ @@ -1426,7 +1431,7 @@ uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */ struct mlx5_ind_table_obj *ind_tbl; /**< Hash RX queues (hrxq, hrxq_tunnel fields) indirection table. */ - uint32_t hrxq[MLX5_RSS_HASH_FIELDS_LEN]; + uint32_t hrxq[MLX5_RSS_HASH_IDX_MAX]; /**< Hash RX queue indexes mapped to mlx5_rss_hash_fields */ rte_spinlock_t action_rss_sl; /**< Shared RSS action spinlock. */ }; @@ -2627,10 +2632,16 @@ int mlx5_flow_hw_esw_create_sq_miss_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external); int mlx5_flow_hw_esw_destroy_sq_miss_flow(struct rte_eth_dev *dev, - uint32_t sqn); + uint32_t sqn, bool external); int mlx5_flow_hw_esw_create_default_jump_flow(struct rte_eth_dev *dev); -int mlx5_flow_hw_create_tx_default_mreg_copy_flow(struct rte_eth_dev *dev); -int mlx5_flow_hw_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external); +int mlx5_flow_hw_create_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, + uint32_t sqn, bool external); +int mlx5_flow_hw_destroy_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, + uint32_t sqn, bool external); +int mlx5_flow_hw_create_tx_repr_matching_flow(struct rte_eth_dev *dev, + uint32_t sqn, bool external); +int mlx5_flow_hw_destroy_tx_repr_matching_flow(struct rte_eth_dev *dev, + uint32_t sqn, bool external); int mlx5_flow_hw_lacp_rx_flow(struct rte_eth_dev *dev); int mlx5_flow_actions_validate(struct rte_eth_dev *dev, const struct rte_flow_actions_template_attr *attr, diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_flow_dv.c dpdk-22.11.11/drivers/net/mlx5/mlx5_flow_dv.c --- dpdk-22.11.9/drivers/net/mlx5/mlx5_flow_dv.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_flow_dv.c 2025-12-24 13:18:07.000000000 +0000 @@ -2862,6 +2862,11 @@ return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, NULL, "Conflict status bits"); + if (spec->flags & ~MLX5_FLOW_CONNTRACK_PKT_STATE_ALL) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, + "Invalid CT item flags"); /* State change also needs to be considered. 
*/ *item_flags |= MLX5_FLOW_LAYER_ASO_CT; return 0; @@ -5084,6 +5089,13 @@ } if (action_modify_field->src.field != RTE_FLOW_FIELD_VALUE && action_modify_field->src.field != RTE_FLOW_FIELD_POINTER) { + if (action_modify_field->operation != RTE_FLOW_MODIFY_SET) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + &action_modify_field->operation, + "modify field action type add is not" + " supported when src field type is" + " not value/pointer"); if (root) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, action, @@ -8975,29 +8987,35 @@ */ static void flow_dv_translate_item_esp(void *key, const struct rte_flow_item *item, - int inner, uint32_t key_type) + int inner, uint32_t key_type, uint64_t item_flags) { const struct rte_flow_item_esp *esp_m; const struct rte_flow_item_esp *esp_v; void *headers_v; char *spi_v; + bool over_udp = item_flags & (inner ? MLX5_FLOW_LAYER_INNER_L4_UDP : + MLX5_FLOW_LAYER_OUTER_L4_UDP); headers_v = inner ? MLX5_ADDR_OF(fte_match_param, key, inner_headers) : - MLX5_ADDR_OF(fte_match_param, key, outer_headers); - if (key_type & MLX5_SET_MATCHER_M) - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ip_protocol, 0xff); - else - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ip_protocol, IPPROTO_ESP); + MLX5_ADDR_OF(fte_match_param, key, outer_headers); + if (key_type & MLX5_SET_MATCHER_M) { + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 0xff); + if (over_udp && !MLX5_GET16(fte_match_set_lyr_2_4, headers_v, udp_dport)) + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, 0xFFFF); + } else { + if (!over_udp) + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_ESP); + else + if (!MLX5_GET16(fte_match_set_lyr_2_4, headers_v, udp_dport)) + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, + MLX5_UDP_PORT_ESP); + } if (MLX5_ITEM_VALID(item, key_type)) return; - MLX5_ITEM_UPDATE(item, key_type, esp_v, esp_m, - &rte_flow_item_esp_mask); + MLX5_ITEM_UPDATE(item, key_type, esp_v, esp_m, &rte_flow_item_esp_mask); headers_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters); - spi_v = inner ? MLX5_ADDR_OF(fte_match_set_misc, headers_v, - inner_esp_spi) : MLX5_ADDR_OF(fte_match_set_misc - , headers_v, outer_esp_spi); + spi_v = inner ? MLX5_ADDR_OF(fte_match_set_misc, headers_v, inner_esp_spi) : + MLX5_ADDR_OF(fte_match_set_misc, headers_v, outer_esp_spi); *(uint32_t *)spi_v = esp_m->hdr.spi & esp_v->hdr.spi; } @@ -13255,7 +13273,7 @@ switch (item_type) { case RTE_FLOW_ITEM_TYPE_ESP: - flow_dv_translate_item_esp(key, items, tunnel, key_type); + flow_dv_translate_item_esp(key, items, tunnel, key_type, wks->item_flags); wks->priority = MLX5_PRIORITY_MAP_L4; last_item = MLX5_FLOW_ITEM_ESP; break; @@ -14602,6 +14620,145 @@ return 0; } +/* + * Protocol selector bitmap + * Each flag is used as an indicator that given protocol is specified in given RSS hash fields. 
+ */ +#define RX_HASH_SELECTOR_IPV4 RTE_BIT32(0) +#define RX_HASH_SELECTOR_IPV6 RTE_BIT32(1) +#define RX_HASH_SELECTOR_UDP RTE_BIT32(2) +#define RX_HASH_SELECTOR_TCP RTE_BIT32(3) +#define RX_HASH_SELECTOR_ESP_SPI RTE_BIT32(4) +#define RX_HASH_SELECTOR_NONE (0) + +#define RX_HASH_SELECTOR_IPV4_TCP (RX_HASH_SELECTOR_IPV4 | RX_HASH_SELECTOR_TCP) +#define RX_HASH_SELECTOR_IPV4_UDP (RX_HASH_SELECTOR_IPV4 | RX_HASH_SELECTOR_UDP) +#define RX_HASH_SELECTOR_IPV4_ESP (RX_HASH_SELECTOR_IPV4 | RX_HASH_SELECTOR_ESP_SPI) + +#define RX_HASH_SELECTOR_IPV6_TCP (RX_HASH_SELECTOR_IPV6 | RX_HASH_SELECTOR_TCP) +#define RX_HASH_SELECTOR_IPV6_UDP (RX_HASH_SELECTOR_IPV6 | RX_HASH_SELECTOR_UDP) +#define RX_HASH_SELECTOR_IPV6_ESP (RX_HASH_SELECTOR_IPV6 | RX_HASH_SELECTOR_ESP_SPI) + +static bool +rx_hash_selector_has_valid_l3(const uint32_t selectors) +{ + /* In TIR configuration, RSS hashing on both IPv4 and IPv6 is mutually exclusive. */ + return !((selectors & RX_HASH_SELECTOR_IPV4) && (selectors & RX_HASH_SELECTOR_IPV6)); +} + +static bool +rx_hash_selector_has_valid_l4(const uint32_t selectors) +{ + /* In TIR configuration, RSS hashing on both UDP and TCP is mutually exclusive. */ + return !((selectors & RX_HASH_SELECTOR_UDP) && (selectors & RX_HASH_SELECTOR_TCP)); +} + +static bool +rx_hash_selector_has_valid_esp(const uint32_t selectors) +{ + /* In TIR configuration, RSS hashing on ESP and other L4 protocol is mutually exclusive. */ + if (selectors & RX_HASH_SELECTOR_ESP_SPI) + return !((selectors & RX_HASH_SELECTOR_UDP) || (selectors & RX_HASH_SELECTOR_TCP)); + + return true; +} + +/** + * Calculate protocol combination based on provided RSS hashing fields. + * + * @param[in] hash_fields + * Requested RSS hashing fields specified as a flags bitmap, based on ibv_rx_hash_fields. + * @param[out] selectors_out + * Calculated protocol combination will be written here. + * Result will be a bitmap of RX_HASH_SELECTOR_* flags. + * + * @return + * 0 if conversion is successful and protocol combination written to @p selectors_out. + * (-EINVAL) otherwise. + */ +static int +rx_hash_calc_selector(const uint64_t hash_fields, uint32_t *selectors_out) +{ + const uint64_t filtered_hf = hash_fields & ~IBV_RX_HASH_INNER; + uint32_t selectors = 0; + + if (filtered_hf & MLX5_RSS_HASH_IPV4) + selectors |= RX_HASH_SELECTOR_IPV4; + if (filtered_hf & MLX5_RSS_HASH_IPV6) + selectors |= RX_HASH_SELECTOR_IPV6; + if (!rx_hash_selector_has_valid_l3(selectors)) { + DRV_LOG(NOTICE, "hrxq hashing on both IPv4 and IPv6 is invalid: " + "selectors=0x%" PRIx32, selectors); + return -EINVAL; + } + + if (filtered_hf & MLX5_UDP_IBV_RX_HASH) + selectors |= RX_HASH_SELECTOR_UDP; + if (filtered_hf & MLX5_TCP_IBV_RX_HASH) + selectors |= RX_HASH_SELECTOR_TCP; + if (!rx_hash_selector_has_valid_l4(selectors)) { + DRV_LOG(NOTICE, "hrxq hashing on both UDP and TCP is invalid: " + "selectors=0x%" PRIx32, selectors); + return -EINVAL; + } + + if (filtered_hf & MLX5_RSS_HASH_ESP_SPI) + selectors |= RX_HASH_SELECTOR_ESP_SPI; + if (!rx_hash_selector_has_valid_esp(selectors)) { + DRV_LOG(NOTICE, "hrxq hashing on ESP SPI and UDP or TCP is mutually exclusive: " + "selectors=0x%" PRIx32, selectors); + return -EINVAL; + } + + *selectors_out = selectors; + return 0; +} + +/** + * Calculate the hrxq object index based on protocol combination. + * + * @param[in] selectors + * Protocol combination specified as bitmap of RX_HASH_SELECTOR_* flags. + * + * @return + * Index into hrxq array in #mlx5_shared_action_rss based on ginve protocol combination. 
+ * (-EINVAL) if given protocol combination is not supported or is invalid. + */ +static int +get_rss_hash_idx(const uint32_t selectors) +{ + switch (selectors) { + case RX_HASH_SELECTOR_IPV4: + return MLX5_RSS_HASH_IDX_IPV4; + case RX_HASH_SELECTOR_IPV4_TCP: + return MLX5_RSS_HASH_IDX_IPV4_TCP; + case RX_HASH_SELECTOR_IPV4_UDP: + return MLX5_RSS_HASH_IDX_IPV4_UDP; + case RX_HASH_SELECTOR_IPV4_ESP: + return MLX5_RSS_HASH_IDX_IPV4_ESP; + case RX_HASH_SELECTOR_IPV6: + return MLX5_RSS_HASH_IDX_IPV6; + case RX_HASH_SELECTOR_IPV6_TCP: + return MLX5_RSS_HASH_IDX_IPV6_TCP; + case RX_HASH_SELECTOR_IPV6_UDP: + return MLX5_RSS_HASH_IDX_IPV6_UDP; + case RX_HASH_SELECTOR_IPV6_ESP: + return MLX5_RSS_HASH_IDX_IPV6_ESP; + case RX_HASH_SELECTOR_TCP: + return MLX5_RSS_HASH_IDX_TCP; + case RX_HASH_SELECTOR_UDP: + return MLX5_RSS_HASH_IDX_UDP; + case RX_HASH_SELECTOR_ESP_SPI: + return MLX5_RSS_HASH_IDX_ESP_SPI; + case RX_HASH_SELECTOR_NONE: + return MLX5_RSS_HASH_IDX_NONE; + default: + DRV_LOG(ERR, "invalid hrxq hash fields combination: " + "selectors=0x%" PRIx32, selectors); + return -EINVAL; + } +} + /** * Set hash RX queue by hash fields (see enum ibv_rx_hash_fields) * and tunnel. @@ -14609,7 +14766,8 @@ * @param[in, out] action * Shred RSS action holding hash RX queue objects. * @param[in] hash_fields - * Defines combination of packet fields to participate in RX hash. + * Defines combination of packet fields to participate in RX hash, + * specified as a bitmap of #ibv_rx_hash_fields flags. * @param[in] tunnel * Tunnel type * @param[in] hrxq_idx @@ -14624,65 +14782,26 @@ uint32_t hrxq_idx) { uint32_t *hrxqs = action->hrxq; + uint32_t selectors = 0; + int ret; - switch (hash_fields & ~IBV_RX_HASH_INNER) { - case MLX5_RSS_HASH_IPV4: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_SRC_ONLY: - hrxqs[0] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV4_TCP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_TCP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_TCP_SRC_ONLY: - hrxqs[1] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV4_UDP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_UDP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_UDP_SRC_ONLY: - hrxqs[2] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV6: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_SRC_ONLY: - hrxqs[3] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV6_TCP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_TCP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_TCP_SRC_ONLY: - hrxqs[4] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV6_UDP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_UDP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_UDP_SRC_ONLY: - hrxqs[5] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_NONE: - hrxqs[6] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV4_ESP: - hrxqs[7] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_IPV6_ESP: - hrxqs[8] = hrxq_idx; - return 0; - case MLX5_RSS_HASH_ESP_SPI: - hrxqs[9] = hrxq_idx; - return 0; - default: - return -1; - } + ret = rx_hash_calc_selector(hash_fields, &selectors); + /* + * Hash fields passed to this function are constructed internally. + * If this fails, then this is a PMD bug. + */ + MLX5_ASSERT(ret == 0); + + ret = get_rss_hash_idx(selectors); + /* + * Based on above assert, selectors should always yield correct index + * in mlx5_rss_hash_fields array. + * If this fails, then this is a PMD bug. 
+ */ + MLX5_ASSERT(ret >= 0 && ret < MLX5_RSS_HASH_IDX_MAX); + hrxqs[ret] = hrxq_idx; + + return 0; } /** @@ -14694,7 +14813,8 @@ * @param[in] idx * Shared RSS action ID holding hash RX queue objects. * @param[in] hash_fields - * Defines combination of packet fields to participate in RX hash. + * Defines combination of packet fields to participate in RX hash, + * specified as a bitmap of #ibv_rx_hash_fields flags. * @param[in] tunnel * Tunnel type * @@ -14709,56 +14829,26 @@ struct mlx5_shared_action_rss *shared_rss = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx); const uint32_t *hrxqs = shared_rss->hrxq; + uint32_t selectors = 0; + int ret; - switch (hash_fields & ~IBV_RX_HASH_INNER) { - case MLX5_RSS_HASH_IPV4: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_SRC_ONLY: - return hrxqs[0]; - case MLX5_RSS_HASH_IPV4_TCP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_TCP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_TCP_SRC_ONLY: - return hrxqs[1]; - case MLX5_RSS_HASH_IPV4_UDP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_UDP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV4_UDP_SRC_ONLY: - return hrxqs[2]; - case MLX5_RSS_HASH_IPV6: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_SRC_ONLY: - return hrxqs[3]; - case MLX5_RSS_HASH_IPV6_TCP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_TCP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_TCP_SRC_ONLY: - return hrxqs[4]; - case MLX5_RSS_HASH_IPV6_UDP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_UDP_DST_ONLY: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_UDP_SRC_ONLY: - return hrxqs[5]; - case MLX5_RSS_HASH_NONE: - return hrxqs[6]; - case MLX5_RSS_HASH_IPV4_ESP: - return hrxqs[7]; - case MLX5_RSS_HASH_IPV6_ESP: - return hrxqs[8]; - case MLX5_RSS_HASH_ESP_SPI: - return hrxqs[9]; - default: + ret = rx_hash_calc_selector(hash_fields, &selectors); + if (ret < 0) { + DRV_LOG(ERR, "port %u Rx hash selector calculation failed: " + "rss_act_idx=%u hash_fields=0x%" PRIx64 " selectors=0x%" PRIx32, + dev->data->port_id, idx, hash_fields, selectors); + return 0; + } + + ret = get_rss_hash_idx(selectors); + if (ret < 0) { + DRV_LOG(ERR, "port %u failed hrxq index lookup: " + "rss_act_idx=%u hash_fields=0x%" PRIx64 " selectors=0x%" PRIx32, + dev->data->port_id, idx, hash_fields, selectors); return 0; } + return hrxqs[ret]; } /** @@ -15431,7 +15521,7 @@ */ static int __flow_dv_hrxqs_release(struct rte_eth_dev *dev, - uint32_t (*hrxqs)[MLX5_RSS_HASH_FIELDS_LEN]) + uint32_t (*hrxqs)[MLX5_RSS_HASH_IDX_MAX]) { size_t i; int remaining = 0; @@ -15466,6 +15556,62 @@ return __flow_dv_hrxqs_release(dev, &shared_rss->hrxq); } +static inline void +filter_ipv4_types(uint64_t rss_types, uint64_t *hash_fields) +{ + if (rss_types & MLX5_IPV4_LAYER_TYPES) { + *hash_fields &= ~MLX5_RSS_HASH_IPV4; + if (rss_types & RTE_ETH_RSS_L3_DST_ONLY) + *hash_fields |= IBV_RX_HASH_DST_IPV4; + else if (rss_types & RTE_ETH_RSS_L3_SRC_ONLY) + *hash_fields |= IBV_RX_HASH_SRC_IPV4; + else + *hash_fields |= MLX5_RSS_HASH_IPV4; + } +} + +static inline void +filter_ipv6_types(uint64_t rss_types, uint64_t *hash_fields) +{ + if (rss_types & MLX5_IPV6_LAYER_TYPES) { + *hash_fields &= ~MLX5_RSS_HASH_IPV6; + if (rss_types & RTE_ETH_RSS_L3_DST_ONLY) + *hash_fields |= IBV_RX_HASH_DST_IPV6; + else if (rss_types & RTE_ETH_RSS_L3_SRC_ONLY) + *hash_fields |= IBV_RX_HASH_SRC_IPV6; + else + *hash_fields |= MLX5_RSS_HASH_IPV6; + } +} 
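Editor's note on the hrxq rework in this hunk set: rx_hash_calc_selector() and get_rss_hash_idx() above replace the long switch over every hash-field variant with a small protocol-selector bitmap that is first validated (IPv4 with IPv6, UDP with TCP, and ESP SPI with any other L4 are mutually exclusive in TIR configuration) and then mapped onto a dense array index. The following is a minimal standalone sketch of that technique only; the flag values, table and names are hypothetical, it uses no mlx5 or DPDK headers, and it is not the PMD code.

#include <stdio.h>
#include <stdint.h>

#define SEL_IPV4 (1u << 0)
#define SEL_IPV6 (1u << 1)
#define SEL_UDP  (1u << 2)
#define SEL_TCP  (1u << 3)
#define SEL_ESP  (1u << 4)

/* Reject combinations the hardware cannot hash on simultaneously. */
static int selector_is_valid(uint32_t sel)
{
	if ((sel & SEL_IPV4) && (sel & SEL_IPV6))
		return 0; /* IPv4 and IPv6 are exclusive */
	if ((sel & SEL_UDP) && (sel & SEL_TCP))
		return 0; /* UDP and TCP are exclusive */
	if ((sel & SEL_ESP) && (sel & (SEL_UDP | SEL_TCP)))
		return 0; /* ESP SPI excludes other L4 hashing */
	return 1;
}

/* Map a valid selector bitmap onto a dense array slot, -1 if unsupported. */
static int selector_to_index(uint32_t sel)
{
	static const uint32_t table[] = {
		0, /* NONE */
		SEL_IPV4, SEL_IPV4 | SEL_TCP, SEL_IPV4 | SEL_UDP, SEL_IPV4 | SEL_ESP,
		SEL_IPV6, SEL_IPV6 | SEL_TCP, SEL_IPV6 | SEL_UDP, SEL_IPV6 | SEL_ESP,
		SEL_TCP, SEL_UDP, SEL_ESP,
	};
	unsigned int i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i] == sel)
			return (int)i;
	return -1;
}

int main(void)
{
	uint32_t sel = SEL_IPV4 | SEL_UDP;

	if (selector_is_valid(sel))
		printf("IPv4+UDP -> slot %d\n", selector_to_index(sel));
	sel = SEL_UDP | SEL_TCP;
	printf("UDP+TCP valid? %d\n", selector_is_valid(sel));
	return 0;
}

The benefit mirrored here is that validation and slot lookup become data-driven, so adding a new protocol combination (as the patch does for plain TCP, UDP and ESP slots) means extending the table rather than growing a many-armed switch.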
+ +static inline void +filter_udp_types(uint64_t rss_types, uint64_t *hash_fields) +{ + if (rss_types & RTE_ETH_RSS_UDP) { + *hash_fields &= ~MLX5_UDP_IBV_RX_HASH; + if (rss_types & RTE_ETH_RSS_L4_DST_ONLY) + *hash_fields |= IBV_RX_HASH_DST_PORT_UDP; + else if (rss_types & RTE_ETH_RSS_L4_SRC_ONLY) + *hash_fields |= IBV_RX_HASH_SRC_PORT_UDP; + else + *hash_fields |= MLX5_UDP_IBV_RX_HASH; + } +} + +static inline void +filter_tcp_types(uint64_t rss_types, uint64_t *hash_fields) +{ + if (rss_types & RTE_ETH_RSS_TCP) { + *hash_fields &= ~MLX5_TCP_IBV_RX_HASH; + if (rss_types & RTE_ETH_RSS_L4_DST_ONLY) + *hash_fields |= IBV_RX_HASH_DST_PORT_TCP; + else if (rss_types & RTE_ETH_RSS_L4_SRC_ONLY) + *hash_fields |= IBV_RX_HASH_SRC_PORT_TCP; + else + *hash_fields |= MLX5_TCP_IBV_RX_HASH; + } +} + /** * Adjust L3/L4 hash value of pre-created shared RSS hrxq according to * user input. @@ -15477,9 +15623,9 @@ * same slot in mlx5_rss_hash_fields. * * @param[in] orig_rss_types - * RSS type as provided in shared RSS action. + * RSS type as provided in shared RSS action, specified as a bitmap of RTE_ETH_RSS_* flags. * @param[in, out] hash_field - * hash_field variable needed to be adjusted. + * hash_field variable needed to be adjusted, specified as a bitmap of #ibv_rx_hash_fields flags. * * @return * void @@ -15488,60 +15634,18 @@ flow_dv_action_rss_l34_hash_adjust(uint64_t orig_rss_types, uint64_t *hash_field) { + uint64_t hash_field_protos = *hash_field & ~IBV_RX_HASH_INNER; uint64_t rss_types = rte_eth_rss_hf_refine(orig_rss_types); - switch (*hash_field & ~IBV_RX_HASH_INNER) { - case MLX5_RSS_HASH_IPV4: - if (rss_types & MLX5_IPV4_LAYER_TYPES) { - *hash_field &= ~MLX5_RSS_HASH_IPV4; - if (rss_types & RTE_ETH_RSS_L3_DST_ONLY) - *hash_field |= IBV_RX_HASH_DST_IPV4; - else if (rss_types & RTE_ETH_RSS_L3_SRC_ONLY) - *hash_field |= IBV_RX_HASH_SRC_IPV4; - else - *hash_field |= MLX5_RSS_HASH_IPV4; - } - return; - case MLX5_RSS_HASH_IPV6: - if (rss_types & MLX5_IPV6_LAYER_TYPES) { - *hash_field &= ~MLX5_RSS_HASH_IPV6; - if (rss_types & RTE_ETH_RSS_L3_DST_ONLY) - *hash_field |= IBV_RX_HASH_DST_IPV6; - else if (rss_types & RTE_ETH_RSS_L3_SRC_ONLY) - *hash_field |= IBV_RX_HASH_SRC_IPV6; - else - *hash_field |= MLX5_RSS_HASH_IPV6; - } - return; - case MLX5_RSS_HASH_IPV4_UDP: - /* fall-through. */ - case MLX5_RSS_HASH_IPV6_UDP: - if (rss_types & RTE_ETH_RSS_UDP) { - *hash_field &= ~MLX5_UDP_IBV_RX_HASH; - if (rss_types & RTE_ETH_RSS_L4_DST_ONLY) - *hash_field |= IBV_RX_HASH_DST_PORT_UDP; - else if (rss_types & RTE_ETH_RSS_L4_SRC_ONLY) - *hash_field |= IBV_RX_HASH_SRC_PORT_UDP; - else - *hash_field |= MLX5_UDP_IBV_RX_HASH; - } - return; - case MLX5_RSS_HASH_IPV4_TCP: - /* fall-through. 
*/ - case MLX5_RSS_HASH_IPV6_TCP: - if (rss_types & RTE_ETH_RSS_TCP) { - *hash_field &= ~MLX5_TCP_IBV_RX_HASH; - if (rss_types & RTE_ETH_RSS_L4_DST_ONLY) - *hash_field |= IBV_RX_HASH_DST_PORT_TCP; - else if (rss_types & RTE_ETH_RSS_L4_SRC_ONLY) - *hash_field |= IBV_RX_HASH_SRC_PORT_TCP; - else - *hash_field |= MLX5_TCP_IBV_RX_HASH; - } - return; - default: - return; - } + if (hash_field_protos & MLX5_RSS_HASH_IPV4) + filter_ipv4_types(rss_types, hash_field); + else if (hash_field_protos & MLX5_RSS_HASH_IPV6) + filter_ipv6_types(rss_types, hash_field); + + if (hash_field_protos & MLX5_UDP_IBV_RX_HASH) + filter_udp_types(rss_types, hash_field); + else if (hash_field_protos & MLX5_TCP_IBV_RX_HASH) + filter_tcp_types(rss_types, hash_field); } /** @@ -15591,7 +15695,7 @@ rss_desc.ind_tbl = shared_rss->ind_tbl; if (priv->sh->config.dv_flow_en == 2) rss_desc.hws_flags = MLX5DR_ACTION_FLAG_HWS_RX; - for (i = 0; i < MLX5_RSS_HASH_FIELDS_LEN; i++) { + for (i = 0; i < MLX5_RSS_HASH_IDX_MAX; i++) { struct mlx5_hrxq *hrxq; uint64_t hash_fields = mlx5_rss_hash_fields[i]; int tunnel = 0; @@ -17698,7 +17802,7 @@ } } /* Create default matcher in drop table. */ - matcher.tbl = mtrmng->drop_tbl[domain], + matcher.tbl = mtrmng->drop_tbl[domain]; tbl_data = container_of(mtrmng->drop_tbl[domain], struct mlx5_flow_tbl_data_entry, tbl); if (!mtrmng->def_matcher[domain]) { @@ -18456,7 +18560,8 @@ .size = sizeof(value.buf), }; struct mlx5dv_flow_matcher_attr dv_attr = { - .type = IBV_FLOW_ATTR_NORMAL | IBV_FLOW_ATTR_FLAGS_EGRESS, + .type = IBV_FLOW_ATTR_NORMAL, + .flags = IBV_FLOW_ATTR_FLAGS_EGRESS, .priority = 0, .match_criteria_enable = 0, .match_mask = (void *)&mask, diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_flow_hw.c dpdk-22.11.11/drivers/net/mlx5/mlx5_flow_hw.c --- dpdk-22.11.9/drivers/net/mlx5/mlx5_flow_hw.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_flow_hw.c 2025-12-24 13:18:07.000000000 +0000 @@ -764,7 +764,8 @@ const struct rte_flow_action *action, struct mlx5_hw_actions *acts, uint16_t action_src, - uint16_t action_dst) + uint16_t action_dst, + struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_shared_action_rss *shared_rss; @@ -781,8 +782,10 @@ (priv, acts, (enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_RSS, action_src, action_dst, idx, shared_rss)) { - DRV_LOG(WARNING, "Indirect RSS action index %d translate failed", act_idx); - return -1; + DRV_LOG(ERR, "port %u Indirect RSS action (handle %p) translate failed", + dev->data->port_id, action->conf); + return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + action, "Indirect RSS action translate failed"); } break; case MLX5_INDIRECT_ACTION_TYPE_COUNT: @@ -790,15 +793,22 @@ (enum rte_flow_action_type) MLX5_RTE_FLOW_ACTION_TYPE_COUNT, action_src, action_dst, act_idx)) { - DRV_LOG(WARNING, "Indirect count action translate failed"); - return -1; + DRV_LOG(ERR, + "port %u Indirect count action (handle %p) " + "translate failed", + dev->data->port_id, action->conf); + return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + action, + "Indirect count action translate failed"); } break; case MLX5_INDIRECT_ACTION_TYPE_CT: if (flow_hw_ct_compile(dev, MLX5_HW_INV_QUEUE, idx, &acts->rule_acts[action_dst])) { - DRV_LOG(WARNING, "Indirect CT action translate failed"); - return -1; + DRV_LOG(ERR, "port %u Indirect CT action (handle %p) translate failed", + dev->data->port_id, action->conf); + return rte_flow_error_set(error, EINVAL, 
RTE_FLOW_ERROR_TYPE_ACTION, + action, "Indirect CT action translate failed"); } break; case MLX5_INDIRECT_ACTION_TYPE_METER_MARK: @@ -806,13 +816,19 @@ (enum rte_flow_action_type) MLX5_RTE_FLOW_ACTION_TYPE_METER_MARK, action_src, action_dst, idx)) { - DRV_LOG(WARNING, "Indirect meter mark action translate failed"); - return -1; + DRV_LOG(ERR, + "port %u Indirect meter mark action (handle %p) " + "translate failed", + dev->data->port_id, action->conf); + return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + action, + "Indirect meter mark action translate failed"); } break; default: - DRV_LOG(WARNING, "Unsupported shared action type:%d", type); - break; + DRV_LOG(ERR, "Unsupported shared action type: %d", type); + return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, action, + "Unsupported shared action type"); } return 0; } @@ -1376,8 +1392,8 @@ goto err; } if (actions->conf && masks->conf) { - if (flow_hw_shared_action_translate - (dev, actions, acts, actions - action_start, action_pos)) + if (flow_hw_shared_action_translate(dev, actions, acts, + actions - action_start, action_pos, &sub_error)) goto err; } else if (__flow_hw_act_data_general_append (priv, acts, actions->type, @@ -1666,6 +1682,10 @@ case RTE_FLOW_ACTION_TYPE_END: actions_end = true; break; + case RTE_FLOW_ACTION_TYPE_PORT_ID: + DRV_LOG(ERR, "RTE_FLOW_ACTION_TYPE_PORT_ID action is not supported. " + "Use RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT instead."); + goto err; default: break; } @@ -1882,6 +1902,7 @@ uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1); uint64_t item_flags; + uint32_t *cnt_queue; cnt_id_t age_cnt; memset(&act_data, 0, sizeof(act_data)); @@ -1927,9 +1948,8 @@ if (param == NULL) return -1; if (action_flags & MLX5_FLOW_ACTION_COUNT) { - if (mlx5_hws_cnt_pool_get(priv->hws_cpool, - ¶m->queue_id, &age_cnt, - idx) < 0) + cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue); + if (mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &age_cnt, idx, 1) < 0) return -1; flow->cnt_id = age_cnt; param->nb_cnts++; @@ -2317,7 +2337,8 @@ /* Fall-through. */ case RTE_FLOW_ACTION_TYPE_COUNT: cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue); - ret = mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &cnt_id, age_idx); + ret = mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &cnt_id, + age_idx, 0); if (ret != 0) { rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ACTION, action, "Failed to allocate flow counter"); @@ -4274,6 +4295,10 @@ at->actions_off[i] = curr_off; action_types[curr_off++] = MLX5DR_ACTION_TYP_MISS; break; + case RTE_FLOW_ACTION_TYPE_PORT_ID: + DRV_LOG(ERR, "RTE_FLOW_ACTION_TYPE_PORT_ID action is not supported. " + "Use RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT instead.\n"); + return NULL; default: type = mlx5_hw_dr_action_types[at->actions[i].type]; at->actions_off[i] = curr_off; @@ -5757,46 +5782,6 @@ } /* - * Creating a flow pattern template with all ETH packets matching. - * This template is used to set up a table for default Tx copy (Tx metadata - * to REG_C_1) flow rule usage. - * - * @param dev - * Pointer to Ethernet device. - * @param error - * Pointer to error structure. - * - * @return - * Pointer to flow pattern template on success, NULL otherwise. 
- */ -static struct rte_flow_pattern_template * -flow_hw_create_tx_default_mreg_copy_pattern_template(struct rte_eth_dev *dev, - struct rte_flow_error *error) -{ - struct rte_flow_pattern_template_attr tx_pa_attr = { - .relaxed_matching = 0, - .egress = 1, - }; - struct rte_flow_item_eth promisc = { - .dst.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .src.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .type = 0, - }; - struct rte_flow_item eth_all[] = { - [0] = { - .type = RTE_FLOW_ITEM_TYPE_ETH, - .spec = &promisc, - .mask = &promisc, - }, - [1] = { - .type = RTE_FLOW_ITEM_TYPE_END, - }, - }; - - return flow_hw_pattern_template_create(dev, &tx_pa_attr, eth_all, error); -} - -/* * Creating a flow pattern template with all LACP packets matching, only for NIC * ingress domain. * @@ -6238,7 +6223,7 @@ .priority = MLX5_HW_LOWEST_PRIO_ROOT, .egress = 1, }, - .nb_flows = 1, /* One default flow rule for all. */ + .nb_flows = MLX5_HW_CTRL_FLOW_NB_RULES, }; struct mlx5_flow_template_table_cfg tx_tbl_cfg = { .attr = tx_tbl_attr, @@ -6498,7 +6483,7 @@ /* Create templates and table for default Tx metadata copy flow rule. */ if (!repr_matching && xmeta == MLX5_XMETA_MODE_META32_HWS) { hw_ctrl_fdb->tx_meta_items_tmpl = - flow_hw_create_tx_default_mreg_copy_pattern_template(dev, error); + flow_hw_create_tx_repr_sq_pattern_tmpl(dev, error); if (!hw_ctrl_fdb->tx_meta_items_tmpl) { DRV_LOG(ERR, "port %u failed to Tx metadata copy pattern" " template for control flows", dev->data->port_id); @@ -8261,13 +8246,16 @@ break; } /* Wait for ASO object completion. */ - if (queue == MLX5_HW_INV_QUEUE && - mlx5_aso_mtr_wait(priv->sh, MLX5_HW_INV_QUEUE, aso_mtr)) { - ret = -EINVAL; - rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, "Unable to wait for ASO meter CQE"); - break; + if (queue == MLX5_HW_INV_QUEUE) { + if (mlx5_aso_mtr_wait(priv->sh, MLX5_HW_INV_QUEUE, aso_mtr)) { + ret = -EINVAL; + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "Unable to wait for ASO meter CQE"); + } + mlx5_ipool_free(pool->idx_pool, idx); + if (ret < 0) + break; } if (!job) mlx5_ipool_free(pool->idx_pool, idx); @@ -8461,6 +8449,14 @@ const struct rte_flow_action *action, struct rte_flow_error *err) { + struct mlx5_priv *priv = dev->data->dev_private; + + if (action->type == RTE_FLOW_ACTION_TYPE_AGE && priv->hws_strict_queue) { + rte_flow_error_set(err, EINVAL, RTE_FLOW_ERROR_TYPE_STATE, NULL, + "Cannot create age action synchronously with strict queueing"); + return NULL; + } + return flow_hw_action_handle_create(dev, MLX5_HW_INV_QUEUE, NULL, conf, action, NULL, err); } @@ -8632,6 +8628,8 @@ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "No aging initialized"); if (priv->hws_strict_queue) { + /* Queue is invalid in sync query. Sync query and strict queueing is disallowed. 
*/ + MLX5_ASSERT(queue_id != MLX5_HW_INV_QUEUE); if (queue_id >= age_info->hw_q_age->nb_rings) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, @@ -8685,10 +8683,10 @@ struct mlx5_priv *priv = dev->data->dev_private; if (priv->hws_strict_queue) - DRV_LOG(WARNING, - "port %u get aged flows called in strict queue mode.", - dev->data->port_id); - return flow_hw_get_q_aged_flows(dev, 0, contexts, nb_contexts, error); + return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_STATE, NULL, + "Cannot get aged flows synchronously with strict queueing"); + + return flow_hw_get_q_aged_flows(dev, MLX5_HW_INV_QUEUE, contexts, nb_contexts, error); } const struct mlx5_flow_driver_ops mlx5_flow_hw_drv_ops = { @@ -9146,7 +9144,7 @@ } int -mlx5_flow_hw_esw_destroy_sq_miss_flow(struct rte_eth_dev *dev, uint32_t sqn) +mlx5_flow_hw_esw_destroy_sq_miss_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external) { uint16_t port_id = dev->data->port_id; uint16_t proxy_port_id = dev->data->port_id; @@ -9173,7 +9171,8 @@ !proxy_priv->hw_ctrl_fdb->hw_esw_sq_miss_root_tbl || !proxy_priv->hw_ctrl_fdb->hw_esw_sq_miss_tbl) return 0; - cf = LIST_FIRST(&proxy_priv->hw_ctrl_flows); + cf = external ? LIST_FIRST(&proxy_priv->hw_ext_ctrl_flows) : + LIST_FIRST(&proxy_priv->hw_ctrl_flows); while (cf != NULL) { cf_next = LIST_NEXT(cf, next); if (flow_hw_is_matching_sq_miss_flow(cf, dev, sqn)) { @@ -9253,21 +9252,18 @@ } int -mlx5_flow_hw_create_tx_default_mreg_copy_flow(struct rte_eth_dev *dev) +mlx5_flow_hw_create_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external) { struct mlx5_priv *priv = dev->data->dev_private; - struct rte_flow_item_eth promisc = { - .dst.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .src.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .type = 0, + struct mlx5_rte_flow_item_sq sq_spec = { + .queue = sqn, }; - struct rte_flow_item eth_all[] = { - [0] = { - .type = RTE_FLOW_ITEM_TYPE_ETH, - .spec = &promisc, - .mask = &promisc, + struct rte_flow_item items[] = { + { + .type = (enum rte_flow_item_type)MLX5_RTE_FLOW_ITEM_TYPE_SQ, + .spec = &sq_spec, }, - [1] = { + { .type = RTE_FLOW_ITEM_TYPE_END, }, }; @@ -9297,6 +9293,7 @@ }; struct mlx5_hw_ctrl_flow_info flow_info = { .type = MLX5_HW_CTRL_FLOW_TYPE_TX_META_COPY, + .tx_repr_sq = sqn, }; MLX5_ASSERT(priv->master); @@ -9306,11 +9303,61 @@ return 0; return flow_hw_create_ctrl_flow(dev, dev, priv->hw_ctrl_fdb->hw_tx_meta_cpy_tbl, - eth_all, 0, copy_reg_action, 0, &flow_info, false); + items, 0, copy_reg_action, 0, &flow_info, external); +} + +static bool +flow_hw_is_matching_tx_mreg_copy_flow(struct mlx5_hw_ctrl_flow *cf, + struct rte_eth_dev *dev, + uint32_t sqn) +{ + if (cf->owner_dev != dev) + return false; + if (cf->info.type == MLX5_HW_CTRL_FLOW_TYPE_TX_META_COPY && cf->info.tx_repr_sq == sqn) + return true; + return false; +} + +int +mlx5_flow_hw_destroy_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external) +{ + uint16_t port_id = dev->data->port_id; + uint16_t proxy_port_id = dev->data->port_id; + struct rte_eth_dev *proxy_dev; + struct mlx5_priv *proxy_priv; + struct mlx5_hw_ctrl_flow *cf; + struct mlx5_hw_ctrl_flow *cf_next; + int ret; + + ret = rte_flow_pick_transfer_proxy(port_id, &proxy_port_id, NULL); + if (ret) { + DRV_LOG(ERR, "Unable to pick transfer proxy port for port %u. 
Transfer proxy " + "port must be present for default SQ miss flow rules to exist.", + port_id); + return ret; + } + proxy_dev = &rte_eth_devices[proxy_port_id]; + proxy_priv = proxy_dev->data->dev_private; + if (!proxy_priv->dr_ctx || + !proxy_priv->hw_ctrl_fdb || + !proxy_priv->hw_ctrl_fdb->hw_tx_meta_cpy_tbl) + return 0; + cf = external ? LIST_FIRST(&proxy_priv->hw_ext_ctrl_flows) : + LIST_FIRST(&proxy_priv->hw_ctrl_flows); + while (cf != NULL) { + cf_next = LIST_NEXT(cf, next); + if (flow_hw_is_matching_tx_mreg_copy_flow(cf, dev, sqn)) { + claim_zero(flow_hw_destroy_ctrl_flow(proxy_dev, cf->flow)); + LIST_REMOVE(cf, next); + mlx5_free(cf); + } + cf = cf_next; + } + return 0; } int -mlx5_flow_hw_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external) +mlx5_flow_hw_create_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_rte_flow_item_sq sq_spec = { @@ -9367,6 +9414,55 @@ items, 0, actions, 0, &flow_info, external); } +static bool +flow_hw_is_tx_matching_repr_matching_flow(struct mlx5_hw_ctrl_flow *cf, + struct rte_eth_dev *dev, + uint32_t sqn) +{ + if (cf->owner_dev != dev) + return false; + if (cf->info.type == MLX5_HW_CTRL_FLOW_TYPE_TX_REPR_MATCH && cf->info.tx_repr_sq == sqn) + return true; + return false; +} + +int +mlx5_flow_hw_destroy_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external) +{ + uint16_t port_id = dev->data->port_id; + uint16_t proxy_port_id = dev->data->port_id; + struct rte_eth_dev *proxy_dev; + struct mlx5_priv *proxy_priv; + struct mlx5_hw_ctrl_flow *cf; + struct mlx5_hw_ctrl_flow *cf_next; + int ret; + + ret = rte_flow_pick_transfer_proxy(port_id, &proxy_port_id, NULL); + if (ret) { + DRV_LOG(ERR, "Unable to pick transfer proxy port for port %u. Transfer proxy " + "port must be present for default SQ miss flow rules to exist.", + port_id); + return ret; + } + proxy_dev = &rte_eth_devices[proxy_port_id]; + proxy_priv = proxy_dev->data->dev_private; + if (!proxy_priv->dr_ctx || + !proxy_priv->hw_tx_repr_tagging_tbl) + return 0; + cf = external ? 
LIST_FIRST(&proxy_priv->hw_ext_ctrl_flows) : + LIST_FIRST(&proxy_priv->hw_ctrl_flows); + while (cf != NULL) { + cf_next = LIST_NEXT(cf, next); + if (flow_hw_is_tx_matching_repr_matching_flow(cf, dev, sqn)) { + claim_zero(flow_hw_destroy_ctrl_flow(proxy_dev, cf->flow)); + LIST_REMOVE(cf, next); + mlx5_free(cf); + } + cf = cf_next; + } + return 0; +} + int mlx5_flow_hw_lacp_rx_flow(struct rte_eth_dev *dev) { diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_hws_cnt.c dpdk-22.11.11/drivers/net/mlx5/mlx5_hws_cnt.c --- dpdk-22.11.9/drivers/net/mlx5/mlx5_hws_cnt.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_hws_cnt.c 2025-12-24 13:18:07.000000000 +0000 @@ -72,8 +72,8 @@ uint32_t ret __rte_unused; reset_cnt_num = rte_ring_count(reset_list); - cpool->query_gen++; mlx5_aso_cnt_query(sh, cpool); + __atomic_store_n(&cpool->query_gen, cpool->query_gen + 1, __ATOMIC_RELEASE); zcdr.n1 = 0; zcdu.n1 = 0; ret = rte_ring_enqueue_zc_burst_elem_start(reuse_list, @@ -143,14 +143,14 @@ uint32_t nb_alloc_cnts = mlx5_hws_cnt_pool_get_size(cpool); uint16_t expected1 = HWS_AGE_CANDIDATE; uint16_t expected2 = HWS_AGE_CANDIDATE_INSIDE_RING; - uint32_t i; + uint32_t i, age_idx, in_use; cpool->time_of_last_age_check = curr_time; for (i = 0; i < nb_alloc_cnts; ++i) { - uint32_t age_idx = cpool->pool[i].age_idx; uint64_t hits; - if (!cpool->pool[i].in_used || age_idx == 0) + mlx5_hws_cnt_get_all(&cpool->pool[i], &in_use, NULL, &age_idx); + if (!in_use || age_idx == 0) continue; param = mlx5_ipool_get(age_info->ages_ipool, age_idx); if (unlikely(param == NULL)) { @@ -179,10 +179,13 @@ break; case HWS_AGE_FREE: /* - * AGE parameter with state "FREE" couldn't be pointed - * by any counter since counter is destroyed first. - * Fall-through. + * Since this check is async, we may reach a race condition + * where the age and counter are used in the same rule, + * using the same counter index, + * age was freed first, and counter was not freed yet. + * Aging check can be safely ignored in that case. */ + continue; default: MLX5_ASSERT(0); continue; diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_hws_cnt.h dpdk-22.11.11/drivers/net/mlx5/mlx5_hws_cnt.h --- dpdk-22.11.9/drivers/net/mlx5/mlx5_hws_cnt.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_hws_cnt.h 2025-12-24 13:18:07.000000000 +0000 @@ -42,33 +42,36 @@ struct mlx5_hws_cnt_dcs dcs[MLX5_HWS_CNT_DCS_NUM]; }; -struct mlx5_hws_cnt { - struct flow_counter_stats reset; - bool in_used; /* Indicator whether this counter in used or in pool. */ - union { - struct { - uint32_t share:1; - /* - * share will be set to 1 when this counter is used as - * indirect action. - */ - uint32_t age_idx:24; - /* - * When this counter uses for aging, it save the index - * of AGE parameter. For pure counter (without aging) - * this index is zero. - */ - }; - /* This struct is only meaningful when user own this counter. */ - uint32_t query_gen_when_free; +union mlx5_hws_cnt_state { + uint32_t data; + struct { + uint32_t in_used:1; + /* Indicator whether this counter in used or in pool. */ + uint32_t share:1; + /* + * share will be set to 1 when this counter is used as + * indirect action. + */ + uint32_t age_idx:24; /* - * When PMD own this counter (user put back counter to PMD - * counter pool, i.e), this field recorded value of counter - * pools query generation at time user release the counter. + * When this counter uses for aging, it stores the index + * of AGE parameter. Otherwise, this index is zero. 
*/ }; }; +struct mlx5_hws_cnt { + struct flow_counter_stats reset; + union mlx5_hws_cnt_state cnt_state; + /* This struct is only meaningful when user own this counter. */ + uint32_t query_gen_when_free; + /* + * When PMD own this counter (user put back counter to PMD + * counter pool, i.e), this field recorded value of counter + * pools query generation at time user release the counter. + */ +}; + struct mlx5_hws_cnt_raw_data_mng { struct flow_counter_stats *raw; struct mlx5_pmd_mr mr; @@ -179,6 +182,42 @@ MLX5_INDIRECT_ACTION_TYPE_COUNT ? true : false; } +static __rte_always_inline void +mlx5_hws_cnt_set_age_idx(struct mlx5_hws_cnt *cnt, uint32_t value) +{ + union mlx5_hws_cnt_state cnt_state; + + cnt_state.data = __atomic_load_n(&cnt->cnt_state.data, __ATOMIC_ACQUIRE); + cnt_state.age_idx = value; + __atomic_store_n(&cnt->cnt_state.data, cnt_state.data, __ATOMIC_RELEASE); +} + +static __rte_always_inline void +mlx5_hws_cnt_set_all(struct mlx5_hws_cnt *cnt, uint32_t in_used, uint32_t share, uint32_t age_idx) +{ + union mlx5_hws_cnt_state cnt_state; + + cnt_state.in_used = !!in_used; + cnt_state.share = !!share; + cnt_state.age_idx = age_idx; + __atomic_store_n(&cnt->cnt_state.data, cnt_state.data, __ATOMIC_RELAXED); +} + +static __rte_always_inline void +mlx5_hws_cnt_get_all(struct mlx5_hws_cnt *cnt, uint32_t *in_used, uint32_t *share, + uint32_t *age_idx) +{ + union mlx5_hws_cnt_state cnt_state; + + cnt_state.data = __atomic_load_n(&cnt->cnt_state.data, __ATOMIC_ACQUIRE); + if (in_used != NULL) + *in_used = cnt_state.in_used; + if (share != NULL) + *share = cnt_state.share; + if (age_idx != NULL) + *age_idx = cnt_state.age_idx; +} + /** * Generate Counter id from internal index. * @@ -402,8 +441,7 @@ uint32_t iidx; iidx = mlx5_hws_cnt_iidx(cpool, *cnt_id); - MLX5_ASSERT(cpool->pool[iidx].in_used); - cpool->pool[iidx].in_used = false; + mlx5_hws_cnt_set_all(&cpool->pool[iidx], 0, 0, 0); cpool->pool[iidx].query_gen_when_free = __atomic_load_n(&cpool->query_gen, __ATOMIC_RELAXED); if (likely(queue != NULL)) @@ -459,7 +497,7 @@ */ static __rte_always_inline int mlx5_hws_cnt_pool_get(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue, - cnt_id_t *cnt_id, uint32_t age_idx) + cnt_id_t *cnt_id, uint32_t age_idx, uint32_t shared) { unsigned int ret; struct rte_ring_zc_data zcdc = {0}; @@ -486,9 +524,7 @@ __hws_cnt_query_raw(cpool, *cnt_id, &cpool->pool[iidx].reset.hits, &cpool->pool[iidx].reset.bytes); - MLX5_ASSERT(!cpool->pool[iidx].in_used); - cpool->pool[iidx].in_used = true; - cpool->pool[iidx].age_idx = age_idx; + mlx5_hws_cnt_set_all(&cpool->pool[iidx], 1, shared, age_idx); return 0; } ret = rte_ring_dequeue_zc_burst_elem_start(qcache, sizeof(cnt_id_t), 1, @@ -526,10 +562,7 @@ __hws_cnt_query_raw(cpool, *cnt_id, &cpool->pool[iidx].reset.hits, &cpool->pool[iidx].reset.bytes); rte_ring_dequeue_zc_elem_finish(qcache, 1); - cpool->pool[iidx].share = 0; - MLX5_ASSERT(!cpool->pool[iidx].in_used); - cpool->pool[iidx].in_used = true; - cpool->pool[iidx].age_idx = age_idx; + mlx5_hws_cnt_set_all(&cpool->pool[iidx], 1, shared, age_idx); return 0; } @@ -582,23 +615,12 @@ mlx5_hws_cnt_shared_get(struct mlx5_hws_cnt_pool *cpool, cnt_id_t *cnt_id, uint32_t age_idx) { - int ret; - uint32_t iidx; - - ret = mlx5_hws_cnt_pool_get(cpool, NULL, cnt_id, age_idx); - if (ret != 0) - return ret; - iidx = mlx5_hws_cnt_iidx(cpool, *cnt_id); - cpool->pool[iidx].share = 1; - return 0; + return mlx5_hws_cnt_pool_get(cpool, NULL, cnt_id, age_idx, 1); } static __rte_always_inline void mlx5_hws_cnt_shared_put(struct 
mlx5_hws_cnt_pool *cpool, cnt_id_t *cnt_id) { - uint32_t iidx = mlx5_hws_cnt_iidx(cpool, *cnt_id); - - cpool->pool[iidx].share = 0; mlx5_hws_cnt_pool_put(cpool, NULL, cnt_id); } @@ -606,8 +628,10 @@ mlx5_hws_cnt_is_shared(struct mlx5_hws_cnt_pool *cpool, cnt_id_t cnt_id) { uint32_t iidx = mlx5_hws_cnt_iidx(cpool, cnt_id); + uint32_t share; - return cpool->pool[iidx].share ? true : false; + mlx5_hws_cnt_get_all(&cpool->pool[iidx], NULL, &share, NULL); + return !!share; } static __rte_always_inline void @@ -616,17 +640,19 @@ { uint32_t iidx = mlx5_hws_cnt_iidx(cpool, cnt_id); - MLX5_ASSERT(cpool->pool[iidx].share); - cpool->pool[iidx].age_idx = age_idx; + MLX5_ASSERT(cpool->pool[iidx].cnt_state.share); + mlx5_hws_cnt_set_age_idx(&cpool->pool[iidx], age_idx); } static __rte_always_inline uint32_t mlx5_hws_cnt_age_get(struct mlx5_hws_cnt_pool *cpool, cnt_id_t cnt_id) { uint32_t iidx = mlx5_hws_cnt_iidx(cpool, cnt_id); + uint32_t age_idx, share; - MLX5_ASSERT(cpool->pool[iidx].share); - return cpool->pool[iidx].age_idx; + mlx5_hws_cnt_get_all(&cpool->pool[iidx], NULL, &share, &age_idx); + MLX5_ASSERT(share); + return age_idx; } static __rte_always_inline cnt_id_t diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_rx.c dpdk-22.11.11/drivers/net/mlx5/mlx5_rx.c --- dpdk-22.11.9/drivers/net/mlx5/mlx5_rx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_rx.c 2025-12-24 13:18:07.000000000 +0000 @@ -40,7 +40,7 @@ static __rte_always_inline int mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe, - uint16_t *skip_cnt, bool mprq); + uint16_t *skip_cnt, bool mprq, uint32_t *widx); static __rte_always_inline uint32_t rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); @@ -219,6 +219,8 @@ } if (pkt_burst == mlx5_rx_burst) { snprintf(mode->info, sizeof(mode->info), "%s", "Scalar"); + } else if (pkt_burst == mlx5_rx_burst_out_of_order) { + snprintf(mode->info, sizeof(mode->info), "%s", "Scalar Out-of-Order"); } else if (pkt_burst == mlx5_rx_burst_mprq) { snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ"); } else if (pkt_burst == mlx5_rx_burst_vec) { @@ -350,13 +352,84 @@ return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); } +static inline void mlx5_rq_win_reset(struct mlx5_rxq_data *rxq) +{ + static_assert(MLX5_WINOOO_BITS == (sizeof(*rxq->rq_win_data) * CHAR_BIT), + "Invalid out-of-order window bitwidth"); + rxq->rq_win_idx = 0; + rxq->rq_win_cnt = 0; + if (rxq->rq_win_data != NULL && rxq->rq_win_idx_mask != 0) + memset(rxq->rq_win_data, 0, (rxq->rq_win_idx_mask + 1) * sizeof(*rxq->rq_win_data)); +} + +static inline int mlx5_rq_win_init(struct mlx5_rxq_data *rxq) +{ + struct mlx5_rxq_ctrl *ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); + uint32_t win_size, win_mask; + + /* Set queue size as window size */ + win_size = 1u << rxq->elts_n; + win_size = RTE_MAX(win_size, MLX5_WINOOO_BITS); + win_size = win_size / MLX5_WINOOO_BITS; + win_mask = win_size - 1; + if (win_mask != rxq->rq_win_idx_mask || rxq->rq_win_data == NULL) { + mlx5_free(rxq->rq_win_data); + rxq->rq_win_idx_mask = 0; + rxq->rq_win_data = mlx5_malloc(MLX5_MEM_RTE, + win_size * sizeof(*rxq->rq_win_data), + RTE_CACHE_LINE_SIZE, ctrl->socket); + if (rxq->rq_win_data == NULL) + return -ENOMEM; + rxq->rq_win_idx_mask = (uint16_t)win_mask; + } + mlx5_rq_win_reset(rxq); + return 0; +} + +static inline bool mlx5_rq_win_test(struct mlx5_rxq_data *rxq) +{ + return !!rxq->rq_win_cnt; +} + +static inline void 
mlx5_rq_win_update(struct mlx5_rxq_data *rxq, uint32_t delta) +{ + uint32_t idx; + + idx = (delta / MLX5_WINOOO_BITS) + rxq->rq_win_idx; + idx &= rxq->rq_win_idx_mask; + rxq->rq_win_cnt = 1; + rxq->rq_win_data[idx] |= 1u << (delta % MLX5_WINOOO_BITS); +} + +static inline uint32_t mlx5_rq_win_advance(struct mlx5_rxq_data *rxq, uint32_t delta) +{ + uint32_t idx; + + idx = (delta / MLX5_WINOOO_BITS) + rxq->rq_win_idx; + idx &= rxq->rq_win_idx_mask; + rxq->rq_win_data[idx] |= 1u << (delta % MLX5_WINOOO_BITS); + ++rxq->rq_win_cnt; + if (delta >= MLX5_WINOOO_BITS) + return 0; + delta = 0; + while (~rxq->rq_win_data[idx] == 0) { + rxq->rq_win_data[idx] = 0; + MLX5_ASSERT(rxq->rq_win_cnt >= MLX5_WINOOO_BITS); + rxq->rq_win_cnt -= MLX5_WINOOO_BITS; + idx = (idx + 1) & rxq->rq_win_idx_mask; + rxq->rq_win_idx = idx; + delta += MLX5_WINOOO_BITS; + } + return delta; +} + /** * Initialize Rx WQ and indexes. * * @param[in] rxq * Pointer to RX queue structure. */ -void +int mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) { const unsigned int wqe_n = 1 << rxq->elts_n; @@ -405,8 +478,12 @@ (wqe_n >> rxq->sges_n) * RTE_BIT32(rxq->log_strd_num) : 0; /* Update doorbell counter. */ rxq->rq_ci = wqe_n >> rxq->sges_n; + rxq->rq_ci_ooo = rxq->rq_ci; + if (mlx5_rq_win_init(rxq)) + return -ENOMEM; rte_io_wmb(); *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); + return 0; } #define MLX5_ERROR_CQE_MASK 0x40000000 @@ -515,6 +592,9 @@ 16 * wqe_n); rxq_ctrl->dump_file_n++; } + /* Try to find the actual cq_ci in hardware for shared queue. */ + if (rxq->shared) + rxq_sync_cq(rxq); rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; /* Fall-through */ case MLX5_RXQ_ERR_STATE_NEED_READY: @@ -574,7 +654,8 @@ (*rxq->elts)[elts_n + i] = &rxq->fake_mbuf; } - mlx5_rxq_initialize(rxq); + if (mlx5_rxq_initialize(rxq)) + return MLX5_RECOVERY_ERROR_RET; rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; return MLX5_RECOVERY_COMPLETED_RET; } @@ -600,6 +681,10 @@ * Number of packets skipped due to recoverable errors. * @param mprq * Indication if it is called from MPRQ. + * @param[out] widx + * Store WQE index from CQE to support out of order completions. NULL + * can be specified if index is not needed + * * @return * 0 in case of empty CQE, * MLX5_REGULAR_ERROR_CQE_RET in case of error CQE, @@ -610,7 +695,7 @@ static inline int mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe, - uint16_t *skip_cnt, bool mprq) + uint16_t *skip_cnt, bool mprq, uint32_t *widx) { struct rxq_zip *zip = &rxq->zip; uint16_t cqe_n = cqe_cnt + 1; @@ -627,6 +712,8 @@ cqe_cnt].pkt_info); len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt & rxq->byte_mask); + if (widx != NULL) + *widx = zip->wqe_idx + zip->ai; *mcqe = &(*mc)[zip->ai & 7]; if ((++zip->ai & 7) == 0) { /* Invalidate consumed CQEs */ @@ -670,6 +757,9 @@ if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { if (unlikely(ret == MLX5_CQE_STATUS_ERR || rxq->err_state)) { + /* We should try to track out-pf-order WQE */ + if (widx != NULL) + *widx = rte_be_to_cpu_16(cqe->wqe_counter); ret = mlx5_rx_err_handle(rxq, 0, 1, skip_cnt); if (ret == MLX5_CQE_STATUS_HW_OWN) return MLX5_ERROR_CQE_MASK; @@ -712,6 +802,10 @@ */ zip->ca = cq_ci; zip->na = zip->ca + 7; + if (widx != NULL) { + zip->wqe_idx = rte_be_to_cpu_16(cqe->wqe_counter); + *widx = zip->wqe_idx; + } /* Compute the next non compressed CQE. */ zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; /* Get packet size to return. 
*/ @@ -730,6 +824,8 @@ } else { rxq->cq_ci = cq_ci; len = rte_be_to_cpu_32(cqe->byte_cnt); + if (widx != NULL) + *widx = rte_be_to_cpu_16(cqe->wqe_counter); } } if (unlikely(rxq->err_state)) { @@ -932,7 +1028,7 @@ } if (!pkt) { cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; - len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe, &skip_cnt, false); + len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe, &skip_cnt, false, NULL); if (unlikely(len & MLX5_ERROR_CQE_MASK)) { /* We drop packets with non-critical errors */ rte_mbuf_raw_free(rep); @@ -1017,6 +1113,179 @@ } /** + * DPDK callback for RX with Out-of-Order completions support. + * + * @param dpdk_rxq + * Generic pointer to RX queue structure. + * @param[out] pkts + * Array to store received packets. + * @param pkts_n + * Maximum number of packets in array. + * + * @return + * Number of packets successfully received (<= pkts_n). + */ +uint16_t +mlx5_rx_burst_out_of_order(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) +{ + struct mlx5_rxq_data *rxq = dpdk_rxq; + const uint32_t wqe_n = 1 << rxq->elts_n; + const uint32_t wqe_mask = wqe_n - 1; + const uint32_t cqe_n = 1 << rxq->cqe_n; + const uint32_t cqe_mask = cqe_n - 1; + const unsigned int sges_n = rxq->sges_n; + const uint32_t pkt_mask = wqe_mask >> sges_n; + struct rte_mbuf *pkt = NULL; + struct rte_mbuf *seg = NULL; + volatile struct mlx5_cqe *cqe = + &(*rxq->cqes)[rxq->cq_ci & cqe_mask]; + unsigned int i = 0; + int len = 0; /* keep its value across iterations. */ + const uint32_t rq_ci = rxq->rq_ci; + uint32_t idx = 0; + + do { + volatile struct mlx5_wqe_data_seg *wqe; + struct rte_mbuf *rep = NULL; + volatile struct mlx5_mini_cqe8 *mcqe = NULL; + uint32_t delta; + uint16_t skip_cnt; + + if (!pkt) { + cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask]; + rte_prefetch0(cqe); + /* Allocate from the first packet mbuf pool */ + rep = (*rxq->elts)[0]; + /* We must allocate before CQE consuming to allow retry */ + rep = rte_mbuf_raw_alloc(rep->pool); + if (unlikely(rep == NULL)) { + ++rxq->stats.rx_nombuf; + break; + } + len = mlx5_rx_poll_len(rxq, cqe, cqe_mask, + &mcqe, &skip_cnt, false, &idx); + if (unlikely(len == MLX5_CRITICAL_ERROR_CQE_RET)) { + rte_mbuf_raw_free(rep); + mlx5_rq_win_reset(rxq); + break; + } + if (len == 0) { + rte_mbuf_raw_free(rep); + break; + } + idx &= pkt_mask; + delta = (idx - rxq->rq_ci) & pkt_mask; + MLX5_ASSERT(delta < ((rxq->rq_win_idx_mask + 1) * MLX5_WINOOO_BITS)); + if (likely(!mlx5_rq_win_test(rxq))) { + /* No out of order completions in sliding window */ + if (likely(delta == 0)) + rxq->rq_ci++; + else + mlx5_rq_win_update(rxq, delta); + } else { + /* We have out of order completions */ + rxq->rq_ci += mlx5_rq_win_advance(rxq, delta); + } + if (rxq->zip.ai == 0) + rxq->rq_ci_ooo = rxq->rq_ci; + idx <<= sges_n; + /* We drop packets with non-critical errors */ + if (unlikely(len & MLX5_ERROR_CQE_MASK)) { + rte_mbuf_raw_free(rep); + continue; + } + } + wqe = &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; + if (unlikely(pkt)) + NEXT(seg) = (*rxq->elts)[idx]; + seg = (*rxq->elts)[idx]; + rte_prefetch0(seg); + rte_prefetch0(wqe); + /* Allocate the buf from the same pool. */ + if (unlikely(rep == NULL)) { + rep = rte_mbuf_raw_alloc(seg->pool); + if (unlikely(rep == NULL)) { + ++rxq->stats.rx_nombuf; + if (!pkt) { + /* + * no buffers before we even started, + * bail out silently. 
+ */ + break; + } + while (pkt != seg) { + MLX5_ASSERT(pkt != (*rxq->elts)[idx]); + rep = NEXT(pkt); + NEXT(pkt) = NULL; + NB_SEGS(pkt) = 1; + rte_mbuf_raw_free(pkt); + pkt = rep; + } + break; + } + } + if (!pkt) { + pkt = seg; + MLX5_ASSERT(len >= (rxq->crc_present << 2)); + pkt->ol_flags &= RTE_MBUF_F_EXTERNAL; + rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); + if (rxq->crc_present) + len -= RTE_ETHER_CRC_LEN; + PKT_LEN(pkt) = len; + if (cqe->lro_num_seg > 1) { + mlx5_lro_update_hdr + (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, + mcqe, rxq, len); + pkt->ol_flags |= RTE_MBUF_F_RX_LRO; + pkt->tso_segsz = len / cqe->lro_num_seg; + } + } + DATA_LEN(rep) = DATA_LEN(seg); + PKT_LEN(rep) = PKT_LEN(seg); + SET_DATA_OFF(rep, DATA_OFF(seg)); + PORT(rep) = PORT(seg); + (*rxq->elts)[idx] = rep; + /* + * Fill NIC descriptor with the new buffer. The lkey and size + * of the buffers are already known, only the buffer address + * changes. + */ + wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); + /* If there's only one MR, no need to replace LKey in WQE. */ + if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) + wqe->lkey = mlx5_rx_mb2mr(rxq, rep); + if (len > DATA_LEN(seg)) { + len -= DATA_LEN(seg); + ++NB_SEGS(pkt); + ++idx; + idx &= wqe_mask; + continue; + } + DATA_LEN(seg) = len; +#ifdef MLX5_PMD_SOFT_COUNTERS + /* Increment bytes counter. */ + rxq->stats.ibytes += PKT_LEN(pkt); +#endif + /* Return packet. */ + *(pkts++) = pkt; + pkt = NULL; + ++i; + } while (i < pkts_n); + if (unlikely(i == 0 && rq_ci == rxq->rq_ci_ooo)) + return 0; + /* Update the consumer index. */ + rte_io_wmb(); + *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); + rte_io_wmb(); + *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci_ooo); +#ifdef MLX5_PMD_SOFT_COUNTERS + /* Increment packets counter. */ + rxq->stats.ipackets += i; +#endif + return i; +} + +/** * Update LRO packet TCP header. * The HW LRO feature doesn't update the TCP header after coalescing the * TCP segments but supplies information in CQE to fill it by SW. @@ -1172,7 +1441,7 @@ buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; } cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; - ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe, &skip_cnt, true); + ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe, &skip_cnt, true, NULL); if (unlikely(ret & MLX5_ERROR_CQE_MASK)) { if (ret == MLX5_CRITICAL_ERROR_CQE_RET) { rq_ci = rxq->rq_ci; diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_rx.h dpdk-22.11.11/drivers/net/mlx5/mlx5_rx.h --- dpdk-22.11.9/drivers/net/mlx5/mlx5_rx.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_rx.h 2025-12-24 13:18:07.000000000 +0000 @@ -22,6 +22,7 @@ /* Support tunnel matching. */ #define MLX5_FLOW_TUNNEL 10 +#define MLX5_WINOOO_BITS (sizeof(uint32_t) * CHAR_BIT) #define RXQ_PORT(rxq_ctrl) LIST_FIRST(&(rxq_ctrl)->owners)->priv #define RXQ_DEV(rxq_ctrl) ETH_DEV(RXQ_PORT(rxq_ctrl)) @@ -46,6 +47,7 @@ uint16_t na; /* Next array index. */ uint16_t cq_ci; /* The next CQE. */ uint32_t cqe_cnt; /* Number of CQEs. */ + uint16_t wqe_idx; /* WQE index */ }; /* Get pointer to the first stride. */ @@ -105,6 +107,7 @@ uint16_t port_id; uint32_t elts_ci; uint32_t rq_ci; + uint32_t rq_ci_ooo; uint16_t consumed_strd; /* Number of consumed strides in WQE. */ uint32_t rq_pi; uint32_t cq_ci; @@ -142,11 +145,16 @@ uint32_t rxseg_n; /* Number of split segment descriptions. */ struct mlx5_eth_rxseg rxseg[MLX5_MAX_RXQ_NSEG]; /* Buffer split segment descriptions - sizes, offsets, pools. */ + uint16_t rq_win_cnt; /* Number of packets in the sliding window data. 
*/ + uint16_t rq_win_idx_mask; /* Sliding window index wrapping mask. */ + uint16_t rq_win_idx; /* Index of the first element in sliding window. */ + uint32_t *rq_win_data; /* Out-of-Order completions sliding window. */ } __rte_cache_aligned; /* RX queue control descriptor. */ struct mlx5_rxq_ctrl { struct mlx5_rxq_data rxq; /* Data path structure. */ + uint16_t mtu; /* Original MTU that the queue was allocated with. */ LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */ LIST_HEAD(priv, mlx5_rxq_priv) owners; /* Owner rxq list. */ struct mlx5_rxq_obj *obj; /* Verbs/DevX elements. */ @@ -287,7 +295,8 @@ /* mlx5_rx.c */ uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n); -void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq); +uint16_t mlx5_rx_burst_out_of_order(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n); +int mlx5_rxq_initialize(struct mlx5_rxq_data *rxq); __rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n, uint16_t *skip_cnt); void mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf); @@ -313,6 +322,7 @@ uint16_t pkts_n); uint16_t mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n); +void rxq_sync_cq(struct mlx5_rxq_data *rxq); static int mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq); @@ -663,6 +673,23 @@ } /** + * Check whether Shared RQ is enabled for the device. + * + * @param dev + * Pointer to Ethernet device. + * + * @return + * 0 if disabled, otherwise enabled. + */ +static __rte_always_inline int +mlx5_shared_rq_enabled(struct rte_eth_dev *dev) +{ + struct mlx5_priv *priv = dev->data->dev_private; + + return !LIST_EMPTY(&priv->sh->shared_rxqs); +} + +/** * Check whether given RxQ is external. * * @param dev diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_rxq.c dpdk-22.11.11/drivers/net/mlx5/mlx5_rxq.c --- dpdk-22.11.9/drivers/net/mlx5/mlx5_rxq.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_rxq.c 2025-12-24 13:18:07.000000000 +0000 @@ -420,7 +420,7 @@ } /* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */ -static void +void rxq_sync_cq(struct mlx5_rxq_data *rxq) { const uint16_t cqe_n = 1 << rxq->cqe_n; @@ -589,7 +589,13 @@ return ret; } /* Reinitialize RQ - set WQEs. */ - mlx5_rxq_initialize(rxq_data); + ret = mlx5_rxq_initialize(rxq_data); + if (ret) { + DRV_LOG(ERR, "Port %u Rx queue %u RQ initialization failure.", + priv->dev_data->port_id, rxq->idx); + rte_errno = ENOMEM; + return ret; + } rxq_data->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; /* Set actual queue state. */ dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; @@ -770,7 +776,7 @@ dev->data->port_id, idx); return false; } - if (priv->mtu != spriv->mtu) { + if (priv->mtu != rxq_ctrl->mtu) { DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mtu mismatch", dev->data->port_id, idx); return false; @@ -1760,6 +1766,10 @@ LIST_INIT(&tmpl->owners); MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG); /* + * Save the original MTU to check against for shared rx queues. + */ + tmpl->mtu = dev->data->mtu; + /* * Save the original segment configuration in the shared queue * descriptor for the later check on the sibling queue creation. 
*/ @@ -2146,7 +2156,8 @@ { struct mlx5_external_rxq *rxq = mlx5_ext_rxq_get(dev, idx); - __atomic_fetch_add(&rxq->refcnt, 1, __ATOMIC_RELAXED); + if (rxq != NULL) + __atomic_fetch_add(&rxq->refcnt, 1, __ATOMIC_RELAXED); return rxq; } @@ -2166,7 +2177,9 @@ { struct mlx5_external_rxq *rxq = mlx5_ext_rxq_get(dev, idx); - return __atomic_sub_fetch(&rxq->refcnt, 1, __ATOMIC_RELAXED); + return rxq != NULL ? + __atomic_sub_fetch(&rxq->refcnt, 1, __ATOMIC_RELAXED) : + UINT32_MAX; } /** @@ -2185,8 +2198,8 @@ { struct mlx5_priv *priv = dev->data->dev_private; - MLX5_ASSERT(mlx5_is_external_rxq(dev, idx)); - return &priv->ext_rxqs[idx - MLX5_EXTERNAL_RX_QUEUE_ID_MIN]; + return mlx5_is_external_rxq(dev, idx) ? + &priv->ext_rxqs[idx - MLX5_EXTERNAL_RX_QUEUE_ID_MIN] : NULL; } /** @@ -2299,6 +2312,7 @@ (&rxq_ctrl->rxq.mr_ctrl.cache_bh); if (rxq_ctrl->rxq.shared) LIST_REMOVE(rxq_ctrl, share_entry); + mlx5_free(rxq_ctrl->rxq.rq_win_data); mlx5_free(rxq_ctrl); } dev->data->rx_queues[idx] = NULL; @@ -2345,7 +2359,6 @@ mlx5_ext_rxq_verify(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_external_rxq *rxq; uint32_t i; int ret = 0; @@ -2353,8 +2366,9 @@ return 0; for (i = MLX5_EXTERNAL_RX_QUEUE_ID_MIN; i <= UINT16_MAX ; ++i) { - rxq = mlx5_ext_rxq_get(dev, i); - if (rxq->refcnt < 2) + struct mlx5_external_rxq *rxq = mlx5_ext_rxq_get(dev, i); + + if (rxq == NULL || rxq->refcnt < 2) continue; DRV_LOG(DEBUG, "Port %u external RxQ %u still referenced.", dev->data->port_id, i); diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_rxtx_vec.h dpdk-22.11.11/drivers/net/mlx5/mlx5_rxtx_vec.h --- dpdk-22.11.9/drivers/net/mlx5/mlx5_rxtx_vec.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_rxtx_vec.h 2025-12-24 13:18:07.000000000 +0000 @@ -13,13 +13,6 @@ #include "mlx5_autoconf.h" -/* HW checksum offload capabilities of vectorized Tx. */ -#define MLX5_VEC_TX_CKSUM_OFFLOAD_CAP \ - (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \ - RTE_ETH_TX_OFFLOAD_UDP_CKSUM | \ - RTE_ETH_TX_OFFLOAD_TCP_CKSUM | \ - RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) - /* * Compile time sanity check for vectorized functions. */ diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_trigger.c dpdk-22.11.11/drivers/net/mlx5/mlx5_trigger.c --- dpdk-22.11.9/drivers/net/mlx5/mlx5_trigger.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_trigger.c 2025-12-24 13:18:07.000000000 +0000 @@ -1133,6 +1133,11 @@ #endif +#define SAVE_RTE_ERRNO_AND_STOP(ret, dev) do { \ + ret = rte_errno; \ + (dev)->data->dev_started = 0; \ +} while (0) + /** * DPDK callback to start the device. 
* @@ -1203,19 +1208,23 @@ if (ret) { DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s", dev->data->port_id, strerror(rte_errno)); + SAVE_RTE_ERRNO_AND_STOP(ret, dev); goto error; } if (mlx5_devx_obj_ops_en(priv->sh) && priv->obj_ops.lb_dummy_queue_create) { ret = priv->obj_ops.lb_dummy_queue_create(dev); - if (ret) - goto error; + if (ret) { + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto txpp_stop; + } } ret = mlx5_txq_start(dev); if (ret) { DRV_LOG(ERR, "port %u Tx queue allocation failed: %s", dev->data->port_id, strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto lb_dummy_queue_release; } if (priv->config.std_delay_drop || priv->config.hp_delay_drop) { if (!priv->sh->dev_cap.vf && !priv->sh->dev_cap.sf && @@ -1239,7 +1248,8 @@ if (ret) { DRV_LOG(ERR, "port %u Rx queue allocation failed: %s", dev->data->port_id, strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto txq_stop; } /* * Such step will be skipped if there is no hairpin TX queue configured @@ -1249,7 +1259,8 @@ if (ret) { DRV_LOG(ERR, "port %u hairpin auto binding failed: %s", dev->data->port_id, strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto rxq_stop; } /* Set started flag here for the following steps like control flow. */ dev->data->dev_started = 1; @@ -1257,7 +1268,8 @@ if (ret) { DRV_LOG(ERR, "port %u Rx interrupt vector creation failed", dev->data->port_id); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto rxq_stop; } mlx5_os_stats_init(dev); /* @@ -1269,7 +1281,8 @@ DRV_LOG(ERR, "port %u failed to attach indirect actions: %s", dev->data->port_id, rte_strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto rx_intr_vec_disable; } #ifdef HAVE_MLX5_HWS_SUPPORT if (priv->sh->config.dv_flow_en == 2) { @@ -1277,7 +1290,8 @@ if (ret) { DRV_LOG(ERR, "port %u failed to update HWS tables", dev->data->port_id); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto action_handle_detach; } } #endif @@ -1285,7 +1299,8 @@ if (ret) { DRV_LOG(ERR, "port %u failed to set defaults flows", dev->data->port_id); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto action_handle_detach; } /* Set a mask and offset of dynamic metadata flows into Rx queues. */ mlx5_flow_rxq_dynf_metadata_set(dev); @@ -1302,12 +1317,14 @@ if (ret) { DRV_LOG(DEBUG, "port %u failed to start default actions: %s", dev->data->port_id, strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto traffic_disable; } if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) { DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s", dev->data->port_id, rte_strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto stop_default; } rte_wmb(); dev->tx_pkt_burst = mlx5_select_tx_function(dev); @@ -1334,18 +1351,25 @@ priv->sh->port[priv->dev_port - 1].devx_ih_port_id = (uint32_t)dev->data->port_id; return 0; -error: - ret = rte_errno; /* Save rte_errno before cleanup. */ - /* Rollback. */ - dev->data->dev_started = 0; +stop_default: mlx5_flow_stop_default(dev); +traffic_disable: mlx5_traffic_disable(dev); - mlx5_txq_stop(dev); +action_handle_detach: + mlx5_action_handle_detach(dev); +rx_intr_vec_disable: + mlx5_rx_intr_vec_disable(dev); +rxq_stop: mlx5_rxq_stop(dev); +txq_stop: + mlx5_txq_stop(dev); +lb_dummy_queue_release: if (priv->obj_ops.lb_dummy_queue_release) priv->obj_ops.lb_dummy_queue_release(dev); - mlx5_txpp_stop(dev); /* Stop last. */ - rte_errno = ret; /* Restore rte_errno. 
*/ +txpp_stop: + mlx5_txpp_stop(dev); +error: + rte_errno = ret; return -rte_errno; } @@ -1479,18 +1503,6 @@ unsigned int i; int ret; - /* - * With extended metadata enabled, the Tx metadata copy is handled by default - * Tx tagging flow rules, so default Tx flow rule is not needed. It is only - * required when representor matching is disabled. - */ - if (config->dv_esw_en && - !config->repr_matching && - config->dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS && - priv->master) { - if (mlx5_flow_hw_create_tx_default_mreg_copy_flow(dev)) - goto error; - } for (i = 0; i < priv->txqs_n; ++i) { struct mlx5_txq_ctrl *txq = mlx5_txq_get(dev, i); uint32_t queue; @@ -1507,7 +1519,20 @@ } } if (config->dv_esw_en && config->repr_matching) { - if (mlx5_flow_hw_tx_repr_matching_flow(dev, queue, false)) { + if (mlx5_flow_hw_create_tx_repr_matching_flow(dev, queue, false)) { + mlx5_txq_release(dev, i); + goto error; + } + } + /* + * With extended metadata enabled, the Tx metadata copy is handled by default + * Tx tagging flow rules, so default Tx flow rule is not needed. It is only + * required when representor matching is disabled. + */ + if (config->dv_esw_en && !config->repr_matching && + config->dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS && + (priv->master || priv->representor)) { + if (mlx5_flow_hw_create_tx_default_mreg_copy_flow(dev, queue, false)) { mlx5_txq_release(dev, i); goto error; } @@ -1705,7 +1730,7 @@ for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) { struct rte_ether_addr *mac = &dev->data->mac_addrs[i]; - if (!memcmp(mac, &cmp, sizeof(*mac)) || rte_is_multicast_ether_addr(mac)) + if (!memcmp(mac, &cmp, sizeof(*mac))) continue; memcpy(&unicast.dst.addr_bytes, mac->addr_bytes, diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_txq.c dpdk-22.11.11/drivers/net/mlx5/mlx5_txq.c --- dpdk-22.11.9/drivers/net/mlx5/mlx5_txq.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_txq.c 2025-12-24 13:18:07.000000000 +0000 @@ -1308,7 +1308,7 @@ priv = dev->data->dev_private; if ((!priv->representor && !priv->master) || !priv->sh->config.dv_esw_en) { - DRV_LOG(ERR, "Port %u must be represetnor or master port in E-Switch mode.", + DRV_LOG(ERR, "Port %u must be representor or master port in E-Switch mode.", port_id); rte_errno = EINVAL; return -rte_errno; @@ -1329,9 +1329,17 @@ } if (priv->sh->config.repr_matching && - mlx5_flow_hw_tx_repr_matching_flow(dev, sq_num, true)) { + mlx5_flow_hw_create_tx_repr_matching_flow(dev, sq_num, true)) { if (sq_miss_created) - mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num); + mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num, true); + return -rte_errno; + } + + if (!priv->sh->config.repr_matching && + priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS && + mlx5_flow_hw_create_tx_default_mreg_copy_flow(dev, sq_num, true)) { + if (sq_miss_created) + mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num, true); return -rte_errno; } return 0; @@ -1345,6 +1353,52 @@ return -rte_errno; } +int +rte_pmd_mlx5_external_sq_disable(uint16_t port_id, uint32_t sq_num) +{ + struct rte_eth_dev *dev; + struct mlx5_priv *priv; + + if (rte_eth_dev_is_valid_port(port_id) < 0) { + DRV_LOG(ERR, "There is no Ethernet device for port %u.", + port_id); + rte_errno = ENODEV; + return -rte_errno; + } + dev = &rte_eth_devices[port_id]; + priv = dev->data->dev_private; + if ((!priv->representor && !priv->master) || + !priv->sh->config.dv_esw_en) { + DRV_LOG(ERR, "Port %u must be representor or master port in E-Switch mode.", + port_id); + rte_errno = EINVAL; + return -rte_errno; 
+ } + if (sq_num == 0) { + DRV_LOG(ERR, "Invalid SQ number."); + rte_errno = EINVAL; + return -rte_errno; + } +#ifdef HAVE_MLX5_HWS_SUPPORT + if (priv->sh->config.dv_flow_en == 2) { + if (priv->sh->config.fdb_def_rule && + mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num, true)) + return -rte_errno; + if (priv->sh->config.repr_matching && + mlx5_flow_hw_destroy_tx_repr_matching_flow(dev, sq_num, true)) + return -rte_errno; + if (!priv->sh->config.repr_matching && + priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS && + mlx5_flow_hw_destroy_tx_default_mreg_copy_flow(dev, sq_num, true)) + return -rte_errno; + return 0; + } +#endif + /* Not supported for software steering. */ + rte_errno = ENOTSUP; + return -rte_errno; +} + /** * Set the Tx queue dynamic timestamp (mask and offset) * diff -Nru dpdk-22.11.9/drivers/net/mlx5/mlx5_utils.h dpdk-22.11.11/drivers/net/mlx5/mlx5_utils.h --- dpdk-22.11.9/drivers/net/mlx5/mlx5_utils.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/mlx5_utils.h 2025-12-24 13:18:07.000000000 +0000 @@ -25,9 +25,6 @@ /* Convert a bit number to the corresponding 64-bit mask */ #define MLX5_BITSHIFT(v) (UINT64_C(1) << (v)) -/* Save and restore errno around argument evaluation. */ -#define ERRNO_SAFE(x) ((errno = (int []){ errno, ((x), 0) }[0])) - extern int mlx5_logtype; #define MLX5_NET_LOG_PREFIX "mlx5_net" diff -Nru dpdk-22.11.9/drivers/net/mlx5/rte_pmd_mlx5.h dpdk-22.11.11/drivers/net/mlx5/rte_pmd_mlx5.h --- dpdk-22.11.9/drivers/net/mlx5/rte_pmd_mlx5.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/rte_pmd_mlx5.h 2025-12-24 13:18:07.000000000 +0000 @@ -5,6 +5,9 @@ #ifndef RTE_PMD_PRIVATE_MLX5_H_ #define RTE_PMD_PRIVATE_MLX5_H_ +#include + +#include #include /** @@ -158,6 +161,24 @@ __rte_experimental int rte_pmd_mlx5_external_sq_enable(uint16_t port_id, uint32_t sq_num); +/** + * Disable traffic for external SQ. Should be invoked by application + * before destroying the external SQ. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] sq_num + * SQ HW number. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + * Possible values for rte_errno: + * - EINVAL - invalid sq_number or port type. + * - ENODEV - there is no Ethernet device for this port id. + */ +__rte_experimental +int rte_pmd_mlx5_external_sq_disable(uint16_t port_id, uint32_t sq_num); + #ifdef __cplusplus } #endif diff -Nru dpdk-22.11.9/drivers/net/mlx5/version.map dpdk-22.11.11/drivers/net/mlx5/version.map --- dpdk-22.11.9/drivers/net/mlx5/version.map 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/version.map 2025-12-24 13:18:07.000000000 +0000 @@ -15,4 +15,5 @@ # added in 22.07 rte_pmd_mlx5_host_shaper_config; rte_pmd_mlx5_external_sq_enable; + rte_pmd_mlx5_external_sq_disable; }; diff -Nru dpdk-22.11.9/drivers/net/mlx5/windows/mlx5_ethdev_os.c dpdk-22.11.11/drivers/net/mlx5/windows/mlx5_ethdev_os.c --- dpdk-22.11.9/drivers/net/mlx5/windows/mlx5_ethdev_os.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/windows/mlx5_ethdev_os.c 2025-12-24 13:18:07.000000000 +0000 @@ -56,7 +56,7 @@ * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ int -mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[MLX5_NAMESIZE]) +mlx5_get_ifname(const struct rte_eth_dev *dev, char ifname[MLX5_NAMESIZE]) { struct mlx5_priv *priv; mlx5_context_st *context_obj; @@ -67,11 +67,39 @@ } priv = dev->data->dev_private; context_obj = (mlx5_context_st *)priv->sh->cdev->ctx; - strncpy(*ifname, context_obj->mlx5_dev.name, MLX5_NAMESIZE); + strncpy(ifname, context_obj->mlx5_dev.name, MLX5_NAMESIZE); return 0; } /** + * Get device minimum and maximum allowed MTU. + * + * Windows API does not expose minimum and maximum allowed MTU. + * In this case, this just returns (-ENOTSUP) to allow platform-independent code + * to fallback to default values. + * + * @param dev + * Pointer to Ethernet device. + * @param[out] min_mtu + * Minimum MTU value output buffer. + * @param[out] max_mtu + * Maximum MTU value output buffer. + * + * @return + * (-ENOTSUP) - not supported on Windows + */ +int +mlx5_os_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu) +{ + RTE_SET_USED(dev); + RTE_SET_USED(min_mtu); + RTE_SET_USED(max_mtu); + + rte_errno = ENOTSUP; + return -rte_errno; +} + +/** * Get device MTU. * * @param dev @@ -283,11 +311,11 @@ dev_link.link_duplex = 1; if (dev->data->dev_link.link_speed != dev_link.link_speed || dev->data->dev_link.link_duplex != dev_link.link_duplex || - dev->data->dev_link.link_autoneg != dev_link.link_autoneg || dev->data->dev_link.link_status != dev_link.link_status) ret = 1; else ret = 0; + dev_link.link_autoneg = dev->data->dev_link.link_autoneg; dev->data->dev_link = dev_link; return ret; } diff -Nru dpdk-22.11.9/drivers/net/mlx5/windows/mlx5_flow_os.c dpdk-22.11.11/drivers/net/mlx5/windows/mlx5_flow_os.c --- dpdk-22.11.9/drivers/net/mlx5/windows/mlx5_flow_os.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/windows/mlx5_flow_os.c 2025-12-24 13:18:07.000000000 +0000 @@ -219,9 +219,9 @@ default: break; } - MLX5_SET(devx_fs_rule_add_in, in, match_criteria_enable, - MLX5_MATCH_OUTER_HEADERS); } + MLX5_SET(devx_fs_rule_add_in, in, match_criteria_enable, + mlx5_matcher->attr.match_criteria_enable); *flow = mlx5_glue->devx_fs_rule_add(mlx5_matcher->ctx, in, sizeof(in)); return (*flow) ? 0 : -1; } diff -Nru dpdk-22.11.9/drivers/net/mlx5/windows/mlx5_flow_os.h dpdk-22.11.11/drivers/net/mlx5/windows/mlx5_flow_os.h --- dpdk-22.11.9/drivers/net/mlx5/windows/mlx5_flow_os.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/windows/mlx5_flow_os.h 2025-12-24 13:18:07.000000000 +0000 @@ -47,6 +47,7 @@ case RTE_FLOW_ITEM_TYPE_IPV6: case RTE_FLOW_ITEM_TYPE_VLAN: case RTE_FLOW_ITEM_TYPE_ESP: + case RTE_FLOW_ITEM_TYPE_NVGRE: return true; default: return false; diff -Nru dpdk-22.11.9/drivers/net/mlx5/windows/mlx5_os.c dpdk-22.11.11/drivers/net/mlx5/windows/mlx5_os.c --- dpdk-22.11.9/drivers/net/mlx5/windows/mlx5_os.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/windows/mlx5_os.c 2025-12-24 13:18:07.000000000 +0000 @@ -431,6 +431,8 @@ eth_dev->data->mac_addrs = priv->mac; eth_dev->device = dpdk_dev; eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + /* Fetch minimum and maximum allowed MTU from the device. */ + mlx5_get_mtu_bounds(eth_dev, &priv->min_mtu, &priv->max_mtu); /* Configure the first MAC address by default. 
*/ if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) { DRV_LOG(ERR, @@ -461,6 +463,7 @@ err = rte_errno; goto error; } + eth_dev->data->mtu = priv->mtu; DRV_LOG(DEBUG, "port %u MTU is %u.", eth_dev->data->port_id, priv->mtu); /* Initialize burst functions to prevent crashes before link-up. */ diff -Nru dpdk-22.11.9/drivers/net/mlx5/windows/mlx5_os.h dpdk-22.11.11/drivers/net/mlx5/windows/mlx5_os.h --- dpdk-22.11.9/drivers/net/mlx5/windows/mlx5_os.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/mlx5/windows/mlx5_os.h 2025-12-24 13:18:07.000000000 +0000 @@ -12,8 +12,6 @@ MLX5_FS_PATH_MAX = MLX5_DEVX_DEVICE_PNP_SIZE + 1 }; -#define PCI_DRV_FLAGS 0 - #define MLX5_NAMESIZE MLX5_FS_NAME_MAX enum mlx5_sw_parsing_offloads { diff -Nru dpdk-22.11.9/drivers/net/ngbe/ngbe_rxtx.c dpdk-22.11.11/drivers/net/ngbe/ngbe_rxtx.c --- dpdk-22.11.9/drivers/net/ngbe/ngbe_rxtx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/ngbe/ngbe_rxtx.c 2025-12-24 13:18:07.000000000 +0000 @@ -1983,13 +1983,9 @@ if (txq == NULL) return -ENOMEM; - /* - * Allocate Tx ring hardware descriptors. A memzone large enough to - * handle the maximum ring size is allocated in order to allow for - * resizing in later calls to the queue setup function. - */ + /* Allocate Tx ring hardware descriptors. */ tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, - sizeof(struct ngbe_tx_desc) * NGBE_RING_DESC_MAX, + sizeof(struct ngbe_tx_desc) * nb_desc, NGBE_ALIGN, socket_id); if (tz == NULL) { ngbe_tx_queue_release(txq); @@ -2236,6 +2232,7 @@ uint16_t len; struct ngbe_adapter *adapter = ngbe_dev_adapter(dev); uint64_t offloads; + uint32_t size; PMD_INIT_FUNC_TRACE(); hw = ngbe_dev_hw(dev); @@ -2269,13 +2266,10 @@ rxq->rx_deferred_start = rx_conf->rx_deferred_start; rxq->offloads = offloads; - /* - * Allocate Rx ring hardware descriptors. A memzone large enough to - * handle the maximum ring size is allocated in order to allow for - * resizing in later calls to the queue setup function. - */ + /* Allocate Rx ring hardware descriptors. */ + size = (nb_desc + RTE_PMD_NGBE_RX_MAX_BURST) * sizeof(struct ngbe_rx_desc); rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, - RX_RING_SZ, NGBE_ALIGN, socket_id); + size, NGBE_ALIGN, socket_id); if (rz == NULL) { ngbe_rx_queue_release(rxq); return -ENOMEM; @@ -2285,7 +2279,7 @@ /* * Zero init all the descriptors in the ring. 
*/ - memset(rz->addr, 0, RX_RING_SZ); + memset(rz->addr, 0, size); rxq->rdt_reg_addr = NGBE_REG_ADDR(hw, NGBE_RXWP(rxq->reg_idx)); rxq->rdh_reg_addr = NGBE_REG_ADDR(hw, NGBE_RXRP(rxq->reg_idx)); diff -Nru dpdk-22.11.9/drivers/net/tap/rte_eth_tap.c dpdk-22.11.11/drivers/net/tap/rte_eth_tap.c --- dpdk-22.11.9/drivers/net/tap/rte_eth_tap.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/tap/rte_eth_tap.c 2025-12-24 13:18:07.000000000 +0000 @@ -952,8 +952,10 @@ return err; err = tap_link_set_up(dev); - if (err) + if (err) { + tap_intr_handle_set(dev, 0); return err; + } for (i = 0; i < dev->data->nb_tx_queues; i++) dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; diff -Nru dpdk-22.11.9/drivers/net/tap/tap_flow.c dpdk-22.11.11/drivers/net/tap/tap_flow.c --- dpdk-22.11.9/drivers/net/tap/tap_flow.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/tap/tap_flow.c 2025-12-24 13:18:07.000000000 +0000 @@ -535,20 +535,16 @@ return 0; msg = &flow->msg; if (!rte_is_zero_ether_addr(&mask->dst)) { - tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_DST, - RTE_ETHER_ADDR_LEN, - &spec->dst.addr_bytes); - tap_nlattr_add(&msg->nh, - TCA_FLOWER_KEY_ETH_DST_MASK, RTE_ETHER_ADDR_LEN, - &mask->dst.addr_bytes); + tap_nlattr_add(msg, TCA_FLOWER_KEY_ETH_DST, RTE_ETHER_ADDR_LEN, + &spec->dst.addr_bytes); + tap_nlattr_add(msg, TCA_FLOWER_KEY_ETH_DST_MASK, RTE_ETHER_ADDR_LEN, + &mask->dst.addr_bytes); } if (!rte_is_zero_ether_addr(&mask->src)) { - tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_SRC, - RTE_ETHER_ADDR_LEN, + tap_nlattr_add(msg, TCA_FLOWER_KEY_ETH_SRC, RTE_ETHER_ADDR_LEN, &spec->src.addr_bytes); - tap_nlattr_add(&msg->nh, - TCA_FLOWER_KEY_ETH_SRC_MASK, RTE_ETHER_ADDR_LEN, - &mask->src.addr_bytes); + tap_nlattr_add(msg, TCA_FLOWER_KEY_ETH_SRC_MASK, RTE_ETHER_ADDR_LEN, + &mask->src.addr_bytes); } return 0; } @@ -604,11 +600,9 @@ uint8_t vid = VLAN_ID(tci); if (prio) - tap_nlattr_add8(&msg->nh, - TCA_FLOWER_KEY_VLAN_PRIO, prio); + tap_nlattr_add8(msg, TCA_FLOWER_KEY_VLAN_PRIO, prio); if (vid) - tap_nlattr_add16(&msg->nh, - TCA_FLOWER_KEY_VLAN_ID, vid); + tap_nlattr_add16(msg, TCA_FLOWER_KEY_VLAN_ID, vid); } return 0; } @@ -651,20 +645,15 @@ if (!spec) return 0; if (mask->hdr.dst_addr) { - tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST, - spec->hdr.dst_addr); - tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST_MASK, - mask->hdr.dst_addr); + tap_nlattr_add32(msg, TCA_FLOWER_KEY_IPV4_DST, spec->hdr.dst_addr); + tap_nlattr_add32(msg, TCA_FLOWER_KEY_IPV4_DST_MASK, mask->hdr.dst_addr); } if (mask->hdr.src_addr) { - tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC, - spec->hdr.src_addr); - tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC_MASK, - mask->hdr.src_addr); + tap_nlattr_add32(msg, TCA_FLOWER_KEY_IPV4_SRC, spec->hdr.src_addr); + tap_nlattr_add32(msg, TCA_FLOWER_KEY_IPV4_SRC_MASK, mask->hdr.src_addr); } if (spec->hdr.next_proto_id) - tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, - spec->hdr.next_proto_id); + tap_nlattr_add8(msg, TCA_FLOWER_KEY_IP_PROTO, spec->hdr.next_proto_id); return 0; } @@ -707,20 +696,19 @@ if (!spec) return 0; if (memcmp(mask->hdr.dst_addr, empty_addr, 16)) { - tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST, - sizeof(spec->hdr.dst_addr), &spec->hdr.dst_addr); - tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST_MASK, - sizeof(mask->hdr.dst_addr), &mask->hdr.dst_addr); + tap_nlattr_add(msg, TCA_FLOWER_KEY_IPV6_DST, sizeof(spec->hdr.dst_addr), + &spec->hdr.dst_addr); + tap_nlattr_add(msg, TCA_FLOWER_KEY_IPV6_DST_MASK, 
sizeof(mask->hdr.dst_addr), + &mask->hdr.dst_addr); } if (memcmp(mask->hdr.src_addr, empty_addr, 16)) { - tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC, - sizeof(spec->hdr.src_addr), &spec->hdr.src_addr); - tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC_MASK, - sizeof(mask->hdr.src_addr), &mask->hdr.src_addr); + tap_nlattr_add(msg, TCA_FLOWER_KEY_IPV6_SRC, sizeof(spec->hdr.src_addr), + &spec->hdr.src_addr); + tap_nlattr_add(msg, TCA_FLOWER_KEY_IPV6_SRC_MASK, sizeof(mask->hdr.src_addr), + &mask->hdr.src_addr); } if (spec->hdr.proto) - tap_nlattr_add8(&msg->nh, - TCA_FLOWER_KEY_IP_PROTO, spec->hdr.proto); + tap_nlattr_add8(msg, TCA_FLOWER_KEY_IP_PROTO, spec->hdr.proto); return 0; } @@ -758,15 +746,13 @@ if (!flow) return 0; msg = &flow->msg; - tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_UDP); + tap_nlattr_add8(msg, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_UDP); if (!spec) return 0; if (mask->hdr.dst_port) - tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_DST, - spec->hdr.dst_port); + tap_nlattr_add16(msg, TCA_FLOWER_KEY_UDP_DST, spec->hdr.dst_port); if (mask->hdr.src_port) - tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_SRC, - spec->hdr.src_port); + tap_nlattr_add16(msg, TCA_FLOWER_KEY_UDP_SRC, spec->hdr.src_port); return 0; } @@ -804,15 +790,13 @@ if (!flow) return 0; msg = &flow->msg; - tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_TCP); + tap_nlattr_add8(msg, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_TCP); if (!spec) return 0; if (mask->hdr.dst_port) - tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_DST, - spec->hdr.dst_port); + tap_nlattr_add16(msg, TCA_FLOWER_KEY_TCP_DST, spec->hdr.dst_port); if (mask->hdr.src_port) - tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_SRC, - spec->hdr.src_port); + tap_nlattr_add16(msg, TCA_FLOWER_KEY_TCP_SRC, spec->hdr.src_port); return 0; } @@ -920,35 +904,27 @@ if (tap_nlattr_nested_start(msg, (*act_index)++) < 0) return -1; - tap_nlattr_add(&msg->nh, TCA_ACT_KIND, - strlen(adata->id) + 1, adata->id); + tap_nlattr_add(msg, TCA_ACT_KIND, strlen(adata->id) + 1, adata->id); if (tap_nlattr_nested_start(msg, TCA_ACT_OPTIONS) < 0) return -1; if (strcmp("gact", adata->id) == 0) { - tap_nlattr_add(&msg->nh, TCA_GACT_PARMS, sizeof(adata->gact), - &adata->gact); + tap_nlattr_add(msg, TCA_GACT_PARMS, sizeof(adata->gact), &adata->gact); } else if (strcmp("mirred", adata->id) == 0) { if (adata->mirred.eaction == TCA_EGRESS_MIRROR) adata->mirred.action = TC_ACT_PIPE; else /* REDIRECT */ adata->mirred.action = TC_ACT_STOLEN; - tap_nlattr_add(&msg->nh, TCA_MIRRED_PARMS, - sizeof(adata->mirred), - &adata->mirred); + tap_nlattr_add(msg, TCA_MIRRED_PARMS, sizeof(adata->mirred), &adata->mirred); } else if (strcmp("skbedit", adata->id) == 0) { - tap_nlattr_add(&msg->nh, TCA_SKBEDIT_PARMS, - sizeof(adata->skbedit.skbedit), - &adata->skbedit.skbedit); - tap_nlattr_add16(&msg->nh, TCA_SKBEDIT_QUEUE_MAPPING, - adata->skbedit.queue); + tap_nlattr_add(msg, TCA_SKBEDIT_PARMS, sizeof(adata->skbedit.skbedit), + &adata->skbedit.skbedit); + tap_nlattr_add16(msg, TCA_SKBEDIT_QUEUE_MAPPING, adata->skbedit.queue); } else if (strcmp("bpf", adata->id) == 0) { - tap_nlattr_add32(&msg->nh, TCA_ACT_BPF_FD, adata->bpf.bpf_fd); - tap_nlattr_add(&msg->nh, TCA_ACT_BPF_NAME, - strlen(adata->bpf.annotation) + 1, - adata->bpf.annotation); - tap_nlattr_add(&msg->nh, TCA_ACT_BPF_PARMS, - sizeof(adata->bpf.bpf), - &adata->bpf.bpf); + tap_nlattr_add32(msg, TCA_ACT_BPF_FD, adata->bpf.bpf_fd); + tap_nlattr_add(msg, TCA_ACT_BPF_NAME, strlen(adata->bpf.annotation) + 1, + adata->bpf.annotation); + 
tap_nlattr_add(msg, TCA_ACT_BPF_PARMS, sizeof(adata->bpf.bpf), + &adata->bpf.bpf); } else { return -1; } @@ -1078,7 +1054,7 @@ TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0); } /* use flower filter type */ - tap_nlattr_add(&flow->msg.nh, TCA_KIND, sizeof("flower"), "flower"); + tap_nlattr_add(&flow->msg, TCA_KIND, sizeof("flower"), "flower"); if (tap_nlattr_nested_start(&flow->msg, TCA_OPTIONS) < 0) { rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_ACTION, actions, "could not allocated netlink msg"); @@ -1118,15 +1094,11 @@ } if (flow) { if (data.vlan) { - tap_nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE, - htons(ETH_P_8021Q)); - tap_nlattr_add16(&flow->msg.nh, - TCA_FLOWER_KEY_VLAN_ETH_TYPE, - data.eth_type ? - data.eth_type : htons(ETH_P_ALL)); + tap_nlattr_add16(&flow->msg, TCA_FLOWER_KEY_ETH_TYPE, htons(ETH_P_8021Q)); + tap_nlattr_add16(&flow->msg, TCA_FLOWER_KEY_VLAN_ETH_TYPE, + data.eth_type ? data.eth_type : htons(ETH_P_ALL)); } else if (data.eth_type) { - tap_nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE, - data.eth_type); + tap_nlattr_add16(&flow->msg, TCA_FLOWER_KEY_ETH_TYPE, data.eth_type); } } if (mirred && flow) { @@ -1919,13 +1891,13 @@ msg->t.tcm_info = TC_H_MAKE(prio << 16, msg->t.tcm_info); msg->t.tcm_parent = TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0); - tap_nlattr_add(&msg->nh, TCA_KIND, sizeof("bpf"), "bpf"); + tap_nlattr_add(msg, TCA_KIND, sizeof("bpf"), "bpf"); if (tap_nlattr_nested_start(msg, TCA_OPTIONS) < 0) return -1; - tap_nlattr_add32(&msg->nh, TCA_BPF_FD, pmd->bpf_fd[i]); + tap_nlattr_add32(msg, TCA_BPF_FD, pmd->bpf_fd[i]); snprintf(annotation, sizeof(annotation), "[%s%d]", SEC_NAME_CLS_Q, i); - tap_nlattr_add(&msg->nh, TCA_BPF_NAME, strlen(annotation) + 1, + tap_nlattr_add(msg, TCA_BPF_NAME, strlen(annotation) + 1, annotation); /* Actions */ { diff -Nru dpdk-22.11.9/drivers/net/tap/tap_netlink.c dpdk-22.11.11/drivers/net/tap/tap_netlink.c --- dpdk-22.11.9/drivers/net/tap/tap_netlink.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/tap/tap_netlink.c 2025-12-24 13:18:07.000000000 +0000 @@ -293,18 +293,18 @@ * The data to append. */ void -tap_nlattr_add(struct nlmsghdr *nh, unsigned short type, +tap_nlattr_add(struct nlmsg *msg, unsigned short type, unsigned int data_len, const void *data) { /* see man 3 rtnetlink */ struct rtattr *rta; - rta = (struct rtattr *)NLMSG_TAIL(nh); + rta = (struct rtattr *)NLMSG_TAIL(msg); rta->rta_len = RTA_LENGTH(data_len); rta->rta_type = type; if (data_len > 0) memcpy(RTA_DATA(rta), data, data_len); - nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len); + msg->nh.nlmsg_len = NLMSG_ALIGN(msg->nh.nlmsg_len) + RTA_ALIGN(rta->rta_len); } /** @@ -318,9 +318,9 @@ * The data to append. */ void -tap_nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data) +tap_nlattr_add8(struct nlmsg *msg, unsigned short type, uint8_t data) { - tap_nlattr_add(nh, type, sizeof(uint8_t), &data); + tap_nlattr_add(msg, type, sizeof(uint8_t), &data); } /** @@ -334,9 +334,9 @@ * The data to append. */ void -tap_nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data) +tap_nlattr_add16(struct nlmsg *msg, unsigned short type, uint16_t data) { - tap_nlattr_add(nh, type, sizeof(uint16_t), &data); + tap_nlattr_add(msg, type, sizeof(uint16_t), &data); } /** @@ -350,9 +350,9 @@ * The data to append. 
*/ void -tap_nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data) +tap_nlattr_add32(struct nlmsg *msg, unsigned short type, uint32_t data) { - tap_nlattr_add(nh, type, sizeof(uint32_t), &data); + tap_nlattr_add(msg, type, sizeof(uint32_t), &data); } /** @@ -379,9 +379,9 @@ return -1; } - tail->tail = (struct rtattr *)NLMSG_TAIL(&msg->nh); + tail->tail = (struct rtattr *)NLMSG_TAIL(msg); - tap_nlattr_add(&msg->nh, type, 0, NULL); + tap_nlattr_add(msg, type, 0, NULL); tail->prev = msg->nested_tails; @@ -404,7 +404,7 @@ { struct nested_tail *tail = msg->nested_tails; - tail->tail->rta_len = (char *)NLMSG_TAIL(&msg->nh) - (char *)tail->tail; + tail->tail->rta_len = (char *)NLMSG_TAIL(msg) - (char *)tail->tail; if (tail->prev) msg->nested_tails = tail->prev; diff -Nru dpdk-22.11.9/drivers/net/tap/tap_netlink.h dpdk-22.11.11/drivers/net/tap/tap_netlink.h --- dpdk-22.11.9/drivers/net/tap/tap_netlink.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/tap/tap_netlink.h 2025-12-24 13:18:07.000000000 +0000 @@ -23,7 +23,7 @@ struct nested_tail *nested_tails; }; -#define NLMSG_TAIL(nlh) (void *)((char *)(nlh) + NLMSG_ALIGN((nlh)->nlmsg_len)) +#define NLMSG_TAIL(msg) (void *)((char *)(msg) + NLMSG_ALIGN((msg)->nh.nlmsg_len)) int tap_nl_init(uint32_t nl_groups); int tap_nl_final(int nlsk_fd); @@ -31,11 +31,11 @@ int tap_nl_recv(int nlsk_fd, int (*callback)(struct nlmsghdr *, void *), void *arg); int tap_nl_recv_ack(int nlsk_fd); -void tap_nlattr_add(struct nlmsghdr *nh, unsigned short type, +void tap_nlattr_add(struct nlmsg *msg, unsigned short type, unsigned int data_len, const void *data); -void tap_nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data); -void tap_nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data); -void tap_nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data); +void tap_nlattr_add8(struct nlmsg *msg, unsigned short type, uint8_t data); +void tap_nlattr_add16(struct nlmsg *msg, unsigned short type, uint16_t data); +void tap_nlattr_add32(struct nlmsg *msg, unsigned short type, uint32_t data); int tap_nlattr_nested_start(struct nlmsg *msg, uint16_t type); void tap_nlattr_nested_finish(struct nlmsg *msg); diff -Nru dpdk-22.11.9/drivers/net/tap/tap_tcmsgs.c dpdk-22.11.11/drivers/net/tap/tap_tcmsgs.c --- dpdk-22.11.9/drivers/net/tap/tap_tcmsgs.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/tap/tap_tcmsgs.c 2025-12-24 13:18:07.000000000 +0000 @@ -123,8 +123,8 @@ NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE); msg.t.tcm_handle = TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0); msg.t.tcm_parent = TC_H_ROOT; - tap_nlattr_add(&msg.nh, TCA_KIND, sizeof("multiq"), "multiq"); - tap_nlattr_add(&msg.nh, TCA_OPTIONS, sizeof(opt), &opt); + tap_nlattr_add(&msg, TCA_KIND, sizeof("multiq"), "multiq"); + tap_nlattr_add(&msg, TCA_OPTIONS, sizeof(opt), &opt); if (tap_nl_send(nlsk_fd, &msg.nh) < 0) return -1; if (tap_nl_recv_ack(nlsk_fd) < 0) @@ -152,7 +152,7 @@ NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE); msg.t.tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0); msg.t.tcm_parent = TC_H_INGRESS; - tap_nlattr_add(&msg.nh, TCA_KIND, sizeof("ingress"), "ingress"); + tap_nlattr_add(&msg, TCA_KIND, sizeof("ingress"), "ingress"); if (tap_nl_send(nlsk_fd, &msg.nh) < 0) return -1; if (tap_nl_recv_ack(nlsk_fd) < 0) diff -Nru dpdk-22.11.9/drivers/net/txgbe/base/txgbe_type.h dpdk-22.11.11/drivers/net/txgbe/base/txgbe_type.h --- dpdk-22.11.9/drivers/net/txgbe/base/txgbe_type.h 2025-08-19 18:03:58.000000000 +0000 +++ 
dpdk-22.11.11/drivers/net/txgbe/base/txgbe_type.h 2025-12-24 13:18:07.000000000 +0000 @@ -699,6 +699,8 @@ #define TXGBE_DEVARG_FFE_MAIN "ffe_main" #define TXGBE_DEVARG_FFE_PRE "ffe_pre" #define TXGBE_DEVARG_FFE_POST "ffe_post" +#define TXGBE_DEVARG_FDIR_PBALLOC "pkt-filter-size" +#define TXGBE_DEVARG_FDIR_DROP_QUEUE "pkt-filter-drop-queue" static const char * const txgbe_valid_arguments[] = { TXGBE_DEVARG_BP_AUTO, @@ -709,6 +711,8 @@ TXGBE_DEVARG_FFE_MAIN, TXGBE_DEVARG_FFE_PRE, TXGBE_DEVARG_FFE_POST, + TXGBE_DEVARG_FDIR_PBALLOC, + TXGBE_DEVARG_FDIR_DROP_QUEUE, NULL }; diff -Nru dpdk-22.11.9/drivers/net/txgbe/txgbe_ethdev.c dpdk-22.11.11/drivers/net/txgbe/txgbe_ethdev.c --- dpdk-22.11.9/drivers/net/txgbe/txgbe_ethdev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/txgbe/txgbe_ethdev.c 2025-12-24 13:18:07.000000000 +0000 @@ -496,8 +496,12 @@ } static void -txgbe_parse_devargs(struct txgbe_hw *hw, struct rte_devargs *devargs) +txgbe_parse_devargs(struct rte_eth_dev *dev) { + struct rte_eth_fdir_conf *fdir_conf = TXGBE_DEV_FDIR_CONF(dev); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_devargs *devargs = pci_dev->device.devargs; + struct txgbe_hw *hw = TXGBE_DEV_HW(dev); struct rte_kvargs *kvlist; u16 auto_neg = 1; u16 poll = 0; @@ -507,6 +511,9 @@ u16 ffe_main = 27; u16 ffe_pre = 8; u16 ffe_post = 44; + /* FDIR args */ + u8 pballoc = 0; + u8 drop_queue = 127; if (devargs == NULL) goto null; @@ -531,6 +538,10 @@ &txgbe_handle_devarg, &ffe_pre); rte_kvargs_process(kvlist, TXGBE_DEVARG_FFE_POST, &txgbe_handle_devarg, &ffe_post); + rte_kvargs_process(kvlist, TXGBE_DEVARG_FDIR_PBALLOC, + &txgbe_handle_devarg, &pballoc); + rte_kvargs_process(kvlist, TXGBE_DEVARG_FDIR_DROP_QUEUE, + &txgbe_handle_devarg, &drop_queue); rte_kvargs_free(kvlist); null: @@ -542,6 +553,9 @@ hw->phy.ffe_main = ffe_main; hw->phy.ffe_pre = ffe_pre; hw->phy.ffe_post = ffe_post; + + fdir_conf->pballoc = pballoc; + fdir_conf->drop_queue = drop_queue; } static int @@ -629,7 +643,7 @@ hw->isb_dma = TMZ_PADDR(mz); hw->isb_mem = TMZ_VADDR(mz); - txgbe_parse_devargs(hw, pci_dev->device.devargs); + txgbe_parse_devargs(eth_dev); /* Initialize the shared code (base driver) */ err = txgbe_init_shared_code(hw); if (err != 0) { @@ -837,6 +851,7 @@ } memset(filter_info->fivetuple_mask, 0, sizeof(uint32_t) * TXGBE_5TUPLE_ARRAY_SIZE); + filter_info->ntuple_is_full = false; return 0; } @@ -879,11 +894,13 @@ static int txgbe_fdir_filter_init(struct rte_eth_dev *eth_dev) { + struct rte_eth_fdir_conf *fdir_conf = TXGBE_DEV_FDIR_CONF(eth_dev); struct txgbe_hw_fdir_info *fdir_info = TXGBE_DEV_FDIR(eth_dev); char fdir_hash_name[RTE_HASH_NAMESIZE]; + u16 max_fdir_num = (1024 << (fdir_conf->pballoc + 1)) - 2; struct rte_hash_parameters fdir_hash_params = { .name = fdir_hash_name, - .entries = TXGBE_MAX_FDIR_FILTER_NUM, + .entries = max_fdir_num, .key_len = sizeof(struct txgbe_atr_input), .hash_func = rte_hash_crc, .hash_func_init_val = 0, @@ -900,7 +917,7 @@ } fdir_info->hash_map = rte_zmalloc("txgbe", sizeof(struct txgbe_fdir_filter *) * - TXGBE_MAX_FDIR_FILTER_NUM, + max_fdir_num, 0); if (!fdir_info->hash_map) { PMD_INIT_LOG(ERR, @@ -4054,7 +4071,8 @@ } } if (i >= TXGBE_MAX_FTQF_FILTERS) { - PMD_DRV_LOG(ERR, "5tuple filters are full."); + PMD_DRV_LOG(INFO, "5tuple filters are full, switch to FDIR"); + filter_info->ntuple_is_full = true; return -ENOSYS; } @@ -4082,6 +4100,7 @@ ~(1 << (index % (sizeof(uint32_t) * NBBY))); TAILQ_REMOVE(&filter_info->fivetuple_list, filter, entries); rte_free(filter); + 
filter_info->ntuple_is_full = false; wr32(hw, TXGBE_5TFDADDR(index), 0); wr32(hw, TXGBE_5TFSADDR(index), 0); @@ -5498,7 +5517,9 @@ TXGBE_DEVARG_FFE_SET "=<0-4>" TXGBE_DEVARG_FFE_MAIN "=" TXGBE_DEVARG_FFE_PRE "=" - TXGBE_DEVARG_FFE_POST "="); + TXGBE_DEVARG_FFE_POST "=" + TXGBE_DEVARG_FDIR_PBALLOC "=<0|1|2>" + TXGBE_DEVARG_FDIR_DROP_QUEUE "="); RTE_LOG_REGISTER_SUFFIX(txgbe_logtype_init, init, NOTICE); RTE_LOG_REGISTER_SUFFIX(txgbe_logtype_driver, driver, NOTICE); diff -Nru dpdk-22.11.9/drivers/net/txgbe/txgbe_ethdev.h dpdk-22.11.11/drivers/net/txgbe/txgbe_ethdev.h --- dpdk-22.11.9/drivers/net/txgbe/txgbe_ethdev.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/txgbe/txgbe_ethdev.h 2025-12-24 13:18:07.000000000 +0000 @@ -241,6 +241,7 @@ /* Bit mask for every used 5tuple filter */ uint32_t fivetuple_mask[TXGBE_5TUPLE_ARRAY_SIZE]; struct txgbe_5tuple_filter_list fivetuple_list; + bool ntuple_is_full; /* store the SYN filter info */ uint32_t syn_info; /* store the rss filter info */ diff -Nru dpdk-22.11.9/drivers/net/txgbe/txgbe_fdir.c dpdk-22.11.11/drivers/net/txgbe/txgbe_fdir.c --- dpdk-22.11.9/drivers/net/txgbe/txgbe_fdir.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/txgbe/txgbe_fdir.c 2025-12-24 13:18:07.000000000 +0000 @@ -165,6 +165,15 @@ return 0; } +static inline uint16_t +txgbe_reverse_fdir_bitmasks(uint16_t mask) +{ + mask = ((mask & 0x5555) << 1) | ((mask & 0xAAAA) >> 1); + mask = ((mask & 0x3333) << 2) | ((mask & 0xCCCC) >> 2); + mask = ((mask & 0x0F0F) << 4) | ((mask & 0xF0F0) >> 4); + return ((mask & 0x00FF) << 8) | ((mask & 0xFF00) >> 8); +} + int txgbe_fdir_set_input_mask(struct rte_eth_dev *dev) { @@ -206,15 +215,15 @@ wr32(hw, TXGBE_FDIRUDPMSK, ~fdirtcpm); wr32(hw, TXGBE_FDIRSCTPMSK, ~fdirtcpm); - /* Store source and destination IPv4 masks (big-endian) */ - wr32(hw, TXGBE_FDIRSIP4MSK, ~info->mask.src_ipv4_mask); - wr32(hw, TXGBE_FDIRDIP4MSK, ~info->mask.dst_ipv4_mask); + /* Store source and destination IPv4 masks (little-endian) */ + wr32(hw, TXGBE_FDIRSIP4MSK, rte_be_to_cpu_32(~info->mask.src_ipv4_mask)); + wr32(hw, TXGBE_FDIRDIP4MSK, rte_be_to_cpu_32(~info->mask.dst_ipv4_mask)); /* * Store source and destination IPv6 masks (bit reversed) */ - fdiripv6m = TXGBE_FDIRIP6MSK_DST(info->mask.dst_ipv6_mask) | - TXGBE_FDIRIP6MSK_SRC(info->mask.src_ipv6_mask); + fdiripv6m = txgbe_reverse_fdir_bitmasks(info->mask.dst_ipv6_mask) << 16; + fdiripv6m |= txgbe_reverse_fdir_bitmasks(info->mask.src_ipv6_mask); wr32(hw, TXGBE_FDIRIP6MSK, ~fdiripv6m); return 0; @@ -258,10 +267,7 @@ if (rule->input.flow_type & TXGBE_ATR_L4TYPE_MASK) return TXGBE_FDIRFLEXCFG_BASE_PAY; - if (rule->input.flow_type & TXGBE_ATR_L3TYPE_MASK) - return TXGBE_FDIRFLEXCFG_BASE_L3; - - return TXGBE_FDIRFLEXCFG_BASE_L2; + return TXGBE_FDIRFLEXCFG_BASE_L3; } int @@ -639,8 +645,14 @@ fdircmd |= TXGBE_FDIRPICMD_QP(queue); fdircmd |= TXGBE_FDIRPICMD_POOL(input->vm_pool); - if (input->flow_type & TXGBE_ATR_L3TYPE_IPV6) + if (input->flow_type & TXGBE_ATR_L3TYPE_IPV6) { + /* use SIP4 to store LS Dword of the Source iPv6 address */ + wr32(hw, TXGBE_FDIRPISIP4, be_to_le32(input->src_ip[3])); + wr32(hw, TXGBE_FDIRPISIP6(0), be_to_le32(input->src_ip[2])); + wr32(hw, TXGBE_FDIRPISIP6(1), be_to_le32(input->src_ip[1])); + wr32(hw, TXGBE_FDIRPISIP6(2), be_to_le32(input->src_ip[0])); fdircmd |= TXGBE_FDIRPICMD_IP6; + } wr32(hw, TXGBE_FDIRPICMD, fdircmd); PMD_DRV_LOG(DEBUG, "Rx Queue=%x hash=%x", queue, fdirhash); @@ -786,6 +798,26 @@ return 0; } +static void +txgbe_fdir_mask_input(struct 
txgbe_hw_fdir_mask *mask, + struct txgbe_atr_input *input) +{ + int i; + + if (input->flow_type & TXGBE_ATR_L3TYPE_IPV6) { + for (i = 0; i < 16; i++) { + if (!(mask->src_ipv6_mask & (1 << i))) + input->src_ip[i / 4] &= ~(0xFF << ((i % 4) * 8)); + } + } else { + input->src_ip[0] &= mask->src_ipv4_mask; + input->dst_ip[0] &= mask->dst_ipv4_mask; + } + + input->src_port &= mask->src_port_mask; + input->dst_port &= mask->dst_port_mask; +} + int txgbe_fdir_filter_program(struct rte_eth_dev *dev, struct txgbe_fdir_rule *rule, @@ -808,6 +840,8 @@ if (fdir_mode >= RTE_FDIR_MODE_PERFECT) is_perfect = TRUE; + txgbe_fdir_mask_input(&info->mask, &rule->input); + if (is_perfect) { fdirhash = atr_compute_perfect_hash(&rule->input, TXGBE_DEV_FDIR_CONF(dev)->pballoc); @@ -959,6 +993,7 @@ int txgbe_clear_all_fdir_filter(struct rte_eth_dev *dev) { + struct rte_eth_fdir_conf *fdir_conf = TXGBE_DEV_FDIR_CONF(dev); struct txgbe_hw_fdir_info *fdir_info = TXGBE_DEV_FDIR(dev); struct txgbe_fdir_filter *fdir_filter; struct txgbe_fdir_filter *filter_flag; @@ -967,7 +1002,9 @@ /* flush flow director */ rte_hash_reset(fdir_info->hash_handle); memset(fdir_info->hash_map, 0, - sizeof(struct txgbe_fdir_filter *) * TXGBE_MAX_FDIR_FILTER_NUM); + sizeof(struct txgbe_fdir_filter *) * + ((1024 << (fdir_conf->pballoc + 1)) - 2)); + fdir_conf->mode = RTE_FDIR_MODE_NONE; filter_flag = TAILQ_FIRST(&fdir_info->fdir_list); while ((fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list))) { TAILQ_REMOVE(&fdir_info->fdir_list, diff -Nru dpdk-22.11.9/drivers/net/txgbe/txgbe_flow.c dpdk-22.11.11/drivers/net/txgbe/txgbe_flow.c --- dpdk-22.11.9/drivers/net/txgbe/txgbe_flow.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/txgbe/txgbe_flow.c 2025-12-24 13:18:07.000000000 +0000 @@ -580,8 +580,12 @@ struct rte_eth_ntuple_filter *filter, struct rte_flow_error *error) { + struct txgbe_filter_info *filter_info = TXGBE_DEV_FILTER(dev); int ret; + if (filter_info->ntuple_is_full) + return -ENOSYS; + ret = cons_parse_ntuple_filter(attr, pattern, actions, filter, error); if (ret) @@ -1333,7 +1337,6 @@ { const struct rte_flow_action *act; const struct rte_flow_action_queue *act_q; - const struct rte_flow_action_mark *mark; /* parse attr */ /* must be input direction */ @@ -1398,10 +1401,9 @@ rule->fdirflags = TXGBE_FDIRPICMD_DROP; } - /* check if the next not void item is MARK */ + /* nothing else supported */ act = next_no_void_action(actions, act); - if (act->type != RTE_FLOW_ACTION_TYPE_MARK && - act->type != RTE_FLOW_ACTION_TYPE_END) { + if (act->type != RTE_FLOW_ACTION_TYPE_END) { memset(rule, 0, sizeof(struct txgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, @@ -1411,21 +1413,6 @@ rule->soft_id = 0; - if (act->type == RTE_FLOW_ACTION_TYPE_MARK) { - mark = (const struct rte_flow_action_mark *)act->conf; - rule->soft_id = mark->id; - act = next_no_void_action(actions, act); - } - - /* check if the next not void item is END */ - if (act->type != RTE_FLOW_ACTION_TYPE_END) { - memset(rule, 0, sizeof(struct txgbe_fdir_rule)); - rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ACTION, - act, "Not supported action."); - return -rte_errno; - } - return 0; } @@ -1537,8 +1524,6 @@ * The next not void item must be END. * ACTION: * The first not void action should be QUEUE or DROP. - * The second not void optional action should be MARK, - * mark_id is a uint32_t number. * The next not void action should be END. 
* UDP/TCP/SCTP pattern example: * ITEM Spec Mask @@ -1849,9 +1834,7 @@ /* check dst addr mask */ for (j = 0; j < 16; j++) { - if (ipv6_mask->hdr.dst_addr[j] == UINT8_MAX) { - rule->mask.dst_ipv6_mask |= 1 << j; - } else if (ipv6_mask->hdr.dst_addr[j] != 0) { + if (ipv6_mask->hdr.dst_addr[j] != 0) { memset(rule, 0, sizeof(struct txgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, @@ -2222,6 +2205,8 @@ const struct rte_flow_item_udp *udp_mask; const struct rte_flow_item_sctp *sctp_spec; const struct rte_flow_item_sctp *sctp_mask; + const struct rte_flow_item_raw *raw_mask; + const struct rte_flow_item_raw *raw_spec; u8 ptid = 0; uint32_t j; @@ -2548,7 +2533,8 @@ if (item->type != RTE_FLOW_ITEM_TYPE_TCP && item->type != RTE_FLOW_ITEM_TYPE_UDP && item->type != RTE_FLOW_ITEM_TYPE_SCTP && - item->type != RTE_FLOW_ITEM_TYPE_END) { + item->type != RTE_FLOW_ITEM_TYPE_END && + item->type != RTE_FLOW_ITEM_TYPE_RAW) { memset(rule, 0, sizeof(struct txgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, @@ -2609,9 +2595,7 @@ /* check dst addr mask */ for (j = 0; j < 16; j++) { - if (ipv6_mask->hdr.dst_addr[j] == UINT8_MAX) { - rule->mask.dst_ipv6_mask |= 1 << j; - } else if (ipv6_mask->hdr.dst_addr[j] != 0) { + if (ipv6_mask->hdr.dst_addr[j] != 0) { memset(rule, 0, sizeof(struct txgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, @@ -2637,7 +2621,8 @@ if (item->type != RTE_FLOW_ITEM_TYPE_TCP && item->type != RTE_FLOW_ITEM_TYPE_UDP && item->type != RTE_FLOW_ITEM_TYPE_SCTP && - item->type != RTE_FLOW_ITEM_TYPE_END) { + item->type != RTE_FLOW_ITEM_TYPE_END && + item->type != RTE_FLOW_ITEM_TYPE_RAW) { memset(rule, 0, sizeof(struct txgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, @@ -2699,6 +2684,16 @@ rule->input.dst_port = tcp_spec->hdr.dst_port; } + + item = next_no_fuzzy_pattern(pattern, item); + if (item->type != RTE_FLOW_ITEM_TYPE_RAW && + item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } } /* Get the UDP info */ @@ -2748,6 +2743,16 @@ rule->input.dst_port = udp_spec->hdr.dst_port; } + + item = next_no_fuzzy_pattern(pattern, item); + if (item->type != RTE_FLOW_ITEM_TYPE_RAW && + item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } } /* Get the SCTP info */ @@ -2798,13 +2803,10 @@ rule->input.dst_port = sctp_spec->hdr.dst_port; } - /* others even sctp port is not supported */ - sctp_mask = item->mask; - if (sctp_mask && - (sctp_mask->hdr.src_port || - sctp_mask->hdr.dst_port || - sctp_mask->hdr.tag || - sctp_mask->hdr.cksum)) { + + item = next_no_fuzzy_pattern(pattern, item); + if (item->type != RTE_FLOW_ITEM_TYPE_RAW && + item->type != RTE_FLOW_ITEM_TYPE_END) { memset(rule, 0, sizeof(struct txgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, @@ -2813,6 +2815,93 @@ } } + /* Get the flex byte info */ + if (item->type == RTE_FLOW_ITEM_TYPE_RAW) { + uint16_t pattern = 0; + + /* Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + /* mask should not be null */ + if (!item->mask || !item->spec) { 
+ memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->b_mask = TRUE; + raw_mask = item->mask; + + /* check mask */ + if (raw_mask->relative != 0x1 || + raw_mask->search != 0x1 || + raw_mask->reserved != 0x0 || + (uint32_t)raw_mask->offset != 0xffffffff || + raw_mask->limit != 0xffff || + raw_mask->length != 0xffff) { + memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->b_spec = TRUE; + raw_spec = item->spec; + + /* check spec */ + if (raw_spec->search != 0 || + raw_spec->reserved != 0 || + raw_spec->offset > TXGBE_MAX_FLX_SOURCE_OFF || + raw_spec->offset % 2 || + raw_spec->limit != 0 || + raw_spec->length != 4 || + /* pattern can't be 0xffff */ + (raw_spec->pattern[0] == 0xff && + raw_spec->pattern[1] == 0xff && + raw_spec->pattern[2] == 0xff && + raw_spec->pattern[3] == 0xff)) { + memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + /* check pattern mask */ + if (raw_mask->pattern[0] != 0xff || + raw_mask->pattern[1] != 0xff || + raw_mask->pattern[2] != 0xff || + raw_mask->pattern[3] != 0xff) { + memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->mask.flex_bytes_mask = 0xffff; + /* Convert pattern string to hex bytes */ + if (sscanf((const char *)raw_spec->pattern, "%hx", &pattern) != 1) { + memset(rule, 0, sizeof(struct txgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Failed to parse raw pattern"); + return -rte_errno; + } + rule->input.flex_bytes = (pattern & 0x00FF) << 8; + rule->input.flex_bytes |= (pattern & 0xFF00) >> 8; + + rule->flex_bytes_offset = raw_spec->offset; + rule->flex_relative = raw_spec->relative; + } + if (item->type != RTE_FLOW_ITEM_TYPE_END) { /* check if the next not void item is END */ item = next_no_fuzzy_pattern(pattern, item); @@ -2839,7 +2928,6 @@ struct rte_flow_error *error) { int ret; - struct txgbe_hw *hw = TXGBE_DEV_HW(dev); struct rte_eth_fdir_conf *fdir_conf = TXGBE_DEV_FDIR_CONF(dev); ret = txgbe_parse_fdir_filter_normal(dev, attr, pattern, @@ -2853,12 +2941,6 @@ return ret; step_next: - - if (hw->mac.type == txgbe_mac_raptor && - rule->fdirflags == TXGBE_FDIRPICMD_DROP && - (rule->input.src_port != 0 || rule->input.dst_port != 0)) - return -ENOTSUP; - if (fdir_conf->mode == RTE_FDIR_MODE_NONE) { fdir_conf->mode = rule->mode; ret = txgbe_fdir_configure(dev); @@ -3103,6 +3185,7 @@ struct txgbe_fdir_rule_ele *fdir_rule_ptr; struct txgbe_rss_conf_ele *rss_filter_ptr; struct txgbe_flow_mem *txgbe_flow_mem_ptr; + struct txgbe_filter_info *filter_info = TXGBE_DEV_FILTER(dev); uint8_t first_mask = FALSE; flow = rte_zmalloc("txgbe_rte_flow", sizeof(struct rte_flow), 0); @@ -3148,10 +3231,13 @@ flow->rule = ntuple_filter_ptr; flow->filter_type = RTE_ETH_FILTER_NTUPLE; return flow; + } else if (filter_info->ntuple_is_full) { + goto next; } goto out; } +next: memset(ðertype_filter, 0, sizeof(struct rte_eth_ethertype_filter)); ret = txgbe_parse_ethertype_filter(dev, attr, pattern, actions, ðertype_filter, error); @@ -3426,6 +3512,7 @@ struct txgbe_fdir_rule_ele *fdir_rule_ptr; struct 
txgbe_flow_mem *txgbe_flow_mem_ptr; struct txgbe_hw_fdir_info *fdir_info = TXGBE_DEV_FDIR(dev); + struct rte_eth_fdir_conf *fdir_conf = TXGBE_DEV_FDIR_CONF(dev); struct txgbe_rss_conf_ele *rss_filter_ptr; switch (filter_type) { @@ -3485,6 +3572,7 @@ fdir_info->mask_added = false; fdir_info->flex_relative = false; fdir_info->flex_bytes_offset = 0; + fdir_conf->mode = RTE_FDIR_MODE_NONE; } } break; diff -Nru dpdk-22.11.9/drivers/net/txgbe/txgbe_rxtx.c dpdk-22.11.11/drivers/net/txgbe/txgbe_rxtx.c --- dpdk-22.11.9/drivers/net/txgbe/txgbe_rxtx.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/txgbe/txgbe_rxtx.c 2025-12-24 13:18:07.000000000 +0000 @@ -2335,13 +2335,9 @@ if (txq == NULL) return -ENOMEM; - /* - * Allocate TX ring hardware descriptors. A memzone large enough to - * handle the maximum ring size is allocated in order to allow for - * resizing in later calls to the queue setup function. - */ + /* Allocate TX ring hardware descriptors. */ tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, - sizeof(struct txgbe_tx_desc) * TXGBE_RING_DESC_MAX, + sizeof(struct txgbe_tx_desc) * nb_desc, TXGBE_ALIGN, socket_id); if (tz == NULL) { txgbe_tx_queue_release(txq); @@ -2579,6 +2575,7 @@ uint16_t len; struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev); uint64_t offloads; + uint32_t size; PMD_INIT_FUNC_TRACE(); hw = TXGBE_DEV_HW(dev); @@ -2629,13 +2626,10 @@ */ rxq->pkt_type_mask = TXGBE_PTID_MASK; - /* - * Allocate RX ring hardware descriptors. A memzone large enough to - * handle the maximum ring size is allocated in order to allow for - * resizing in later calls to the queue setup function. - */ + /* Allocate RX ring hardware descriptors. */ + size = (nb_desc + RTE_PMD_TXGBE_RX_MAX_BURST) * sizeof(struct txgbe_rx_desc); rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, - RX_RING_SZ, TXGBE_ALIGN, socket_id); + size, TXGBE_ALIGN, socket_id); if (rz == NULL) { txgbe_rx_queue_release(rxq); return -ENOMEM; @@ -2645,7 +2639,7 @@ /* * Zero init all the descriptors in the ring. */ - memset(rz->addr, 0, RX_RING_SZ); + memset(rz->addr, 0, size); /* * Modified to setup VFRDT for Virtual Function @@ -4916,7 +4910,7 @@ */ buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) - RTE_PKTMBUF_HEADROOM); - buf_size = ROUND_UP(buf_size, 1 << 10); + buf_size = ROUND_DOWN(buf_size, 1 << 10); srrctl |= TXGBE_RXCFG_PKTLEN(buf_size); /* diff -Nru dpdk-22.11.9/drivers/net/vmxnet3/base/vmxnet3_defs.h dpdk-22.11.11/drivers/net/vmxnet3/base/vmxnet3_defs.h --- dpdk-22.11.9/drivers/net/vmxnet3/base/vmxnet3_defs.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/vmxnet3/base/vmxnet3_defs.h 2025-12-24 13:18:07.000000000 +0000 @@ -575,6 +575,9 @@ /* addition 1 for events */ #define VMXNET3_MAX_INTRS 25 +/* Max number of queues that can request memreg, for both RX and TX. 
*/ +#define VMXNET3_MAX_MEMREG_QUEUES 16 + /* Version 6 and later will use below macros */ #define VMXNET3_EXT_MAX_TX_QUEUES 32 #define VMXNET3_EXT_MAX_RX_QUEUES 32 diff -Nru dpdk-22.11.9/drivers/net/vmxnet3/vmxnet3_ethdev.c dpdk-22.11.11/drivers/net/vmxnet3/vmxnet3_ethdev.c --- dpdk-22.11.9/drivers/net/vmxnet3/vmxnet3_ethdev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/net/vmxnet3/vmxnet3_ethdev.c 2025-12-24 13:18:07.000000000 +0000 @@ -534,6 +534,13 @@ PMD_INIT_FUNC_TRACE(); + /* Disabling RSS for single queue pair */ + if (dev->data->nb_rx_queues == 1 && + dev->data->dev_conf.rxmode.mq_mode == RTE_ETH_MQ_RX_RSS) { + dev->data->dev_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE; + PMD_INIT_LOG(ERR, "WARN: Disabling RSS for single Rx queue"); + } + if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; @@ -725,14 +732,15 @@ Vmxnet3_DriverShared *shared = hw->shared; Vmxnet3_CmdInfo *cmdInfo; struct rte_mempool *mp[VMXNET3_MAX_RX_QUEUES]; - uint8_t index[VMXNET3_MAX_RX_QUEUES + VMXNET3_MAX_TX_QUEUES]; - uint32_t num, i, j, size; + uint16_t index[VMXNET3_MAX_MEMREG_QUEUES]; + uint16_t tx_index_mask; + uint32_t num, tx_num, i, j, size; if (hw->memRegsPA == 0) { const struct rte_memzone *mz; size = sizeof(Vmxnet3_MemRegs) + - (VMXNET3_MAX_RX_QUEUES + VMXNET3_MAX_TX_QUEUES) * + (2 * VMXNET3_MAX_MEMREG_QUEUES) * sizeof(Vmxnet3_MemoryRegion); mz = gpa_zone_reserve(dev, size, "memRegs", rte_socket_id(), 8, @@ -746,7 +754,9 @@ hw->memRegsPA = mz->iova; } - num = hw->num_rx_queues; + num = RTE_MIN(hw->num_rx_queues, VMXNET3_MAX_MEMREG_QUEUES); + tx_num = RTE_MIN(hw->num_tx_queues, VMXNET3_MAX_MEMREG_QUEUES); + tx_index_mask = (uint16_t)((1UL << tx_num) - 1); for (i = 0; i < num; i++) { vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i]; @@ -781,13 +791,15 @@ (uintptr_t)STAILQ_FIRST(&mp[i]->mem_list)->iova; mr->length = STAILQ_FIRST(&mp[i]->mem_list)->len <= INT32_MAX ? 
STAILQ_FIRST(&mp[i]->mem_list)->len : INT32_MAX; - mr->txQueueBits = index[i]; mr->rxQueueBits = index[i]; + /* tx uses same pool, but there may be fewer tx queues */ + mr->txQueueBits = index[i] & tx_index_mask; PMD_INIT_LOG(INFO, "index: %u startPA: %" PRIu64 " length: %u, " - "rxBits: %x", - j, mr->startPA, mr->length, mr->rxQueueBits); + "rxBits: %x, txBits: %x", + j, mr->startPA, mr->length, + mr->rxQueueBits, mr->txQueueBits); j++; } hw->memRegs->numRegs = j; @@ -995,8 +1007,8 @@ } /* Check memregs restrictions first */ - if (dev->data->nb_rx_queues <= VMXNET3_MAX_RX_QUEUES && - dev->data->nb_tx_queues <= VMXNET3_MAX_TX_QUEUES) { + if (dev->data->nb_rx_queues <= VMXNET3_MAX_MEMREG_QUEUES && + dev->data->nb_tx_queues <= VMXNET3_MAX_MEMREG_QUEUES) { ret = vmxnet3_dev_setup_memreg(dev); if (ret == 0) { VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, diff -Nru dpdk-22.11.9/drivers/raw/dpaa2_cmdif/rte_pmd_dpaa2_cmdif.h dpdk-22.11.11/drivers/raw/dpaa2_cmdif/rte_pmd_dpaa2_cmdif.h --- dpdk-22.11.9/drivers/raw/dpaa2_cmdif/rte_pmd_dpaa2_cmdif.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/raw/dpaa2_cmdif/rte_pmd_dpaa2_cmdif.h 2025-12-24 13:18:07.000000000 +0000 @@ -12,6 +12,8 @@ * */ +#include + #ifdef __cplusplus extern "C" { #endif diff -Nru dpdk-22.11.9/drivers/raw/ntb/rte_pmd_ntb.h dpdk-22.11.11/drivers/raw/ntb/rte_pmd_ntb.h --- dpdk-22.11.9/drivers/raw/ntb/rte_pmd_ntb.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/raw/ntb/rte_pmd_ntb.h 2025-12-24 13:18:07.000000000 +0000 @@ -5,6 +5,8 @@ #ifndef _RTE_PMD_NTB_H_ #define _RTE_PMD_NTB_H_ +#include + /* App needs to set/get these attrs */ #define NTB_QUEUE_SZ_NAME "queue_size" #define NTB_QUEUE_NUM_NAME "queue_num" diff -Nru dpdk-22.11.9/drivers/regex/mlx5/mlx5_regex_fastpath.c dpdk-22.11.11/drivers/regex/mlx5/mlx5_regex_fastpath.c --- dpdk-22.11.9/drivers/regex/mlx5/mlx5_regex_fastpath.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/regex/mlx5/mlx5_regex_fastpath.c 2025-12-24 13:18:07.000000000 +0000 @@ -27,7 +27,6 @@ #define MLX5_REGEX_MAX_WQE_INDEX 0xffff #define MLX5_REGEX_METADATA_SIZE ((size_t)64) #define MLX5_REGEX_MAX_OUTPUT (((size_t)1) << 11) -#define MLX5_REGEX_WQE_CTRL_OFFSET 12 #define MLX5_REGEX_WQE_METADATA_OFFSET 16 #define MLX5_REGEX_WQE_GATHER_OFFSET 32 #define MLX5_REGEX_WQE_SCATTER_OFFSET 48 diff -Nru dpdk-22.11.9/drivers/regex/mlx5/mlx5_rxp.c dpdk-22.11.11/drivers/regex/mlx5/mlx5_rxp.c --- dpdk-22.11.9/drivers/regex/mlx5/mlx5_rxp.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/regex/mlx5/mlx5_rxp.c 2025-12-24 13:18:07.000000000 +0000 @@ -24,8 +24,6 @@ #define MLX5_REGEX_MAX_RULES_PER_GROUP UINT32_MAX #define MLX5_REGEX_MAX_GROUPS MLX5_RXP_MAX_SUBSETS -#define MLX5_REGEX_RXP_ROF2_LINE_LEN 34 - const uint64_t combined_rof_tag = 0xff52544424a52475; /* Private Declarations */ diff -Nru dpdk-22.11.9/drivers/regex/mlx5/mlx5_rxp.h dpdk-22.11.11/drivers/regex/mlx5/mlx5_rxp.h --- dpdk-22.11.9/drivers/regex/mlx5/mlx5_rxp.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/regex/mlx5/mlx5_rxp.h 2025-12-24 13:18:07.000000000 +0000 @@ -9,27 +9,13 @@ #define MLX5_RXP_BF3_IDENTIFIER 0x1 #define MLX5_RXP_MAX_JOB_LENGTH 16384 #define MLX5_RXP_MAX_SUBSETS 4095 -#define MLX5_RXP_CSR_NUM_ENTRIES 31 #define MLX5_RXP_BF2_ROF_VERSION_STRING 0x07055254 #define MLX5_RXP_BF3_ROF_VERSION_STRING 0x00065254 -#define MLX5_RXP_BF4_ROF_VERSION_STRING 0x00075254 - -#define MLX5_RXP_CTRL_TYPE_MASK 7 -#define MLX5_RXP_CTRL_TYPE_JOB_DESCRIPTOR 0 -#define 
MLX5_RXP_CTRL_TYPE_RESPONSE_DESCRIPTOR 1 -#define MLX5_RXP_CTRL_TYPE_MEMORY_WRITE 4 -#define MLX5_RXP_CSR_CTRL_DISABLE_L2C (1 << 7) #define MLX5_RXP_CTRL_JOB_DESC_SOF 0x0010 #define MLX5_RXP_CTRL_JOB_DESC_EOF 0x0020 #define MLX5_RXP_CTRL_JOB_DESC_HPM_ENABLE 0x0100 #define MLX5_RXP_CTRL_JOB_DESC_ANYMATCH_ENABLE 0x0200 -#define MLX5_RXP_CTRL_JOB_DESC_FLAGS (MLX5_RXP_CTRL_JOB_DESC_SOF | \ - MLX5_RXP_CTRL_JOB_DESC_EOF | \ - MLX5_RXP_CTRL_JOB_DESC_HPM_ENABLE | \ - MLX5_RXP_CTRL_JOB_DESC_ANYMATCH_ENABLE) - -#define MLX5_RXP_CTRL_VALID 0x8000 #define MLX5_RXP_RESP_STATUS_MAX_PRI_THREADS (1 << 3) #define MLX5_RXP_RESP_STATUS_MAX_SEC_THREADS (1 << 4) @@ -128,12 +114,6 @@ MLX5_RXP_PRIVATE_PROG_MODE, }; -#define MLX5_RXP_POLL_CSR_FOR_VALUE_TIMEOUT 3000 /* Poll timeout in ms. */ -#define MLX5_RXP_INITIALIZATION_TIMEOUT 60000 /* Initialize timeout in ms. */ -#define MLX5_RXP_MAX_ENGINES 2u /* Number of RXP engines. */ -#define MLX5_RXP_EM_COUNT 1u /* Extra External Memories to use. */ -#define MLX5_RXP_DB_NOT_ASSIGNED 0xFF - struct mlx5_regex_mkey { struct mlx5dv_devx_umem *umem; struct mlx5_devx_obj *mkey; diff -Nru dpdk-22.11.9/drivers/vdpa/mlx5/mlx5_vdpa.h dpdk-22.11.11/drivers/vdpa/mlx5/mlx5_vdpa.h --- dpdk-22.11.9/drivers/vdpa/mlx5/mlx5_vdpa.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/drivers/vdpa/mlx5/mlx5_vdpa.h 2025-12-24 13:18:07.000000000 +0000 @@ -37,7 +37,6 @@ #define VIRTIO_F_RING_PACKED 34 #endif -#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 0u #define MLX5_VDPA_DEFAULT_TIMER_STEP_US 1u struct mlx5_vdpa_cq { diff -Nru dpdk-22.11.9/examples/l3fwd-power/main.c dpdk-22.11.11/examples/l3fwd-power/main.c --- dpdk-22.11.9/examples/l3fwd-power/main.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/examples/l3fwd-power/main.c 2025-12-24 13:18:07.000000000 +0000 @@ -3099,7 +3099,7 @@ rte_spinlock_init(&stats[lcore_id].telemetry_lock); } rte_timer_init(&telemetry_timer); - rte_telemetry_register_cmd("/l3fwd-power/stats", + rte_telemetry_register_cmd("/l3fwd_power/stats", handle_app_stats, "Returns global power stats. Parameters: None"); rte_eal_mp_remote_launch(main_telemetry_loop, NULL, diff -Nru dpdk-22.11.9/examples/server_node_efd/server/main.c dpdk-22.11.11/examples/server_node_efd/server/main.c --- dpdk-22.11.9/examples/server_node_efd/server/main.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/examples/server_node_efd/server/main.c 2025-12-24 13:18:07.000000000 +0000 @@ -68,7 +68,7 @@ get_printable_mac_addr(uint16_t port) { static const char err_address[] = "00:00:00:00:00:00"; - static char addresses[RTE_MAX_ETHPORTS][sizeof(err_address)]; + static char addresses[RTE_MAX_ETHPORTS][RTE_ETHER_ADDR_FMT_SIZE + 1]; struct rte_ether_addr mac; int ret; diff -Nru dpdk-22.11.9/examples/server_node_efd/shared/common.h dpdk-22.11.11/examples/server_node_efd/shared/common.h --- dpdk-22.11.9/examples/server_node_efd/shared/common.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/examples/server_node_efd/shared/common.h 2025-12-24 13:18:07.000000000 +0000 @@ -58,8 +58,9 @@ /* * Buffer for return value. Size calculated by %u being replaced * by maximum 3 digits (plus an extra byte for safety) + * Used as ring name, so upper limit is ring name size. 
*/ - static char buffer[sizeof(MP_NODE_RXQ_NAME) + 2]; + static char buffer[RTE_RING_NAMESIZE]; snprintf(buffer, sizeof(buffer), MP_NODE_RXQ_NAME, id); return buffer; diff -Nru dpdk-22.11.9/examples/vdpa/main.c dpdk-22.11.11/examples/vdpa/main.c --- dpdk-22.11.9/examples/vdpa/main.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/examples/vdpa/main.c 2025-12-24 13:18:07.000000000 +0000 @@ -25,6 +25,7 @@ #define MAX_PATH_LEN 128 #define MAX_VDPA_SAMPLE_PORTS 1024 +#define MAX_VDPA_STR_LEN sizeof(RTE_STR(MAX_VDPA_SAMPLE_PORTS)) #define RTE_LOGTYPE_VDPA RTE_LOGTYPE_USER1 struct vdpa_port { @@ -39,7 +40,7 @@ static struct vdpa_port vports[MAX_VDPA_SAMPLE_PORTS]; -static char iface[MAX_PATH_LEN]; +static char iface[MAX_PATH_LEN - MAX_VDPA_STR_LEN]; static int devcnt; static int interactive; static int client_mode; @@ -77,9 +78,8 @@ break; /* long options */ case 0: - if (strncmp(long_option[idx].name, "iface", - MAX_PATH_LEN) == 0) { - rte_strscpy(iface, optarg, MAX_PATH_LEN); + if (!strcmp(long_option[idx].name, "iface")) { + rte_strscpy(iface, optarg, sizeof(iface)); printf("iface %s\n", iface); } if (!strcmp(long_option[idx].name, "interactive")) { diff -Nru dpdk-22.11.9/lib/cfgfile/rte_cfgfile.c dpdk-22.11.11/lib/cfgfile/rte_cfgfile.c --- dpdk-22.11.9/lib/cfgfile/rte_cfgfile.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/cfgfile/rte_cfgfile.c 2025-12-24 13:18:07.000000000 +0000 @@ -465,10 +465,14 @@ int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sectionname, -size_t length) + size_t length) { - int i; int num_sections = 0; + int i; + + if (sectionname == NULL) + return cfg->num_sections; + for (i = 0; i < cfg->num_sections; i++) { if (strncmp(cfg->sections[i].name, sectionname, length) == 0) num_sections++; diff -Nru dpdk-22.11.9/lib/cmdline/cmdline_parse_portlist.c dpdk-22.11.11/lib/cmdline/cmdline_parse_portlist.c --- dpdk-22.11.9/lib/cmdline/cmdline_parse_portlist.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/cmdline/cmdline_parse_portlist.c 2025-12-24 13:18:07.000000000 +0000 @@ -9,7 +9,9 @@ #include #include +#include #include + #include "cmdline_parse.h" #include "cmdline_parse_portlist.h" @@ -24,22 +26,20 @@ parse_set_list(cmdline_portlist_t *pl, size_t low, size_t high) { do { - pl->map |= (1 << low++); + pl->map |= RTE_BIT32(low); + low++; } while (low <= high); } static int parse_ports(cmdline_portlist_t *pl, const char *str) { + const char *first = str; size_t ps, pe; - const char *first, *last; char *end; - for (first = str, last = first; - first != NULL && last != NULL; - first = last + 1) { - - last = strchr(first, ','); + while (first != NULL) { + const char *last = strchr(first, ','); errno = 0; ps = strtoul(first, &end, 10); @@ -63,6 +63,7 @@ return -1; parse_set_list(pl, ps, pe); + first = (last == NULL ? 
NULL : last + 1); } return 0; diff -Nru dpdk-22.11.9/lib/dmadev/rte_dmadev.h dpdk-22.11.11/lib/dmadev/rte_dmadev.h --- dpdk-22.11.9/lib/dmadev/rte_dmadev.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/dmadev/rte_dmadev.h 2025-12-24 13:18:07.000000000 +0000 @@ -145,6 +145,7 @@ */ #include +#include #include #include diff -Nru dpdk-22.11.9/lib/eal/common/eal_common_options.c dpdk-22.11.11/lib/eal/common/eal_common_options.c --- dpdk-22.11.9/lib/eal/common/eal_common_options.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/eal/common/eal_common_options.c 2025-12-24 13:18:07.000000000 +0000 @@ -397,12 +397,21 @@ } #else +static bool +ends_with(const char *str, const char *tail) +{ + size_t tail_len = strlen(tail); + size_t str_len = strlen(str); + + return str_len >= tail_len && strcmp(&str[str_len - tail_len], tail) == 0; +} + static int eal_plugindir_init(const char *path) { - DIR *d = NULL; struct dirent *dent = NULL; char sopath[PATH_MAX]; + DIR *d = NULL; if (path == NULL || *path == '\0') return 0; @@ -416,12 +425,8 @@ while ((dent = readdir(d)) != NULL) { struct stat sb; - int nlen = strnlen(dent->d_name, sizeof(dent->d_name)); - /* check if name ends in .so or .so.ABI_VERSION */ - if (strcmp(&dent->d_name[nlen - 3], ".so") != 0 && - strcmp(&dent->d_name[nlen - 4 - strlen(ABI_VERSION)], - ".so."ABI_VERSION) != 0) + if (!ends_with(dent->d_name, ".so") && !ends_with(dent->d_name, ".so."ABI_VERSION)) continue; snprintf(sopath, sizeof(sopath), "%s/%s", path, dent->d_name); diff -Nru dpdk-22.11.9/lib/eal/freebsd/eal.c dpdk-22.11.11/lib/eal/freebsd/eal.c --- dpdk-22.11.9/lib/eal/freebsd/eal.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/eal/freebsd/eal.c 2025-12-24 13:18:07.000000000 +0000 @@ -692,6 +692,10 @@ * with a message describing the cause. */ has_phys_addr = internal_conf->no_hugetlbfs == 0; + + /* Always call rte_bus_get_iommu_class() to trigger DMA mask detection and validation */ + enum rte_iova_mode bus_iova_mode = rte_bus_get_iommu_class(); + iova_mode = internal_conf->iova_mode; if (iova_mode == RTE_IOVA_PA && !has_phys_addr) { rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available"); @@ -702,7 +706,7 @@ RTE_LOG(DEBUG, EAL, "Specific IOVA mode is not requested, autodetecting\n"); if (has_phys_addr) { RTE_LOG(DEBUG, EAL, "Selecting IOVA mode according to bus requests\n"); - iova_mode = rte_bus_get_iommu_class(); + iova_mode = bus_iova_mode; if (iova_mode == RTE_IOVA_DC) iova_mode = RTE_IOVA_PA; } else { @@ -902,8 +906,8 @@ struct internal_config *internal_conf = eal_get_internal_configuration(); rte_service_finalize(); - rte_mp_channel_cleanup(); eal_bus_cleanup(); + rte_mp_channel_cleanup(); rte_eal_alarm_cleanup(); rte_trace_save(); eal_trace_fini(); diff -Nru dpdk-22.11.9/lib/eal/include/rte_mcslock.h dpdk-22.11.11/lib/eal/include/rte_mcslock.h --- dpdk-22.11.9/lib/eal/include/rte_mcslock.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/eal/include/rte_mcslock.h 2025-12-24 13:18:07.000000000 +0000 @@ -56,11 +56,21 @@ __atomic_store_n(&me->locked, 1, __ATOMIC_RELAXED); __atomic_store_n(&me->next, NULL, __ATOMIC_RELAXED); - /* If the queue is empty, the exchange operation is enough to acquire - * the lock. Hence, the exchange operation requires acquire semantics. - * The store to me->next above should complete before the node is - * visible to other CPUs/threads. Hence, the exchange operation requires - * release semantics as well. 
+ /* + * A0/R0: Queue might be empty, perform the exchange (RMW) with both acquire and + * release semantics: + * A0: Acquire — synchronizes with both R0 and R2. + * Must synchronize with R0 to ensure that this thread observes predecessor's + * initialization of its lock object or risk them overwriting this thread's + * update to the next of the same object via store to prev->next. + * + * Must synchronize with R2 the releasing CAS in unlock(), this will ensure + * that all prior critical-section writes become visible to this thread. + * + * R0: Release — ensures the successor observes our initialization of me->next; + * without it, me->next could be overwritten to NULL after the successor + * sets its own address, causing deadlock. This release synchronizes with + * A0 above. */ prev = __atomic_exchange_n(msl, me, __ATOMIC_ACQ_REL); if (likely(prev == NULL)) { @@ -69,24 +79,26 @@ */ return; } - /* The store to me->next above should also complete before the node is - * visible to predecessor thread releasing the lock. Hence, the store - * prev->next also requires release semantics. Note that, for example, - * on ARM, the release semantics in the exchange operation is not - * strong as a release fence and is not sufficient to enforce the - * desired order here. + + /* + * R1: With the relaxed memory model of C/C++, it's essential that after + * we link ourselves by storing prev->next = me, the owner of prev must + * observe our prior initialization of me->locked. Otherwise it could + * clear me->locked before we set it to 1, which may deadlock. + * Perform a releasing store so the predecessor's acquire loads A2 and A3 + * observes our initialization, establishing a happens-before from those + * writes. */ __atomic_store_n(&prev->next, me, __ATOMIC_RELEASE); - /* The while-load of me->locked should not move above the previous - * store to prev->next. Otherwise it will cause a deadlock. Need a - * store-load barrier. - */ - __atomic_thread_fence(__ATOMIC_ACQ_REL); - /* If the lock has already been acquired, it first atomically + /* + * A1: If the lock has already been acquired, it first atomically * places the node at the end of the queue and then proceeds * to spin on me->locked until the previous lock holder resets - * the me->locked using mcslock_unlock(). + * the me->locked in rte_mcslock_unlock(). + * This load must synchronize with store-release R3 to ensure that + * all updates to critical section by previous lock holder is visible + * to this thread after acquiring the lock. */ rte_wait_until_equal_32((uint32_t *)&me->locked, 0, __ATOMIC_ACQUIRE); } @@ -102,31 +114,46 @@ static inline void rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me) { - /* Check if there are more nodes in the queue. */ - if (likely(__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)) { + /* + * A2: Check whether a successor is already queued. + * Load me->next with acquire semantics so it can synchronize with the + * successor’s release store R1. This guarantees that the successor’s + * initialization of its lock object (me) is completed before we observe + * it here, preventing a race between this thread’s store-release to + * me->next->locked and the successor’s store to me->locked. + */ + if (likely(__atomic_load_n(&me->next, __ATOMIC_ACQUIRE) == NULL)) { /* No, last member in the queue. 
*/ - rte_mcslock_t *save_me = __atomic_load_n(&me, __ATOMIC_RELAXED); + rte_mcslock_t *save_me = me; - /* Release the lock by setting it to NULL */ + /* + * R2: Try to release the lock by swinging *msl from save_me to NULL. + * Use release semantics so all critical section writes become + * visible to the next lock acquirer. + */ if (likely(__atomic_compare_exchange_n(msl, &save_me, NULL, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) return; - /* Speculative execution would be allowed to read in the - * while-loop first. This has the potential to cause a - * deadlock. Need a load barrier. - */ - __atomic_thread_fence(__ATOMIC_ACQUIRE); - /* More nodes added to the queue by other CPUs. - * Wait until the next pointer is set. + /* + * A3: Another thread was enqueued concurrently, so the CAS and the lock + * release failed. Wait until the successor sets our 'next' pointer. + * This load must synchronize with the successor’s release store (R1) to + * ensure that the successor’s initialization completes before we observe + * it here. This ordering prevents a race between this thread’s later + * store-release to me->next->locked and the successor’s store to me->locked. */ uintptr_t *next; next = (uintptr_t *)&me->next; - RTE_WAIT_UNTIL_MASKED(next, UINTPTR_MAX, !=, 0, - __ATOMIC_RELAXED); + RTE_WAIT_UNTIL_MASKED(next, UINTPTR_MAX, !=, 0, __ATOMIC_ACQUIRE); } - /* Pass lock to next waiter. */ + /* + * R3: Pass the lock to the successor. + * Use a release store to synchronize with A1 when clearing me->next->locked + * so the successor observes our critical section writes after it sees locked + * become 0. + */ __atomic_store_n(&me->next->locked, 0, __ATOMIC_RELEASE); } @@ -149,11 +176,11 @@ /* Try to lock */ rte_mcslock_t *expected = NULL; - /* The lock can be taken only when the queue is empty. Hence, - * the compare-exchange operation requires acquire semantics. - * The store to me->next above should complete before the node - * is visible to other CPUs/threads. Hence, the compare-exchange - * operation requires release semantics as well. + /* + * A4/R4: The lock can be acquired only when the queue is empty. + * The compare-and-exchange operation must use acquire and release + * semantics for the same reasons described in the rte_mcslock_lock() + * function’s empty-queue case (see A0/R0 for details). */ return __atomic_compare_exchange_n(msl, &expected, me, 0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED); diff -Nru dpdk-22.11.9/lib/eal/include/rte_tailq.h dpdk-22.11.11/lib/eal/include/rte_tailq.h --- dpdk-22.11.9/lib/eal/include/rte_tailq.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/eal/include/rte_tailq.h 2025-12-24 13:18:07.000000000 +0000 @@ -70,11 +70,12 @@ * @return * The return value from rte_eal_tailq_lookup, typecast to the appropriate * structure pointer type. - * NULL on error, since the tailq_head is the first - * element in the rte_tailq_head structure. + * NULL on error. */ -#define RTE_TAILQ_LOOKUP(name, struct_name) \ - RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name) +#define RTE_TAILQ_LOOKUP(name, struct_name) __extension__ ({ \ + struct rte_tailq_head *head = rte_eal_tailq_lookup(name); \ + head == NULL ? NULL : RTE_TAILQ_CAST(head, struct_name); \ +}) /** * Dump tail queues to a file. 
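The rte_mcslock.h rework above replaces the fence-based ordering with explicitly paired acquire/release operations, labelled A0..A4 and R0..R4 in the new comments. The same pairing, condensed into a standalone sketch using C11 <stdatomic.h> rather than DPDK's __atomic builtins; the names (mcs_node, mcs_lock, mcs_unlock) and the bare busy-wait loops are illustrative assumptions, not DPDK API.

#include <stdatomic.h>
#include <stddef.h>

struct mcs_node {
    _Atomic(struct mcs_node *) next;
    atomic_uint locked;
};

static void
mcs_lock(_Atomic(struct mcs_node *) *tail, struct mcs_node *me)
{
    struct mcs_node *prev;

    atomic_store_explicit(&me->locked, 1, memory_order_relaxed);
    atomic_store_explicit(&me->next, NULL, memory_order_relaxed);

    /* A0/R0: append self to the queue; acquire pairs with the releasing
     * CAS in mcs_unlock(), release publishes the initialization above. */
    prev = atomic_exchange_explicit(tail, me, memory_order_acq_rel);
    if (prev == NULL)
        return; /* queue was empty: lock acquired */

    /* R1: link behind the predecessor; release publishes me->locked = 1. */
    atomic_store_explicit(&prev->next, me, memory_order_release);

    /* A1: spin until the predecessor hands the lock over at R3
     * (plain busy-wait here; DPDK uses rte_wait_until_equal_32()). */
    while (atomic_load_explicit(&me->locked, memory_order_acquire) != 0)
        ;
}

static void
mcs_unlock(_Atomic(struct mcs_node *) *tail, struct mcs_node *me)
{
    struct mcs_node *succ;

    /* A2: acquire pairs with the successor's release store at R1. */
    if (atomic_load_explicit(&me->next, memory_order_acquire) == NULL) {
        struct mcs_node *expected = me;

        /* R2: no successor visible; try to empty the queue. */
        if (atomic_compare_exchange_strong_explicit(tail, &expected, NULL,
                memory_order_release, memory_order_relaxed))
            return;

        /* A3: a successor is enqueueing; wait for it to link itself. */
        while (atomic_load_explicit(&me->next, memory_order_acquire) == NULL)
            ;
    }

    succ = atomic_load_explicit(&me->next, memory_order_relaxed);

    /* R3: hand the lock to the successor; the release makes this thread's
     * critical-section writes visible once locked reads 0 (paired with A1). */
    atomic_store_explicit(&succ->locked, 0, memory_order_release);
}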
diff -Nru dpdk-22.11.9/lib/eal/include/rte_vfio.h dpdk-22.11.11/lib/eal/include/rte_vfio.h --- dpdk-22.11.9/lib/eal/include/rte_vfio.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/eal/include/rte_vfio.h 2025-12-24 13:18:07.000000000 +0000 @@ -214,14 +214,14 @@ const char *dev_addr, int *iommu_group_num); /** - * Open a new VFIO container fd + * Get the default VFIO container fd * * This function is only relevant to linux and will return * an error on BSD. * * @return - * > 0 container fd - * < 0 for errors + * > 0 default container fd + * < 0 if VFIO is not enabled or not supported */ int rte_vfio_get_container_fd(void); diff -Nru dpdk-22.11.9/lib/eal/linux/eal.c dpdk-22.11.11/lib/eal/linux/eal.c --- dpdk-22.11.9/lib/eal/linux/eal.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/eal/linux/eal.c 2025-12-24 13:18:07.000000000 +0000 @@ -1065,10 +1065,13 @@ phys_addrs = rte_eal_using_phys_addrs() != 0; + /* Always call rte_bus_get_iommu_class() to trigger DMA mask detection and validation */ + enum rte_iova_mode bus_iova_mode = rte_bus_get_iommu_class(); + /* if no EAL option "--iova-mode=", use bus IOVA scheme */ if (internal_conf->iova_mode == RTE_IOVA_DC) { /* autodetect the IOVA mapping mode */ - enum rte_iova_mode iova_mode = rte_bus_get_iommu_class(); + enum rte_iova_mode iova_mode = bus_iova_mode; if (iova_mode == RTE_IOVA_DC) { RTE_LOG(DEBUG, EAL, "Buses did not request a specific IOVA mode.\n"); @@ -1375,11 +1378,11 @@ rte_memseg_walk(mark_freeable, NULL); rte_service_finalize(); + eal_bus_cleanup(); #ifdef VFIO_PRESENT vfio_mp_sync_cleanup(); #endif rte_mp_channel_cleanup(); - eal_bus_cleanup(); rte_eal_alarm_cleanup(); rte_trace_save(); eal_trace_fini(); diff -Nru dpdk-22.11.9/lib/eal/linux/eal_vfio.c dpdk-22.11.11/lib/eal/linux/eal_vfio.c --- dpdk-22.11.9/lib/eal/linux/eal_vfio.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/eal/linux/eal_vfio.c 2025-12-24 13:18:07.000000000 +0000 @@ -347,7 +347,7 @@ } static int -vfio_open_group_fd(int iommu_group_num) +vfio_open_group_fd(int iommu_group_num, bool mp_request) { int vfio_group_fd; char filename[PATH_MAX]; @@ -355,11 +355,9 @@ struct rte_mp_reply mp_reply = {0}; struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; - const struct internal_config *internal_conf = - eal_get_internal_configuration(); - /* if primary, try to open the group */ - if (internal_conf->process_type == RTE_PROC_PRIMARY) { + /* if not requesting via mp, open the group locally */ + if (!mp_request) { /* try regular group format */ snprintf(filename, sizeof(filename), VFIO_GROUP_FMT, iommu_group_num); @@ -469,7 +467,24 @@ return -1; } - vfio_group_fd = vfio_open_group_fd(iommu_group_num); + /* + * When opening a group fd, we need to decide whether to open it locally + * or request it from the primary process via mp_sync. + * + * For the default container, secondary processes use mp_sync so that + * the primary process tracks the group fd and maintains VFIO state + * across all processes. + * + * For custom containers, we open the group fd locally in each process + * since custom containers are process-local and the primary has no + * knowledge of them. Requesting a group fd from the primary for a + * container it doesn't know about would be incorrect. 
+ */ + const struct internal_config *internal_conf = eal_get_internal_configuration(); + bool mp_request = (internal_conf->process_type == RTE_PROC_SECONDARY) && + (vfio_cfg == default_vfio_cfg); + + vfio_group_fd = vfio_open_group_fd(iommu_group_num, mp_request); if (vfio_group_fd < 0) { RTE_LOG(ERR, EAL, "Failed to open VFIO group %d\n", iommu_group_num); @@ -1120,13 +1135,12 @@ } if (internal_conf->process_type == RTE_PROC_PRIMARY) { - /* open a new container */ - default_vfio_cfg->vfio_container_fd = - rte_vfio_get_container_fd(); + /* open a default container */ + default_vfio_cfg->vfio_container_fd = vfio_open_container_fd(false); } else { /* get the default container from the primary process */ default_vfio_cfg->vfio_container_fd = - vfio_get_default_container_fd(); + vfio_open_container_fd(true); } /* check if we have VFIO driver enabled */ @@ -1148,49 +1162,6 @@ } int -vfio_get_default_container_fd(void) -{ - struct rte_mp_msg mp_req, *mp_rep; - struct rte_mp_reply mp_reply = {0}; - struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; - struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; - int container_fd; - const struct internal_config *internal_conf = - eal_get_internal_configuration(); - - if (default_vfio_cfg->vfio_enabled) - return default_vfio_cfg->vfio_container_fd; - - if (internal_conf->process_type == RTE_PROC_PRIMARY) { - /* if we were secondary process we would try requesting - * container fd from the primary, but we're the primary - * process so just exit here - */ - return -1; - } - - p->req = SOCKET_REQ_DEFAULT_CONTAINER; - strcpy(mp_req.name, EAL_VFIO_MP); - mp_req.len_param = sizeof(*p); - mp_req.num_fds = 0; - - if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && - mp_reply.nb_received == 1) { - mp_rep = &mp_reply.msgs[0]; - p = (struct vfio_mp_param *)mp_rep->param; - if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { - container_fd = mp_rep->fds[0]; - free(mp_reply.msgs); - return container_fd; - } - } - - free(mp_reply.msgs); - RTE_LOG(ERR, EAL, "Cannot request default VFIO container fd\n"); - return -1; -} - -int vfio_get_iommu_type(void) { if (default_vfio_cfg->vfio_iommu_type == NULL) @@ -1255,20 +1226,25 @@ return 0; } +/* + * Open a new VFIO container fd. + * + * If mp_request is true, requests a new container fd from the primary process + * via mp channel (for secondary processes that need to open the default container). + * + * Otherwise, opens a new container fd locally by opening /dev/vfio/vfio. + */ int -rte_vfio_get_container_fd(void) +vfio_open_container_fd(bool mp_request) { int ret, vfio_container_fd; struct rte_mp_msg mp_req, *mp_rep; struct rte_mp_reply mp_reply = {0}; struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; - const struct internal_config *internal_conf = - eal_get_internal_configuration(); - - /* if we're in a primary process, try to open the container */ - if (internal_conf->process_type == RTE_PROC_PRIMARY) { + /* if not requesting via mp, open a new container locally */ + if (!mp_request) { vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR); if (vfio_container_fd < 0) { RTE_LOG(ERR, EAL, @@ -1327,6 +1303,19 @@ } int +rte_vfio_get_container_fd(void) +{ + /* Return the default container fd if VFIO is enabled. + * The default container is set up during rte_vfio_enable(). + * This function does not create a new container. 
+ */ + if (!default_vfio_cfg->vfio_enabled) + return -1; + + return default_vfio_cfg->vfio_container_fd; +} + +int rte_vfio_get_group_num(const char *sysfs_base, const char *dev_addr, int *iommu_group_num) { @@ -2072,7 +2061,8 @@ return -1; } - vfio_cfgs[i].vfio_container_fd = rte_vfio_get_container_fd(); + /* Create a new container fd */ + vfio_cfgs[i].vfio_container_fd = vfio_open_container_fd(false); if (vfio_cfgs[i].vfio_container_fd < 0) { RTE_LOG(NOTICE, EAL, "Fail to create a new VFIO container\n"); return -1; diff -Nru dpdk-22.11.9/lib/eal/linux/eal_vfio.h dpdk-22.11.11/lib/eal/linux/eal_vfio.h --- dpdk-22.11.9/lib/eal/linux/eal_vfio.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/eal/linux/eal_vfio.h 2025-12-24 13:18:07.000000000 +0000 @@ -119,7 +119,7 @@ }; /* get the vfio container that devices are bound to by default */ -int vfio_get_default_container_fd(void); +int vfio_open_container_fd(bool mp_request); /* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */ const struct vfio_iommu_type * @@ -139,8 +139,7 @@ #define SOCKET_REQ_CONTAINER 0x100 #define SOCKET_REQ_GROUP 0x200 -#define SOCKET_REQ_DEFAULT_CONTAINER 0x400 -#define SOCKET_REQ_IOMMU_TYPE 0x800 +#define SOCKET_REQ_IOMMU_TYPE 0x400 #define SOCKET_OK 0x0 #define SOCKET_NO_FD 0x1 #define SOCKET_ERR 0xFF diff -Nru dpdk-22.11.9/lib/eal/linux/eal_vfio_mp_sync.c dpdk-22.11.11/lib/eal/linux/eal_vfio_mp_sync.c --- dpdk-22.11.9/lib/eal/linux/eal_vfio_mp_sync.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/eal/linux/eal_vfio_mp_sync.c 2025-12-24 13:18:07.000000000 +0000 @@ -26,7 +26,6 @@ vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer) { int fd = -1; - int ret; struct rte_mp_msg reply; struct vfio_mp_param *r = (struct vfio_mp_param *)reply.param; const struct vfio_mp_param *m = @@ -67,17 +66,6 @@ reply.fds[0] = fd; } break; - case SOCKET_REQ_DEFAULT_CONTAINER: - r->req = SOCKET_REQ_DEFAULT_CONTAINER; - fd = vfio_get_default_container_fd(); - if (fd < 0) - r->result = SOCKET_ERR; - else { - r->result = SOCKET_OK; - reply.num_fds = 1; - reply.fds[0] = fd; - } - break; case SOCKET_REQ_IOMMU_TYPE: { int iommu_type_id; @@ -102,10 +90,7 @@ strcpy(reply.name, EAL_VFIO_MP); reply.len_param = sizeof(*r); - ret = rte_mp_reply(&reply, peer); - if (m->req == SOCKET_REQ_CONTAINER && fd >= 0) - close(fd); - return ret; + return rte_mp_reply(&reply, peer); } int diff -Nru dpdk-22.11.9/lib/eal/windows/eal.c dpdk-22.11.11/lib/eal/windows/eal.c --- dpdk-22.11.9/lib/eal/windows/eal.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/eal/windows/eal.c 2025-12-24 13:18:07.000000000 +0000 @@ -359,6 +359,9 @@ has_phys_addr = false; } + /* Always call rte_bus_get_iommu_class() to trigger DMA mask detection and validation */ + enum rte_iova_mode bus_iova_mode = rte_bus_get_iommu_class(); + iova_mode = internal_conf->iova_mode; if (iova_mode == RTE_IOVA_PA && !has_phys_addr) { rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available"); @@ -369,7 +372,7 @@ RTE_LOG(DEBUG, EAL, "Specific IOVA mode is not requested, autodetecting\n"); if (has_phys_addr) { RTE_LOG(DEBUG, EAL, "Selecting IOVA mode according to bus requests\n"); - iova_mode = rte_bus_get_iommu_class(); + iova_mode = bus_iova_mode; if (iova_mode == RTE_IOVA_DC) iova_mode = RTE_IOVA_PA; } else { diff -Nru dpdk-22.11.9/lib/efd/rte_efd.c dpdk-22.11.11/lib/efd/rte_efd.c --- dpdk-22.11.9/lib/efd/rte_efd.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/efd/rte_efd.c 2025-12-24 
13:18:07.000000000 +0000 @@ -24,6 +24,7 @@ #include "rte_efd.h" #if defined(RTE_ARCH_X86) +#include "rte_efd_x86.h" #elif defined(RTE_ARCH_ARM64) #include "rte_efd_arm64.h" #endif @@ -1268,7 +1269,7 @@ switch (lookup_fn) { -#if defined(RTE_ARCH_X86) && defined(CC_SUPPORT_AVX2) +#if defined(RTE_ARCH_X86) case EFD_LOOKUP_AVX2: return efd_lookup_internal_avx2(group->hash_idx, group->lookup_table, diff -Nru dpdk-22.11.9/lib/ethdev/rte_ethdev.h dpdk-22.11.11/lib/ethdev/rte_ethdev.h --- dpdk-22.11.9/lib/ethdev/rte_ethdev.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/ethdev/rte_ethdev.h 2025-12-24 13:18:07.000000000 +0000 @@ -3443,7 +3443,7 @@ * @param port_id * The port identifier of the Ethernet device. * @param rx_queue_id - * The index of the receive queue for which a queue stats mapping is required. + * The index of the receive queue on which to enable/disable VLAN stripping. * The value must be in the range [0, nb_rx_queue - 1] previously supplied * to rte_eth_dev_configure(). * @param on diff -Nru dpdk-22.11.9/lib/eventdev/rte_event_crypto_adapter.c dpdk-22.11.11/lib/eventdev/rte_event_crypto_adapter.c --- dpdk-22.11.9/lib/eventdev/rte_event_crypto_adapter.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/eventdev/rte_event_crypto_adapter.c 2025-12-24 13:18:07.000000000 +0000 @@ -1453,7 +1453,7 @@ RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); if (!rte_cryptodev_is_valid_dev(cdev_id)) { - RTE_EDEV_LOG_ERR("Invalid dev_id=%" PRIu8, cdev_id); + RTE_EDEV_LOG_ERR("Invalid dev_id=%" PRIu16, cdev_id); return -EINVAL; } @@ -1474,7 +1474,7 @@ if (!(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_EVENT_VECTOR)) { RTE_EDEV_LOG_ERR("Event vectorization is not supported," - "dev %" PRIu8 " cdev %" PRIu8, dev_id, cdev_id); + "dev %" PRIu8 " cdev %" PRIu16, dev_id, cdev_id); return -ENOTSUP; } diff -Nru dpdk-22.11.9/lib/eventdev/rte_event_timer_adapter.c dpdk-22.11.11/lib/eventdev/rte_event_timer_adapter.c --- dpdk-22.11.9/lib/eventdev/rte_event_timer_adapter.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/eventdev/rte_event_timer_adapter.c 2025-12-24 13:18:07.000000000 +0000 @@ -1324,7 +1324,7 @@ adapter_id = atoi(params); - if (adapter_id >= RTE_EVENT_TIMER_ADAPTER_NUM_MAX) { + if (adapters == NULL || adapter_id >= RTE_EVENT_TIMER_ADAPTER_NUM_MAX) { EVTIM_LOG_ERR("Invalid timer adapter id %u", adapter_id); return -EINVAL; } @@ -1365,7 +1365,7 @@ adapter_id = atoi(params); - if (adapter_id >= RTE_EVENT_TIMER_ADAPTER_NUM_MAX) { + if (adapters == NULL || adapter_id >= RTE_EVENT_TIMER_ADAPTER_NUM_MAX) { EVTIM_LOG_ERR("Invalid timer adapter id %u", adapter_id); return -EINVAL; } diff -Nru dpdk-22.11.9/lib/eventdev/rte_event_timer_adapter.h dpdk-22.11.11/lib/eventdev/rte_event_timer_adapter.h --- dpdk-22.11.9/lib/eventdev/rte_event_timer_adapter.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/eventdev/rte_event_timer_adapter.h 2025-12-24 13:18:07.000000000 +0000 @@ -555,7 +555,7 @@ * Before calling this function, the application allocates * ``struct rte_event_timer`` objects from mempool or huge page backed * application buffers of desired size. On successful allocation, - * application updates the `struct rte_event_timer`` attributes such as + * application updates the ``struct rte_event_timer`` attributes such as * expiry event attributes, timeout ticks from now. * This function submits the event timer arm requests to the event timer adapter * and on expiry, the events will be injected to designated event queue. 
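The eal_vfio changes above separate opening a container from reporting the default one: rte_vfio_get_container_fd() now only returns the fd of the default container set up during rte_vfio_enable() (or a negative value when VFIO is unavailable), while new, process-local containers come from the explicit container API. A minimal usage sketch of that split, assuming an EAL already initialized via rte_eal_init() on Linux with VFIO support; error handling is trimmed to the checks shown.

#include <stdio.h>
#include <rte_vfio.h>

static void
show_vfio_containers(void)
{
    int default_fd, custom_fd;

    /* Reports the default container created when VFIO was enabled;
     * with this change it no longer opens a new container. */
    default_fd = rte_vfio_get_container_fd();
    if (default_fd < 0) {
        printf("VFIO is not enabled or not supported\n");
        return;
    }
    printf("default VFIO container fd: %d\n", default_fd);

    /* A separate container is created explicitly and stays local to the
     * calling process, matching the "custom container" case above. */
    custom_fd = rte_vfio_container_create();
    if (custom_fd < 0) {
        printf("cannot create a custom VFIO container\n");
        return;
    }
    printf("custom VFIO container fd: %d\n", custom_fd);

    rte_vfio_container_destroy(custom_fd);
}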
diff -Nru dpdk-22.11.9/lib/fib/trie.c dpdk-22.11.11/lib/fib/trie.c --- dpdk-22.11.9/lib/fib/trie.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/fib/trie.c 2025-12-24 13:18:07.000000000 +0000 @@ -570,8 +570,7 @@ return 0; } - if ((depth > 24) && (dp->rsvd_tbl8s >= - dp->number_tbl8s - depth_diff)) + if ((depth > 24) && (dp->rsvd_tbl8s + depth_diff > dp->number_tbl8s)) return -ENOSPC; node = rte_rib6_insert(rib, ip_masked, depth); diff -Nru dpdk-22.11.9/lib/gpudev/gpudev.c dpdk-22.11.11/lib/gpudev/gpudev.c --- dpdk-22.11.9/lib/gpudev/gpudev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/gpudev/gpudev.c 2025-12-24 13:18:07.000000000 +0000 @@ -3,6 +3,7 @@ */ #include +#include #include #include diff -Nru dpdk-22.11.9/lib/gpudev/gpudev_driver.h dpdk-22.11.11/lib/gpudev/gpudev_driver.h --- dpdk-22.11.9/lib/gpudev/gpudev_driver.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/gpudev/gpudev_driver.h 2025-12-24 13:18:07.000000000 +0000 @@ -12,11 +12,11 @@ #define RTE_GPUDEV_DRIVER_H #include -#include #include #include +#include #include "rte_gpudev.h" #ifdef __cplusplus @@ -80,7 +80,7 @@ /* Driver functions. */ struct rte_gpu_ops ops; /* Event callback list. */ - TAILQ_HEAD(rte_gpu_callback_list, rte_gpu_callback) callbacks; + RTE_TAILQ_HEAD(rte_gpu_callback_list, rte_gpu_callback) callbacks; /* Current state (used or not) in the running process. */ enum rte_gpu_state process_state; /* Updated by this library. */ /* Driver-specific private data for the running process. */ diff -Nru dpdk-22.11.9/lib/graph/graph.c dpdk-22.11.11/lib/graph/graph.c --- dpdk-22.11.9/lib/graph/graph.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/graph/graph.c 2025-12-24 13:18:07.000000000 +0000 @@ -253,6 +253,20 @@ graph_node->node->name)); } +void +graph_node_replace_all(struct node *old, struct node *new) +{ + struct graph_node *graph_node; + struct graph *graph; + + STAILQ_FOREACH(graph, &graph_list, next) { + STAILQ_FOREACH(graph_node, &graph->node_list, next) { + if (graph_node->node == old) + graph_node->node = new; + } + } +} + static struct rte_graph * graph_mem_fixup_node_ctx(struct rte_graph *graph) { diff -Nru dpdk-22.11.9/lib/graph/graph_private.h dpdk-22.11.11/lib/graph/graph_private.h --- dpdk-22.11.9/lib/graph/graph_private.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/graph/graph_private.h 2025-12-24 13:18:07.000000000 +0000 @@ -222,6 +222,18 @@ /** * @internal * + * Replace all pointers of a given node with another one in all active graphs. + * + * @param old + * Node pointer to replace in all graphs. + * @param new + * Updated pointer. + */ +void graph_node_replace_all(struct node *old, struct node *new); + +/** + * @internal + * * Get the count of source nodes in the graph. 
* * @param graph diff -Nru dpdk-22.11.9/lib/graph/graph_stats.c dpdk-22.11.11/lib/graph/graph_stats.c --- dpdk-22.11.9/lib/graph/graph_stats.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/graph/graph_stats.c 2025-12-24 13:18:07.000000000 +0000 @@ -35,7 +35,6 @@ rte_node_t max_nodes; int socket_id; void *cookie; - size_t sz; struct cluster_node clusters[]; } __rte_cache_aligned; @@ -99,15 +98,55 @@ return 0; }; +static uint32_t +cluster_count_nodes(const struct cluster *cluster) +{ + rte_node_t *nodes = NULL; + uint32_t max_nodes = 0; + + for (unsigned int i = 0; i < cluster->nb_graphs; i++) { + struct graph_node *graph_node; + + STAILQ_FOREACH(graph_node, &cluster->graphs[i]->node_list, next) { + rte_node_t *new_nodes; + unsigned int n; + + for (n = 0; n < max_nodes; n++) { + if (nodes[n] != graph_node->node->id) + continue; + break; + } + if (n != max_nodes) + continue; + + max_nodes++; + new_nodes = realloc(nodes, max_nodes * sizeof(nodes[0])); + if (new_nodes == NULL) { + free(nodes); + return 0; + } + nodes = new_nodes; + nodes[n] = graph_node->node->id; + } + } + free(nodes); + + return max_nodes; +} + static struct rte_graph_cluster_stats * stats_mem_init(struct cluster *cluster, const struct rte_graph_cluster_stats_param *prm) { - size_t sz = sizeof(struct rte_graph_cluster_stats); struct rte_graph_cluster_stats *stats; rte_graph_cluster_stats_cb_t fn; int socket_id = prm->socket_id; uint32_t cluster_node_size; + uint32_t max_nodes; + + max_nodes = cluster_count_nodes(cluster); + if (max_nodes == 0) + return NULL; /* Fix up callback */ fn = prm->fn; @@ -119,25 +158,23 @@ cluster_node_size += cluster->nb_graphs * sizeof(struct rte_node *); cluster_node_size = RTE_ALIGN(cluster_node_size, RTE_CACHE_LINE_SIZE); - stats = realloc(NULL, sz); + stats = rte_zmalloc_socket(NULL, sizeof(struct rte_graph_cluster_stats) + + max_nodes * cluster_node_size, 0, socket_id); if (stats) { - memset(stats, 0, sz); stats->fn = fn; stats->cluster_node_size = cluster_node_size; stats->max_nodes = 0; stats->socket_id = socket_id; stats->cookie = prm->cookie; - stats->sz = sz; } return stats; } static int -stats_mem_populate(struct rte_graph_cluster_stats **stats_in, +stats_mem_populate(struct rte_graph_cluster_stats *stats, struct rte_graph *graph, struct graph_node *graph_node) { - struct rte_graph_cluster_stats *stats = *stats_in; rte_node_t id = graph_node->node->id; struct cluster_node *cluster; struct rte_node *node; @@ -162,42 +199,23 @@ cluster = RTE_PTR_ADD(cluster, stats->cluster_node_size); } - /* Hey, it is a new node, allocate space for it in the reel */ - stats = realloc(stats, stats->sz + stats->cluster_node_size); - if (stats == NULL) - SET_ERR_JMP(ENOMEM, err, "Realloc failed"); - *stats_in = NULL; - - /* Clear the new struct cluster_node area */ - cluster = RTE_PTR_ADD(stats, stats->sz), - memset(cluster, 0, stats->cluster_node_size); memcpy(cluster->stat.name, graph_node->node->name, RTE_NODE_NAMESIZE); cluster->stat.id = graph_node->node->id; cluster->stat.hz = rte_get_timer_hz(); node = graph_node_id_to_ptr(graph, id); if (node == NULL) - SET_ERR_JMP(ENOENT, free, "Failed to find node %s in graph %s", + SET_ERR_JMP(ENOENT, err, "Failed to find node %s in graph %s", graph_node->node->name, graph->name); cluster->nodes[cluster->nb_nodes++] = node; - stats->sz += stats->cluster_node_size; stats->max_nodes++; - *stats_in = stats; return 0; -free: - free(stats); err: return -rte_errno; } static void -stats_mem_fini(struct rte_graph_cluster_stats *stats) -{ - free(stats); -} - 
-static void cluster_init(struct cluster *cluster) { memset(cluster, 0, sizeof(*cluster)); @@ -265,10 +283,7 @@ rte_graph_cluster_stats_create(const struct rte_graph_cluster_stats_param *prm) { struct rte_graph_cluster_stats *stats, *rc = NULL; - struct graph_node *graph_node; struct cluster cluster; - struct graph *graph; - const char *pattern; rte_graph_t i; /* Sanity checks */ @@ -286,35 +301,34 @@ graph_spinlock_lock(); /* Expand graph pattern and add the graph to the cluster */ for (i = 0; i < prm->nb_graph_patterns; i++) { - pattern = prm->graph_patterns[i]; - if (expand_pattern_to_cluster(&cluster, pattern)) + if (expand_pattern_to_cluster(&cluster, prm->graph_patterns[i])) goto bad_pattern; } /* Alloc the stats memory */ stats = stats_mem_init(&cluster, prm); if (stats == NULL) - SET_ERR_JMP(ENOMEM, bad_pattern, "Failed alloc stats memory"); + SET_ERR_JMP(ENOMEM, bad_pattern, "Failed rte_malloc for stats memory"); /* Iterate over M(Graph) x N (Nodes in graph) */ for (i = 0; i < cluster.nb_graphs; i++) { + struct graph_node *graph_node; + struct graph *graph; + graph = cluster.graphs[i]; STAILQ_FOREACH(graph_node, &graph->node_list, next) { struct rte_graph *graph_fp = graph->graph; - if (stats_mem_populate(&stats, graph_fp, graph_node)) + if (stats_mem_populate(stats, graph_fp, graph_node)) goto realloc_fail; } } - /* Finally copy to hugepage memory to avoid pressure on rte_realloc */ - rc = rte_malloc_socket(NULL, stats->sz, 0, stats->socket_id); - if (rc) - rte_memcpy(rc, stats, stats->sz); - else - SET_ERR_JMP(ENOMEM, realloc_fail, "rte_malloc failed"); + rc = stats; + stats = NULL; realloc_fail: - stats_mem_fini(stats); + if (stats != NULL) + rte_graph_cluster_stats_destroy(stats); bad_pattern: graph_spinlock_unlock(); cluster_fini(&cluster); diff -Nru dpdk-22.11.9/lib/graph/node.c dpdk-22.11.11/lib/graph/node.c --- dpdk-22.11.9/lib/graph/node.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/graph/node.c 2025-12-24 13:18:07.000000000 +0000 @@ -258,11 +258,15 @@ need_realloc = max_edges > node->nb_edges; if (need_realloc) { sz = sizeof(struct node) + (max_edges * RTE_NODE_NAMESIZE); - new_node = realloc(node, sz); + new_node = malloc(sz); if (new_node == NULL) { rte_errno = ENOMEM; goto restore; } else { + sz = sizeof(*node) + (node->nb_edges * RTE_NODE_NAMESIZE); + memcpy(new_node, node, sz); + graph_node_replace_all(node, new_node); + free(node); node = new_node; } } diff -Nru dpdk-22.11.9/lib/hash/rte_thash.c dpdk-22.11.11/lib/hash/rte_thash.c --- dpdk-22.11.9/lib/hash/rte_thash.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/hash/rte_thash.c 2025-12-24 13:18:07.000000000 +0000 @@ -437,10 +437,10 @@ static inline uint32_t get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset) { - uint32_t *tmp, val; + uint32_t tmp, val; - tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]); - val = rte_be_to_cpu_32(*tmp); + tmp = *(unaligned_uint32_t *)&ctx->hash_key[offset >> 3]; + val = rte_be_to_cpu_32(tmp); val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) + ctx->reta_sz_log)); diff -Nru dpdk-22.11.9/lib/net/rte_net.c dpdk-22.11.11/lib/net/rte_net.c --- dpdk-22.11.9/lib/net/rte_net.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/net/rte_net.c 2025-12-24 13:18:07.000000000 +0000 @@ -376,6 +376,7 @@ pkt_type |= ptype_tunnel(&proto, m, &off); hdr_lens->tunnel_len = off - prev_off; + hdr_lens->inner_l2_len = off - prev_off; } /* same job for inner header: we need to duplicate the code diff -Nru dpdk-22.11.9/lib/rawdev/rte_rawdev.c 
dpdk-22.11.11/lib/rawdev/rte_rawdev.c --- dpdk-22.11.9/lib/rawdev/rte_rawdev.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/rawdev/rte_rawdev.c 2025-12-24 13:18:07.000000000 +0000 @@ -403,12 +403,12 @@ struct rte_rawdev *dev; int diag; - RTE_RDEV_DEBUG("Start dev_id=%" PRIu8, dev_id); + RTE_RDEV_DEBUG("Start dev_id=%" PRIu16, dev_id); RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); dev = &rte_rawdevs[dev_id]; if (dev->started != 0) { - RTE_RDEV_ERR("Device with dev_id=%" PRIu8 "already started", + RTE_RDEV_ERR("Device with dev_id=%" PRIu16 "already started", dev_id); return 0; } @@ -430,13 +430,13 @@ { struct rte_rawdev *dev; - RTE_RDEV_DEBUG("Stop dev_id=%" PRIu8, dev_id); + RTE_RDEV_DEBUG("Stop dev_id=%" PRIu16, dev_id); RTE_RAWDEV_VALID_DEVID_OR_RET(dev_id); dev = &rte_rawdevs[dev_id]; if (dev->started == 0) { - RTE_RDEV_ERR("Device with dev_id=%" PRIu8 "already stopped", + RTE_RDEV_ERR("Device with dev_id=%" PRIu16 "already stopped", dev_id); return; } diff -Nru dpdk-22.11.9/lib/rawdev/rte_rawdev_pmd.h dpdk-22.11.11/lib/rawdev/rte_rawdev_pmd.h --- dpdk-22.11.9/lib/rawdev/rte_rawdev_pmd.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/rawdev/rte_rawdev_pmd.h 2025-12-24 13:18:07.000000000 +0000 @@ -484,7 +484,7 @@ * >0, ~0: for successful load * <0: for failure * - * @see Application may use 'firmware_version_get` for ascertaining successful + * @see Application may use `firmware_version_get` for ascertaining successful * load */ typedef int (*rawdev_firmware_load_t)(struct rte_rawdev *dev, diff -Nru dpdk-22.11.9/lib/ring/rte_ring_c11_pvt.h dpdk-22.11.11/lib/ring/rte_ring_c11_pvt.h --- dpdk-22.11.9/lib/ring/rte_ring_c11_pvt.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/ring/rte_ring_c11_pvt.h 2025-12-24 13:18:07.000000000 +0000 @@ -24,6 +24,12 @@ if (!single) rte_wait_until_equal_32(&ht->tail, old_val, __ATOMIC_RELAXED); + /* + * R0: Establishes a synchronizing edge with load-acquire of + * cons_tail at A1 or prod_tail at A4. + * Ensures that memory effects by this thread on ring elements array + * is observed by a different thread of the other type. + */ __atomic_store_n(&ht->tail, new_val, __ATOMIC_RELEASE); } @@ -61,19 +67,25 @@ unsigned int max = n; int success; - *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED); + /* + * A0: Establishes a synchronizing edge with R1. + * Ensure that this thread observes same values + * to cons_tail observed by the thread that + * updated r->prod.head. + * If not, an unsafe partial order may ensue. + */ + *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE); do { /* Reset n to the initial burst count */ n = max; - /* Ensure the head is read before tail */ - __atomic_thread_fence(__ATOMIC_ACQUIRE); - - /* load-acquire synchronize with store-release of ht->tail - * in update_tail. + /* + * A1: Establishes a synchronizing edge with R0. + * Ensures that other thread's memory effects on + * ring elements array is observed by the time + * this thread observes its tail update. */ - cons_tail = __atomic_load_n(&r->cons.tail, - __ATOMIC_ACQUIRE); + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have @@ -95,10 +107,19 @@ r->prod.head = *new_head, success = 1; else /* on failure, *old_head is updated */ + /* + * R1/A2. + * R1: Establishes a synchronizing edge with A0 of a + * different thread. 
+ * A2: Establishes a synchronizing edge with R1 of a + * different thread to observe same value for + * cons_tail observed by that thread on CAS failure + * (to retry with an updated *old_head). + */ success = __atomic_compare_exchange_n(&r->prod.head, old_head, *new_head, - 0, __ATOMIC_RELAXED, - __ATOMIC_RELAXED); + 0, __ATOMIC_RELEASE, + __ATOMIC_ACQUIRE); } while (unlikely(success == 0)); return n; } @@ -136,20 +157,25 @@ uint32_t prod_tail; int success; - /* move cons.head atomically */ - *old_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED); + /* + * A3: Establishes a synchronizing edge with R2. + * Ensure that this thread observes same values + * to prod_tail observed by the thread that + * updated r->cons.head. + * If not, an unsafe partial order may ensue. + */ + *old_head = __atomic_load_n(&r->cons.head, __ATOMIC_ACQUIRE); do { /* Restore n as it may change every loop */ n = max; - /* Ensure the head is read before tail */ - __atomic_thread_fence(__ATOMIC_ACQUIRE); - - /* this load-acquire synchronize with store-release of ht->tail - * in update_tail. + /* + * A4: Establishes a synchronizing edge with R0. + * Ensures that other thread's memory effects on + * ring elements array is observed by the time + * this thread observes its tail update. */ - prod_tail = __atomic_load_n(&r->prod.tail, - __ATOMIC_ACQUIRE); + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have @@ -170,10 +196,19 @@ r->cons.head = *new_head, success = 1; else /* on failure, *old_head will be updated */ + /* + * R2/A5. + * R2: Establishes a synchronizing edge with A3 of a + * different thread. + * A5: Establishes a synchronizing edge with R2 of a + * different thread to observe same value for + * prod_tail observed by that thread on CAS failure + * (to retry with an updated *old_head). + */ success = __atomic_compare_exchange_n(&r->cons.head, old_head, *new_head, - 0, __ATOMIC_RELAXED, - __ATOMIC_RELAXED); + 0, __ATOMIC_RELEASE, + __ATOMIC_ACQUIRE); } while (unlikely(success == 0)); return n; } diff -Nru dpdk-22.11.9/lib/ring/rte_ring_hts_elem_pvt.h dpdk-22.11.11/lib/ring/rte_ring_hts_elem_pvt.h --- dpdk-22.11.9/lib/ring/rte_ring_hts_elem_pvt.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/ring/rte_ring_hts_elem_pvt.h 2025-12-24 13:18:07.000000000 +0000 @@ -30,22 +30,40 @@ RTE_SET_USED(enqueue); tail = old_tail + num; + + /* + * R0: Release the tail update. Establishes a synchronization edge with + * the load-acquire at A1/A3. This release ensures that all updates to + * *ht and the ring array made by this thread become visible to the + * opposing thread once the tail value written here is observed. + */ __atomic_store_n(&ht->ht.pos.tail, tail, __ATOMIC_RELEASE); } /** - * @internal waits till tail will become equal to head. - * Means no writer/reader is active for that ring. - * Suppose to work as serialization point. + * @internal + * Waits until the tail becomes equal to the head. + * This indicates that another thread has finished its transaction, and there + * is a chance that we could be the next writer or reader in line. + * + * Returns ht.raw at this point. The value may be imprecise, since another + * thread might change the state before we observe ht.raw, but that does not + * matter. The function __rte_ring_hts_move_head() can detect and recall this + * function when it reaches the linearization point (CAS). 
*/ -static __rte_always_inline void +static __rte_always_inline union __rte_ring_hts_pos __rte_ring_hts_head_wait(const struct rte_ring_hts_headtail *ht, - union __rte_ring_hts_pos *p) + int memorder) { - while (p->pos.head != p->pos.tail) { + union __rte_ring_hts_pos p; + p.raw = __atomic_load_n(&ht->ht.raw, memorder); + + while (p.pos.head != p.pos.tail) { rte_pause(); - p->raw = __atomic_load_n(&ht->ht.raw, __ATOMIC_ACQUIRE); + p.raw = __atomic_load_n(&ht->ht.raw, memorder); } + + return p; } /** @@ -56,13 +74,11 @@ enum rte_ring_queue_behavior behavior, uint32_t *old_head, uint32_t *free_entries) { - uint32_t n; + uint32_t n, cons_tail; union __rte_ring_hts_pos np, op; const uint32_t capacity = r->capacity; - op.raw = __atomic_load_n(&r->hts_prod.ht.raw, __ATOMIC_ACQUIRE); - do { /* Reset n to the initial burst count */ n = num; @@ -72,7 +88,20 @@ * make sure that we read prod head/tail *before* * reading cons tail. */ - __rte_ring_hts_head_wait(&r->hts_prod, &op); + /* + * A0: Synchronizes with the CAS at R1. + * Establishes a happens-before relationship with a thread of the same + * type that released the ht.raw, ensuring this thread observes all of + * its memory effects needed to maintain a safe partial order. + */ + op = __rte_ring_hts_head_wait(&r->hts_prod, __ATOMIC_ACQUIRE); + + /* + * A1: Establish a synchronizes-with edge using a store-release at R0. + * This ensures that all memory effects from the preceding opposing + * thread are observed. + */ + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); /* * The subtraction is done between two unsigned 32bits value @@ -80,7 +109,7 @@ * *old_head > cons_tail). So 'free_entries' is always between 0 * and capacity (which is < size). */ - *free_entries = capacity + r->cons.tail - op.pos.head; + *free_entries = capacity + cons_tail - op.pos.head; /* check that we have enough room in ring */ if (unlikely(n > *free_entries)) @@ -94,13 +123,16 @@ np.pos.head = op.pos.head + n; /* - * this CAS(ACQUIRE, ACQUIRE) serves as a hoist barrier to prevent: - * - OOO reads of cons tail value - * - OOO copy of elems from the ring + * R1: Establishes a synchronizes-with edge with the load-acquire + * of ht.raw at A0. This makes sure that the store-release to the + * tail by this thread, if it was of the opposite type, becomes + * visible to another thread of the current type. That thread will + * then observe the updates in the same order, keeping a safe + * partial order. */ } while (__atomic_compare_exchange_n(&r->hts_prod.ht.raw, &op.raw, np.raw, - 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) == 0); + 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED) == 0); *old_head = op.pos.head; return n; @@ -114,11 +146,9 @@ enum rte_ring_queue_behavior behavior, uint32_t *old_head, uint32_t *entries) { - uint32_t n; + uint32_t n, prod_tail; union __rte_ring_hts_pos np, op; - op.raw = __atomic_load_n(&r->hts_cons.ht.raw, __ATOMIC_ACQUIRE); - /* move cons.head atomically */ do { /* Restore n as it may change every loop */ @@ -129,14 +159,27 @@ * make sure that we read cons head/tail *before* * reading prod tail. */ - __rte_ring_hts_head_wait(&r->hts_cons, &op); + /* + * A2: Synchronizes with the CAS at R2. + * Establishes a happens-before relationship with a thread of the same + * type that released the ht.raw, ensuring this thread observes all of + * its memory effects needed to maintain a safe partial order. + */ + op = __rte_ring_hts_head_wait(&r->hts_cons, __ATOMIC_ACQUIRE); + + /* + * A3: Establish a synchronizes-with edge using a store-release at R0. 
+ * This ensures that all memory effects from the preceding opposing + * thread are observed. + */ + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * cons_head > prod_tail). So 'entries' is always between 0 * and size(ring)-1. */ - *entries = r->prod.tail - op.pos.head; + *entries = prod_tail - op.pos.head; /* Set the actual entries for dequeue */ if (n > *entries) @@ -149,13 +192,16 @@ np.pos.head = op.pos.head + n; /* - * this CAS(ACQUIRE, ACQUIRE) serves as a hoist barrier to prevent: - * - OOO reads of prod tail value - * - OOO copy of elems from the ring + * R2: Establishes a synchronizes-with edge with the load-acquire + * of ht.raw at A2. This makes sure that the store-release to the + * tail by this thread, if it was of the opposite type, becomes + * visible to another thread of the current type. That thread will + * then observe the updates in the same order, keeping a safe + * partial order. */ } while (__atomic_compare_exchange_n(&r->hts_cons.ht.raw, &op.raw, np.raw, - 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) == 0); + 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED) == 0); *old_head = op.pos.head; return n; diff -Nru dpdk-22.11.9/lib/ring/rte_ring_rts_elem_pvt.h dpdk-22.11.11/lib/ring/rte_ring_rts_elem_pvt.h --- dpdk-22.11.9/lib/ring/rte_ring_rts_elem_pvt.h 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/ring/rte_ring_rts_elem_pvt.h 2025-12-24 13:18:07.000000000 +0000 @@ -31,6 +31,17 @@ * might preceded us, then don't update tail with new value. */ + /* + * A0 = {A0.a, A0.b}: Synchronizes with the CAS at R0. + * The CAS at R0 in same typed thread establishes a happens-before + * relationship with this load acquire. Ensures that this thread + * observes the same or later values for h.raw/h.val.cnt + * observed by the other thread when it updated ht->tail.raw. + * If not, ht->tail.raw may get updated out of sync (e.g. getting + * updated to the same value twice). A0.a makes sure this condition + * holds when CAS succeeds and A0.b when it fails. + */ + /* A0.a */ ot.raw = __atomic_load_n(&ht->tail.raw, __ATOMIC_ACQUIRE); do { @@ -40,7 +51,11 @@ nt.raw = ot.raw; if (++nt.val.cnt == h.val.cnt) nt.val.pos = h.val.pos; - + /* + * R0: Synchronizes with A2 of a different thread of the opposite type and A0.b + * of a different thread of the same type. + */ + /* A0.b */ } while (__atomic_compare_exchange_n(&ht->tail.raw, &ot.raw, nt.raw, 0, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE) == 0); } @@ -49,18 +64,22 @@ * @internal This function waits till head/tail distance wouldn't * exceed pre-defined max value. 
*/ -static __rte_always_inline void +static __rte_always_inline union __rte_ring_rts_poscnt __rte_ring_rts_head_wait(const struct rte_ring_rts_headtail *ht, - union __rte_ring_rts_poscnt *h) + int memorder) { - uint32_t max; + union __rte_ring_rts_poscnt h; + uint32_t max = ht->htd_max; - max = ht->htd_max; - while (h->val.pos - ht->tail.val.pos > max) { + h.raw = __atomic_load_n(&ht->head.raw, memorder); + + while (h.val.pos - ht->tail.val.pos > max) { rte_pause(); - h->raw = __atomic_load_n(&ht->head.raw, __ATOMIC_ACQUIRE); + h.raw = __atomic_load_n(&ht->head.raw, memorder); } + + return h; } /** @@ -71,13 +90,11 @@ enum rte_ring_queue_behavior behavior, uint32_t *old_head, uint32_t *free_entries) { - uint32_t n; + uint32_t n, cons_tail; union __rte_ring_rts_poscnt nh, oh; const uint32_t capacity = r->capacity; - oh.raw = __atomic_load_n(&r->rts_prod.head.raw, __ATOMIC_ACQUIRE); - do { /* Reset n to the initial burst count */ n = num; @@ -87,7 +104,20 @@ * make sure that we read prod head *before* * reading cons tail. */ - __rte_ring_rts_head_wait(&r->rts_prod, &oh); + /* + * A1 Synchronizes with the CAS at R1. + * Establishes a happens-before relationship with a thread of the same + * type that released the ht.raw, ensuring this thread observes all of + * its memory effects needed to maintain a safe partial order. + */ + oh = __rte_ring_rts_head_wait(&r->rts_prod, __ATOMIC_ACQUIRE); + + /* + * A2: Establish a synchronizes-with edge using a store-release at R0. + * This ensures that all memory effects from the preceding opposing + * thread are observed. + */ + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); /* * The subtraction is done between two unsigned 32bits value @@ -95,7 +125,7 @@ * *old_head > cons_tail). So 'free_entries' is always between 0 * and capacity (which is < size). */ - *free_entries = capacity + r->cons.tail - oh.val.pos; + *free_entries = capacity + cons_tail - oh.val.pos; /* check that we have enough room in ring */ if (unlikely(n > *free_entries)) @@ -109,13 +139,16 @@ nh.val.cnt = oh.val.cnt + 1; /* - * this CAS(ACQUIRE, ACQUIRE) serves as a hoist barrier to prevent: - * - OOO reads of cons tail value - * - OOO copy of elems to the ring + * R1: Establishes a synchronizes-with edge with the load-acquire + * of ht.raw at A1. Ensures that the store-release to the tail by + * this thread, if it was of the opposite type, becomes + * visible to another thread of the current type. That thread will + * then observe the updates in the same order, keeping a safe + * partial order. */ } while (__atomic_compare_exchange_n(&r->rts_prod.head.raw, &oh.raw, nh.raw, - 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) == 0); + 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED) == 0); *old_head = oh.val.pos; return n; @@ -129,11 +162,9 @@ enum rte_ring_queue_behavior behavior, uint32_t *old_head, uint32_t *entries) { - uint32_t n; + uint32_t n, prod_tail; union __rte_ring_rts_poscnt nh, oh; - oh.raw = __atomic_load_n(&r->rts_cons.head.raw, __ATOMIC_ACQUIRE); - /* move cons.head atomically */ do { /* Restore n as it may change every loop */ @@ -144,14 +175,27 @@ * make sure that we read cons head *before* * reading prod tail. */ - __rte_ring_rts_head_wait(&r->rts_cons, &oh); + /* + * A3: Synchronizes with the CAS at R2. + * Establishes a happens-before relationship with a thread of the same + * type that released the ht.raw, ensuring this thread observes all of + * its memory effects needed to maintain a safe partial order. 
+ */ + oh = __rte_ring_rts_head_wait(&r->rts_cons, __ATOMIC_ACQUIRE); + + /* + * A4: Establish a synchronizes-with edge using a store-release at R0. + * This ensures that all memory effects from the preceding opposing + * thread are observed. + */ + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * cons_head > prod_tail). So 'entries' is always between 0 * and size(ring)-1. */ - *entries = r->prod.tail - oh.val.pos; + *entries = prod_tail - oh.val.pos; /* Set the actual entries for dequeue */ if (n > *entries) @@ -164,13 +208,16 @@ nh.val.cnt = oh.val.cnt + 1; /* - * this CAS(ACQUIRE, ACQUIRE) serves as a hoist barrier to prevent: - * - OOO reads of prod tail value - * - OOO copy of elems from the ring + * R2: Establishes a synchronizes-with edge with the load-acquire + * of ht.raw at A3. Ensures that the store-release to the tail by + * this thread, if it was of the opposite type, becomes + * visible to another thread of the current type. That thread will + * then observe the updates in the same order, keeping a safe + * partial order. */ } while (__atomic_compare_exchange_n(&r->rts_cons.head.raw, &oh.raw, nh.raw, - 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) == 0); + 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED) == 0); *old_head = oh.val.pos; return n; diff -Nru dpdk-22.11.9/lib/sched/rte_sched.c dpdk-22.11.11/lib/sched/rte_sched.c --- dpdk-22.11.9/lib/sched/rte_sched.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/sched/rte_sched.c 2025-12-24 13:18:07.000000000 +0000 @@ -67,7 +67,7 @@ uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /* Weighted Round Robin (WRR) */ - uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE]; + uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE]; /* TC oversubscription */ uint64_t tc_ov_credits; diff -Nru dpdk-22.11.9/lib/vhost/virtio_net.c dpdk-22.11.11/lib/vhost/virtio_net.c --- dpdk-22.11.9/lib/vhost/virtio_net.c 2025-08-19 18:03:58.000000000 +0000 +++ dpdk-22.11.11/lib/vhost/virtio_net.c 2025-12-24 13:18:07.000000000 +0000 @@ -2634,25 +2634,28 @@ } } -static __rte_noinline void +static __rte_always_inline int copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, - struct buf_vector *buf_vec) + const struct buf_vector *buf_vec, + uint16_t nr_vec) { - uint64_t len; - uint64_t remain = sizeof(struct virtio_net_hdr); - uint64_t src; - uint64_t dst = (uint64_t)(uintptr_t)hdr; - - while (remain) { - len = RTE_MIN(remain, buf_vec->buf_len); - src = buf_vec->buf_addr; - rte_memcpy((void *)(uintptr_t)dst, - (void *)(uintptr_t)src, len); + size_t remain = sizeof(struct virtio_net_hdr); + uint8_t *dst = (uint8_t *)hdr; + + while (remain > 0) { + size_t len = RTE_MIN(remain, buf_vec->buf_len); + const void *src = (const void *)(uintptr_t)buf_vec->buf_addr; + + if (unlikely(nr_vec == 0)) + return -1; + memcpy(dst, src, len); remain -= len; dst += len; buf_vec++; + --nr_vec; } + return 0; } static __rte_always_inline int @@ -2679,16 +2682,12 @@ */ if (virtio_net_with_host_offload(dev)) { - if (unlikely(buf_vec[0].buf_len < sizeof(struct virtio_net_hdr))) { - /* - * No luck, the virtio-net header doesn't fit - * in a contiguous virtual area. 
- */ - copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec); - hdr = &tmp_hdr; - } else { - hdr = (struct virtio_net_hdr *)((uintptr_t)buf_vec[0].buf_addr); - } + if (unlikely(copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec, nr_vec) != 0)) + return -1; + + /* ensure that compiler does not delay copy */ + rte_compiler_barrier(); + hdr = &tmp_hdr; } for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) { @@ -3048,7 +3047,6 @@ { uint16_t avail_idx = vq->last_avail_idx; uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); - struct virtio_net_hdr *hdr; uintptr_t desc_addrs[PACKED_BATCH_SIZE]; uint16_t ids[PACKED_BATCH_SIZE]; uint16_t i; @@ -3067,8 +3065,12 @@ if (virtio_net_with_host_offload(dev)) { vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { - hdr = (struct virtio_net_hdr *)(desc_addrs[i]); - vhost_dequeue_offload(dev, hdr, pkts[i], legacy_ol_flags); + struct virtio_net_hdr hdr; + + memcpy(&hdr, (void *)desc_addrs[i], sizeof(struct virtio_net_hdr)); + rte_compiler_barrier(); + + vhost_dequeue_offload(dev, &hdr, pkts[i], legacy_ol_flags); } }
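The vhost change above makes copy_vnet_hdr_from_desc() the single path for reading the virtio-net header: instead of aliasing guest memory when the header happens to be contiguous, the header is always gathered segment by segment into a local struct before being parsed, and rte_compiler_barrier() keeps the compiler from deferring that copy and re-reading guest memory. A standalone sketch of the gather-copy pattern under assumed names (struct seg, gather_copy) rather than the vhost buf_vec types:

#include <stdint.h>
#include <string.h>

struct seg {
    const uint8_t *addr;    /* start of this buffer segment */
    uint32_t len;           /* valid bytes in this segment */
};

/* Copy 'size' bytes spread across up to 'nr_seg' segments into 'dst'.
 * Returns 0 on success, -1 if the segments run out first. */
static int
gather_copy(void *dst, size_t size, const struct seg *seg, uint16_t nr_seg)
{
    uint8_t *out = dst;

    while (size > 0) {
        size_t len;

        if (nr_seg == 0)
            return -1;      /* descriptor chain too short */

        len = size < seg->len ? size : seg->len;
        memcpy(out, seg->addr, len);

        out += len;
        size -= len;
        seg++;
        nr_seg--;
    }
    return 0;
}

Copying into a local header also avoids unaligned accesses and lets the later parsing work on a stable snapshot rather than on memory the guest could still modify, which is why the copy is now performed unconditionally.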