From ec66ec6a5a8f53e7c70085749e8d68f4431c630f Mon Sep 17 00:00:00 2001 From: Pagadala Yesu Anjaneyulu Date: Tue, 24 Mar 2026 11:33:24 +0200 Subject: wifi: iwlwifi: mld: Fix MLO scan timing Calculate MLO scan start time based on actual scan start notification from firmware instead of recording time when scan command is sent. Currently, MLO scan start time was captured immediately after sending the scan command to firmware. However, the actual scan start time may differ due to the FW being busy with a previous scan. In that case, the link selection code will think that the MLO scan is too old, and will warn. To fix it, Implement start scan notification handling to capture the precise moment when firmware begins the scan operation. Fixes: 9324731b9985 ("wifi: iwlwifi: mld: avoid selecting bad links") Signed-off-by: Pagadala Yesu Anjaneyulu Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260324113316.4c56b8bac533.I6e656d8cc30bb82c96aabadedd62bd67f4c46bf9@changeid --- .../net/wireless/intel/iwlwifi/fw/api/commands.h | 5 ++++ drivers/net/wireless/intel/iwlwifi/fw/api/scan.h | 10 ++++++++ drivers/net/wireless/intel/iwlwifi/mld/mld.c | 1 + drivers/net/wireless/intel/iwlwifi/mld/mlo.c | 4 +-- drivers/net/wireless/intel/iwlwifi/mld/notif.c | 5 ++++ drivers/net/wireless/intel/iwlwifi/mld/scan.c | 30 +++++++++++++++++++--- drivers/net/wireless/intel/iwlwifi/mld/scan.h | 9 ++++--- 7 files changed, 56 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h index 8d64a271bb94..36159a769916 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h @@ -296,6 +296,11 @@ enum iwl_legacy_cmds { */ SCAN_OFFLOAD_UPDATE_PROFILES_CMD = 0x6E, + /** + * @SCAN_START_NOTIFICATION_UMAC: uses &struct iwl_umac_scan_start + */ + SCAN_START_NOTIFICATION_UMAC = 0xb2, + /** * @MATCH_FOUND_NOTIFICATION: scan match found */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h index 60f0a4924ddf..46fcc32608e3 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h @@ -1156,6 +1156,16 @@ enum iwl_umac_scan_abort_status { IWL_UMAC_SCAN_ABORT_STATUS_NOT_FOUND, }; +/** + * struct iwl_umac_scan_start - scan start notification + * @uid: scan id, &enum iwl_umac_scan_uid_offsets + * @reserved: for future use + */ +struct iwl_umac_scan_start { + __le32 uid; + __le32 reserved; +} __packed; /* SCAN_START_UMAC_API_S_VER_1 */ + /** * struct iwl_umac_scan_complete - scan complete notification * @uid: scan id, &enum iwl_umac_scan_uid_offsets diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index 495e9d8f3af6..9af79297c3b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -171,6 +171,7 @@ static const struct iwl_hcmd_names iwl_mld_legacy_names[] = { HCMD_NAME(MISSED_BEACONS_NOTIFICATION), HCMD_NAME(MAC_PM_POWER_TABLE), HCMD_NAME(MFUART_LOAD_NOTIFICATION), + HCMD_NAME(SCAN_START_NOTIFICATION_UMAC), HCMD_NAME(RSS_CONFIG_CMD), HCMD_NAME(SCAN_ITERATION_COMPLETE_UMAC), HCMD_NAME(REPLY_RX_MPDU_CMD), diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c index f842f5183223..fbff5915f7fd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c @@ -739,7 +739,7 @@ iwl_mld_set_link_sel_data(struct iwl_mld *mld, /* Ignore any BSS that was not seen in the last MLO scan */ if (ktime_before(link_conf->bss->ts_boottime, - mld->scan.last_mlo_scan_time)) + mld->scan.last_mlo_scan_start_time)) continue; data[n_data].link_id = link_id; @@ -945,7 +945,7 @@ static void _iwl_mld_select_links(struct iwl_mld *mld, if (!mld_vif->authorized || hweight16(usable_links) <= 1) return; - if (WARN(ktime_before(mld->scan.last_mlo_scan_time, + if (WARN(ktime_before(mld->scan.last_mlo_scan_start_time, ktime_sub_ns(ktime_get_boottime_ns(), 5ULL * NSEC_PER_SEC)), "Last MLO scan was too long ago, can't select links\n")) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/notif.c b/drivers/net/wireless/intel/iwlwifi/mld/notif.c index 240526d8b632..9c88a8579a75 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/notif.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/notif.c @@ -287,6 +287,8 @@ static void iwl_mld_handle_beacon_notification(struct iwl_mld *mld, * at least enough bytes to cover the structure listed in the CMD_VER_ENTRY. */ +CMD_VERSIONS(scan_start_notif, + CMD_VER_ENTRY(1, iwl_umac_scan_start)) CMD_VERSIONS(scan_complete_notif, CMD_VER_ENTRY(1, iwl_umac_scan_complete)) CMD_VERSIONS(scan_iter_complete_notif, @@ -360,6 +362,7 @@ DEFINE_SIMPLE_CANCELLATION(datapath_monitor, iwl_datapath_monitor_notif, link_id) DEFINE_SIMPLE_CANCELLATION(roc, iwl_roc_notif, activity) DEFINE_SIMPLE_CANCELLATION(scan_complete, iwl_umac_scan_complete, uid) +DEFINE_SIMPLE_CANCELLATION(scan_start, iwl_umac_scan_start, uid) DEFINE_SIMPLE_CANCELLATION(probe_resp_data, iwl_probe_resp_data_notif, mac_id) DEFINE_SIMPLE_CANCELLATION(uapsd_misbehaving_ap, iwl_uapsd_misbehaving_ap_notif, @@ -402,6 +405,8 @@ const struct iwl_rx_handler iwl_mld_rx_handlers[] = { RX_HANDLER_SYNC) RX_HANDLER_NO_OBJECT(LEGACY_GROUP, BA_NOTIF, compressed_ba_notif, RX_HANDLER_SYNC) + RX_HANDLER_OF_SCAN(LEGACY_GROUP, SCAN_START_NOTIFICATION_UMAC, + scan_start_notif) RX_HANDLER_OF_SCAN(LEGACY_GROUP, SCAN_COMPLETE_UMAC, scan_complete_notif) RX_HANDLER_NO_OBJECT(LEGACY_GROUP, SCAN_ITERATION_COMPLETE_UMAC, diff --git a/drivers/net/wireless/intel/iwlwifi/mld/scan.c b/drivers/net/wireless/intel/iwlwifi/mld/scan.c index a1a4cf3ab3d3..abd4281b4b0e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/scan.c @@ -473,6 +473,9 @@ iwl_mld_scan_get_cmd_gen_flags(struct iwl_mld *mld, params->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ) flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_TRIGGER_UHB_SCAN; + if (scan_status == IWL_MLD_SCAN_INT_MLO) + flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_NTF_START; + if (params->enable_6ghz_passive) flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_6GHZ_PASSIVE_SCAN; @@ -1817,9 +1820,6 @@ static void iwl_mld_int_mlo_scan_start(struct iwl_mld *mld, ret = _iwl_mld_single_scan_start(mld, vif, req, &ies, IWL_MLD_SCAN_INT_MLO); - if (!ret) - mld->scan.last_mlo_scan_time = ktime_get_boottime_ns(); - IWL_DEBUG_SCAN(mld, "Internal MLO scan: ret=%d\n", ret); } @@ -1904,6 +1904,30 @@ void iwl_mld_handle_match_found_notif(struct iwl_mld *mld, ieee80211_sched_scan_results(mld->hw); } +void iwl_mld_handle_scan_start_notif(struct iwl_mld *mld, + struct iwl_rx_packet *pkt) +{ + struct iwl_umac_scan_complete *notif = (void *)pkt->data; + u32 uid = le32_to_cpu(notif->uid); + + if (IWL_FW_CHECK(mld, uid >= ARRAY_SIZE(mld->scan.uid_status), + "FW reports out-of-range scan UID %d\n", uid)) + return; + + if (IWL_FW_CHECK(mld, !(mld->scan.uid_status[uid] & mld->scan.status), + "FW reports scan UID %d we didn't trigger\n", uid)) + return; + + IWL_DEBUG_SCAN(mld, "Scan started: uid=%u type=%u\n", uid, + mld->scan.uid_status[uid]); + if (IWL_FW_CHECK(mld, mld->scan.uid_status[uid] != IWL_MLD_SCAN_INT_MLO, + "FW reports scan start notification %d we didn't trigger\n", + mld->scan.uid_status[uid])) + return; + + mld->scan.last_mlo_scan_start_time = ktime_get_boottime_ns(); +} + void iwl_mld_handle_scan_complete_notif(struct iwl_mld *mld, struct iwl_rx_packet *pkt) { diff --git a/drivers/net/wireless/intel/iwlwifi/mld/scan.h b/drivers/net/wireless/intel/iwlwifi/mld/scan.h index 69110f0cfc8e..de5620e7f463 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/scan.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/scan.h @@ -27,6 +27,9 @@ int iwl_mld_sched_scan_start(struct iwl_mld *mld, void iwl_mld_handle_match_found_notif(struct iwl_mld *mld, struct iwl_rx_packet *pkt); +void iwl_mld_handle_scan_start_notif(struct iwl_mld *mld, + struct iwl_rx_packet *pkt); + void iwl_mld_handle_scan_complete_notif(struct iwl_mld *mld, struct iwl_rx_packet *pkt); @@ -114,8 +117,8 @@ enum iwl_mld_traffic_load { * in jiffies. * @last_start_time_jiffies: stores the last start time in jiffies * (interface up/reset/resume). - * @last_mlo_scan_time: start time of the last MLO scan in nanoseconds since - * boot. + * @last_mlo_scan_start_time: start time of the last MLO scan in nanoseconds + * since boot. */ struct iwl_mld_scan { /* Add here fields that need clean up on restart */ @@ -136,7 +139,7 @@ struct iwl_mld_scan { void *cmd; unsigned long last_6ghz_passive_jiffies; unsigned long last_start_time_jiffies; - u64 last_mlo_scan_time; + u64 last_mlo_scan_start_time; }; /** -- cgit v1.2.3 From 323156c3541e23da7e582008a7ac30cd51b60acd Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 24 Mar 2026 11:33:25 +0200 Subject: wifi: iwlwifi: mvm: don't send a 6E related command when not supported MCC_ALLOWED_AP_TYPE_CMD is related to 6E support. Do not send it if the device doesn't support 6E. Apparently, the firmware is mistakenly advertising support for this command even on AX201 which does not support 6E and then the firmware crashes. Fixes: 0d2fc8821a7d ("wifi: iwlwifi: nvm: parse the VLP/AFC bit from regulatory") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220804 Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260324113316.e171f0163f2a.I0c444d1f82d1773054e7ffc391ad49697d58f44e@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 43cf94c9a36b..6cc78661116e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -470,7 +470,8 @@ static void iwl_mvm_uats_init(struct iwl_mvm *mvm) .dataflags[0] = IWL_HCMD_DFL_NOCOPY, }; - if (mvm->trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210) { + if (mvm->trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210 || + !mvm->trans->cfg->uhb_supported) { IWL_DEBUG_RADIO(mvm, "UATS feature is not supported\n"); return; } -- cgit v1.2.3 From 687a95d204e72e52f2e6bc7a994cc82f76b2678f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Mar 2026 11:33:26 +0200 Subject: wifi: iwlwifi: mld: correctly set wifi generation data In each MAC context, the firmware expects the wifi generation data, i.e. whether or not HE/EHT (and in the future UHR) is enabled on that MAC. However, this is currently handled wrong in two ways: - EHT is only enabled when the interface is also an MLD, but we currently allow (despite the spec) connecting with EHT but without MLO. - when HE or EHT are used by TDLS peers, the firmware needs to have them enabled regardless of the AP Fix this by iterating setting up the data depending on the interface type: - for AP, just set it according to the BSS configuration - for monitor, set it according to HW capabilities - otherwise, particularly for client, iterate all stations and then their links on the interface in question and set according to their capabilities, this handles the AP and TDLS peers. Re-calculate this whenever a TDLS station is marked associated or removed so that it's kept updated, for the AP it's already updated on assoc/disassoc. Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.404713b22177.Ic972b5e557d011a5438f8f97c1e793cc829e2ea9@changeid Link: https://patch.msgid.link/20260324093333.2953495-1-miriam.rachel.korenblit@intel.com --- drivers/net/wireless/intel/iwlwifi/mld/iface.c | 101 +++++++++++++++------- drivers/net/wireless/intel/iwlwifi/mld/mac80211.c | 19 ++++ 2 files changed, 88 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.c b/drivers/net/wireless/intel/iwlwifi/mld/iface.c index 29df747c8938..9215fc7e2eca 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.c @@ -111,14 +111,75 @@ static bool iwl_mld_is_nic_ack_enabled(struct iwl_mld *mld, IEEE80211_HE_MAC_CAP2_ACK_EN); } -static void iwl_mld_set_he_support(struct iwl_mld *mld, - struct ieee80211_vif *vif, - struct iwl_mac_config_cmd *cmd) +struct iwl_mld_mac_wifi_gen_sta_iter_data { + struct ieee80211_vif *vif; + struct iwl_mac_wifi_gen_support *support; +}; + +static void iwl_mld_mac_wifi_gen_sta_iter(void *_data, + struct ieee80211_sta *sta) { - if (vif->type == NL80211_IFTYPE_AP) - cmd->wifi_gen.he_ap_support = 1; - else - cmd->wifi_gen.he_support = 1; + struct iwl_mld_sta *mld_sta = iwl_mld_sta_from_mac80211(sta); + struct iwl_mld_mac_wifi_gen_sta_iter_data *data = _data; + struct ieee80211_link_sta *link_sta; + unsigned int link_id; + + if (mld_sta->vif != data->vif) + return; + + for_each_sta_active_link(data->vif, sta, link_sta, link_id) { + if (link_sta->he_cap.has_he) + data->support->he_support = 1; + if (link_sta->eht_cap.has_eht) + data->support->eht_support = 1; + } +} + +static void iwl_mld_set_wifi_gen(struct iwl_mld *mld, + struct ieee80211_vif *vif, + struct iwl_mac_wifi_gen_support *support) +{ + struct iwl_mld_mac_wifi_gen_sta_iter_data sta_iter_data = { + .vif = vif, + .support = support, + }; + struct ieee80211_bss_conf *link_conf; + unsigned int link_id; + + switch (vif->type) { + case NL80211_IFTYPE_MONITOR: + /* for sniffer, set to HW capabilities */ + support->he_support = 1; + support->eht_support = mld->trans->cfg->eht_supported; + break; + case NL80211_IFTYPE_AP: + /* for AP set according to the link configs */ + for_each_vif_active_link(vif, link_conf, link_id) { + support->he_ap_support |= link_conf->he_support; + support->eht_support |= link_conf->eht_support; + } + break; + default: + /* + * If we have MLO enabled, then the firmware needs to enable + * address translation for the station(s) we add. That depends + * on having EHT enabled in firmware, which in turn depends on + * mac80211 in the iteration below. + * However, mac80211 doesn't enable capabilities on the AP STA + * until it has parsed the association response successfully, + * so set EHT (and HE as a pre-requisite for EHT) when the vif + * is an MLD. + */ + if (ieee80211_vif_is_mld(vif)) { + support->he_support = 1; + support->eht_support = 1; + } + + ieee80211_iterate_stations_mtx(mld->hw, + iwl_mld_mac_wifi_gen_sta_iter, + &sta_iter_data); + break; + } } /* fill the common part for all interface types */ @@ -128,8 +189,6 @@ static void iwl_mld_mac_cmd_fill_common(struct iwl_mld *mld, u32 action) { struct iwl_mld_vif *mld_vif = iwl_mld_vif_from_mac80211(vif); - struct ieee80211_bss_conf *link_conf; - unsigned int link_id; lockdep_assert_wiphy(mld->wiphy); @@ -147,29 +206,7 @@ static void iwl_mld_mac_cmd_fill_common(struct iwl_mld *mld, cmd->nic_not_ack_enabled = cpu_to_le32(!iwl_mld_is_nic_ack_enabled(mld, vif)); - /* If we have MLO enabled, then the firmware needs to enable - * address translation for the station(s) we add. That depends - * on having EHT enabled in firmware, which in turn depends on - * mac80211 in the code below. - * However, mac80211 doesn't enable HE/EHT until it has parsed - * the association response successfully, so just skip all that - * and enable both when we have MLO. - */ - if (ieee80211_vif_is_mld(vif)) { - iwl_mld_set_he_support(mld, vif, cmd); - cmd->wifi_gen.eht_support = 1; - return; - } - - for_each_vif_active_link(vif, link_conf, link_id) { - if (!link_conf->he_support) - continue; - - iwl_mld_set_he_support(mld, vif, cmd); - - /* EHT, if supported, was already set above */ - break; - } + iwl_mld_set_wifi_gen(mld, vif, &cmd->wifi_gen); } static void iwl_mld_fill_mac_cmd_sta(struct iwl_mld *mld, diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 0c53d6bd9651..71a9a72c9ac0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -1761,6 +1761,16 @@ static int iwl_mld_move_sta_state_up(struct iwl_mld *mld, if (vif->type == NL80211_IFTYPE_STATION) iwl_mld_link_set_2mhz_block(mld, vif, sta); + + if (sta->tdls) { + /* + * update MAC since wifi generation flags may change, + * we also update MAC on association to the AP via the + * vif assoc change + */ + iwl_mld_mac_fw_action(mld, vif, FW_CTXT_ACTION_MODIFY); + } + /* Now the link_sta's capabilities are set, update the FW */ iwl_mld_config_tlc(mld, vif, sta); @@ -1873,6 +1883,15 @@ static int iwl_mld_move_sta_state_down(struct iwl_mld *mld, /* just removed last TDLS STA, so enable PM */ iwl_mld_update_mac_power(mld, vif, false); } + + if (sta->tdls) { + /* + * update MAC since wifi generation flags may change, + * we also update MAC on disassociation to the AP via + * the vif assoc change + */ + iwl_mld_mac_fw_action(mld, vif, FW_CTXT_ACTION_MODIFY); + } } else { return -EINVAL; } -- cgit v1.2.3 From e225b36f83d7926c1f2035923bb0359d851fdb73 Mon Sep 17 00:00:00 2001 From: Reshma Immaculate Rajkumar Date: Thu, 19 Mar 2026 12:26:08 +0530 Subject: wifi: ath11k: Pass the correct value of each TID during a stop AMPDU session During ongoing traffic, a request to stop an AMPDU session for one TID could incorrectly affect other active sessions. This can happen because an incorrect TID reference would be passed when updating the BA session state, causing the wrong session to be stopped. As a result, the affected session would be reduced to a minimal BA size, leading to a noticeable throughput degradation. Fix this issue by passing the correct argument from ath11k_dp_rx_ampdu_stop() to ath11k_peer_rx_tid_reo_update() during a stop AMPDU session. Instead of passing peer->tx_tid, which is the base address of the array, corresponding to TID 0; pass the value of &peer->rx_tid[params->tid], where the different TID numbers are accounted for. Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.9.0.1-02146-QCAHKSWPL_SILICONZ-1 Fixes: d5c65159f2895 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Signed-off-by: Reshma Immaculate Rajkumar Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260319065608.2408179-1-reshma.rajkumar@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/dp_rx.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 49d959b2e148..85defe11750d 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include @@ -1110,9 +1110,8 @@ int ath11k_dp_rx_ampdu_stop(struct ath11k *ar, struct ath11k_base *ab = ar->ab; struct ath11k_peer *peer; struct ath11k_sta *arsta = ath11k_sta_to_arsta(params->sta); + struct dp_rx_tid *rx_tid; int vdev_id = arsta->arvif->vdev_id; - dma_addr_t paddr; - bool active; int ret; spin_lock_bh(&ab->base_lock); @@ -1124,15 +1123,14 @@ int ath11k_dp_rx_ampdu_stop(struct ath11k *ar, return -ENOENT; } - paddr = peer->rx_tid[params->tid].paddr; - active = peer->rx_tid[params->tid].active; + rx_tid = &peer->rx_tid[params->tid]; - if (!active) { + if (!rx_tid->active) { spin_unlock_bh(&ab->base_lock); return 0; } - ret = ath11k_peer_rx_tid_reo_update(ar, peer, peer->rx_tid, 1, 0, false); + ret = ath11k_peer_rx_tid_reo_update(ar, peer, rx_tid, 1, 0, false); spin_unlock_bh(&ab->base_lock); if (ret) { ath11k_warn(ab, "failed to update reo for rx tid %d: %d\n", @@ -1141,7 +1139,8 @@ int ath11k_dp_rx_ampdu_stop(struct ath11k *ar, } ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, - params->sta->addr, paddr, + params->sta->addr, + rx_tid->paddr, params->tid, 1, 1); if (ret) ath11k_warn(ab, "failed to send wmi to delete rx tid %d\n", -- cgit v1.2.3 From 4242625f272974dd1947f73b10d884eab3b277cd Mon Sep 17 00:00:00 2001 From: Reshma Immaculate Rajkumar Date: Fri, 27 Feb 2026 16:31:23 +0530 Subject: wifi: ath12k: Pass the correct value of each TID during a stop AMPDU session With traffic ongoing for data TID [TID 0], an DELBA request to stop AMPDU for the BA session was received on management TID [TID 4]. The corresponding TID number was incorrectly passed to stop the BA session, resulting in the BA session for data TIDs being stopped and the BA size being reduced to 1, causing an overall dip in TCP throughput. Fix this issue by passing the correct argument from ath12k_dp_rx_ampdu_stop() to ath12k_dp_arch_peer_rx_tid_reo_update() during an AMPDU stop session. Instead of passing peer->dp_peer->rx_tid, which is the base address of the array, corresponding to TID 0, pass the value of &peer->dp_peer->rx_tid[params->tid]. With this, the different TID numbers are accounted for. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.5-01651-QCAHKSWPL_SILICONZ-1 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Reshma Immaculate Rajkumar Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260227110123.3726354-1-reshma.rajkumar@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_rx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 5a82ede65dd3..244d5230a5bd 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -735,6 +735,7 @@ int ath12k_dp_rx_ampdu_stop(struct ath12k *ar, struct ath12k_dp *dp = ath12k_ab_to_dp(ab); struct ath12k_dp_link_peer *peer; struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(params->sta); + struct ath12k_dp_rx_tid *rx_tid; struct ath12k_link_sta *arsta; int vdev_id; bool active; @@ -770,7 +771,8 @@ int ath12k_dp_rx_ampdu_stop(struct ath12k *ar, return 0; } - ret = ath12k_dp_arch_peer_rx_tid_reo_update(dp, peer, peer->dp_peer->rx_tid, + rx_tid = &peer->dp_peer->rx_tid[params->tid]; + ret = ath12k_dp_arch_peer_rx_tid_reo_update(dp, peer, rx_tid, 1, 0, false); spin_unlock_bh(&dp->dp_lock); if (ret) { -- cgit v1.2.3 From d049e56b1739101d1c4d81deedb269c52a8dbba0 Mon Sep 17 00:00:00 2001 From: Yasuaki Torimaru Date: Tue, 24 Mar 2026 19:06:24 +0900 Subject: wifi: wilc1000: fix u8 overflow in SSID scan buffer size calculation The variable valuesize is declared as u8 but accumulates the total length of all SSIDs to scan. Each SSID contributes up to 33 bytes (IEEE80211_MAX_SSID_LEN + 1), and with WILC_MAX_NUM_PROBED_SSID (10) SSIDs the total can reach 330, which wraps around to 74 when stored in a u8. This causes kmalloc to allocate only 75 bytes while the subsequent memcpy writes up to 331 bytes into the buffer, resulting in a 256-byte heap buffer overflow. Widen valuesize from u8 to u32 to accommodate the full range. Fixes: c5c77ba18ea6 ("staging: wilc1000: Add SDIO/SPI 802.11 driver") Cc: stable@vger.kernel.org Signed-off-by: Yasuaki Torimaru Link: https://patch.msgid.link/20260324100624.983458-1-yasuakitorimaru@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/microchip/wilc1000/hif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index f354b11cb919..944b2a812b63 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -163,7 +163,7 @@ int wilc_scan(struct wilc_vif *vif, u8 scan_source, u32 index = 0; u32 i, scan_timeout; u8 *buffer; - u8 valuesize = 0; + u32 valuesize = 0; u8 *search_ssid_vals = NULL; const u8 ch_list_len = request->n_channels; struct host_if_drv *hif_drv = vif->hif_drv; -- cgit v1.2.3 From 0fd56fad9c56356e7fa7a7c52e7ecbf807a44eb0 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Mon, 23 Mar 2026 16:08:45 +0800 Subject: wifi: wl1251: validate packet IDs before indexing tx_frames wl1251_tx_packet_cb() uses the firmware completion ID directly to index the fixed 16-entry wl->tx_frames[] array. The ID is a raw u8 from the completion block, and the callback does not currently verify that it fits the array before dereferencing it. Reject completion IDs that fall outside wl->tx_frames[] and keep the existing NULL check in the same guard. This keeps the fix local to the trust boundary and avoids touching the rest of the completion flow. Signed-off-by: Pengpeng Hou Link: https://patch.msgid.link/20260323080845.40033-1-pengpeng@iscas.ac.cn Signed-off-by: Johannes Berg --- drivers/net/wireless/ti/wl1251/tx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ti/wl1251/tx.c b/drivers/net/wireless/ti/wl1251/tx.c index 2da8c0d5105b..4489aa77bb0f 100644 --- a/drivers/net/wireless/ti/wl1251/tx.c +++ b/drivers/net/wireless/ti/wl1251/tx.c @@ -402,12 +402,14 @@ static void wl1251_tx_packet_cb(struct wl1251 *wl, int hdrlen; u8 *frame; - skb = wl->tx_frames[result->id]; - if (skb == NULL) { - wl1251_error("SKB for packet %d is NULL", result->id); + if (unlikely(result->id >= ARRAY_SIZE(wl->tx_frames) || + wl->tx_frames[result->id] == NULL)) { + wl1251_error("invalid packet id %u", result->id); return; } + skb = wl->tx_frames[result->id]; + info = IEEE80211_SKB_CB(skb); if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) && -- cgit v1.2.3 From 744fabc338e87b95c4d1ff7c95bc8c0f834c6d99 Mon Sep 17 00:00:00 2001 From: Alexey Velichayshiy Date: Sat, 7 Feb 2026 18:03:22 +0300 Subject: wifi: iwlwifi: mvm: fix potential out-of-bounds read in iwl_mvm_nd_match_info_handler() The memcpy function assumes the dynamic array notif->matches is at least as large as the number of bytes to copy. Otherwise, results->matches may contain unwanted data. To guarantee safety, extend the validation in one of the checks to ensure sufficient packet length. Found by Linux Verification Center (linuxtesting.org) with SVACE. Cc: stable@vger.kernel.org Fixes: 5ac54afd4d97 ("wifi: iwlwifi: mvm: Add handling for scan offload match info notification") Signed-off-by: Alexey Velichayshiy Link: https://patch.msgid.link/20260207150335.1013646-1-a.velichayshiy@ispras.ru Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index a19f9d2e9346..9a74f60c9185 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -2807,7 +2807,7 @@ static void iwl_mvm_nd_match_info_handler(struct iwl_mvm *mvm, if (IS_ERR_OR_NULL(vif)) return; - if (len < sizeof(struct iwl_scan_offload_match_info)) { + if (len < sizeof(struct iwl_scan_offload_match_info) + matches_len) { IWL_ERR(mvm, "Invalid scan match info notification\n"); return; } -- cgit v1.2.3 From 789b06f9f39cdc7e895bdab2c034e39c41c8f8d6 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Wed, 25 Mar 2026 01:46:02 +0300 Subject: wifi: virt_wifi: remove SET_NETDEV_DEV to avoid use-after-free Currently we execute `SET_NETDEV_DEV(dev, &priv->lowerdev->dev)` for the virt_wifi net devices. However, unregistering a virt_wifi device in netdev_run_todo() can happen together with the device referenced by SET_NETDEV_DEV(). It can result in use-after-free during the ethtool operations performed on a virt_wifi device that is currently being unregistered. Such a net device can have the `dev.parent` field pointing to the freed memory, but ethnl_ops_begin() calls `pm_runtime_get_sync(dev->dev.parent)`. Let's remove SET_NETDEV_DEV for virt_wifi to avoid bugs like this: ================================================================== BUG: KASAN: slab-use-after-free in __pm_runtime_resume+0xe2/0xf0 Read of size 2 at addr ffff88810cfc46f8 by task pm/606 Call Trace: dump_stack_lvl+0x4d/0x70 print_report+0x170/0x4f3 ? __pfx__raw_spin_lock_irqsave+0x10/0x10 kasan_report+0xda/0x110 ? __pm_runtime_resume+0xe2/0xf0 ? __pm_runtime_resume+0xe2/0xf0 __pm_runtime_resume+0xe2/0xf0 ethnl_ops_begin+0x49/0x270 ethnl_set_features+0x23c/0xab0 ? __pfx_ethnl_set_features+0x10/0x10 ? kvm_sched_clock_read+0x11/0x20 ? local_clock_noinstr+0xf/0xf0 ? local_clock+0x10/0x30 ? kasan_save_track+0x25/0x60 ? __kasan_kmalloc+0x7f/0x90 ? genl_family_rcv_msg_attrs_parse.isra.0+0x150/0x2c0 genl_family_rcv_msg_doit+0x1e7/0x2c0 ? __pfx_genl_family_rcv_msg_doit+0x10/0x10 ? __pfx_cred_has_capability.isra.0+0x10/0x10 ? stack_trace_save+0x8e/0xc0 genl_rcv_msg+0x411/0x660 ? __pfx_genl_rcv_msg+0x10/0x10 ? __pfx_ethnl_set_features+0x10/0x10 netlink_rcv_skb+0x121/0x380 ? __pfx_genl_rcv_msg+0x10/0x10 ? __pfx_netlink_rcv_skb+0x10/0x10 ? __pfx_down_read+0x10/0x10 genl_rcv+0x23/0x30 netlink_unicast+0x60f/0x830 ? __pfx_netlink_unicast+0x10/0x10 ? __pfx___alloc_skb+0x10/0x10 netlink_sendmsg+0x6ea/0xbc0 ? __pfx_netlink_sendmsg+0x10/0x10 ? __futex_queue+0x10b/0x1f0 ____sys_sendmsg+0x7a2/0x950 ? copy_msghdr_from_user+0x26b/0x430 ? __pfx_____sys_sendmsg+0x10/0x10 ? __pfx_copy_msghdr_from_user+0x10/0x10 ___sys_sendmsg+0xf8/0x180 ? __pfx____sys_sendmsg+0x10/0x10 ? __pfx_futex_wait+0x10/0x10 ? fdget+0x2e4/0x4a0 __sys_sendmsg+0x11f/0x1c0 ? __pfx___sys_sendmsg+0x10/0x10 do_syscall_64+0xe2/0x570 ? exc_page_fault+0x66/0xb0 entry_SYSCALL_64_after_hwframe+0x77/0x7f This fix may be combined with another one in the ethtool subsystem: https://lore.kernel.org/all/20260322075917.254874-1-alex.popov@linux.com/T/#u Fixes: d43c65b05b848e0b ("ethtool: runtime-resume netdev parent in ethnl_ops_begin") Cc: stable@vger.kernel.org Signed-off-by: Alexander Popov Acked-by: Greg Kroah-Hartman Reviewed-by: Breno Leitao Link: https://patch.msgid.link/20260324224607.374327-1-alex.popov@linux.com Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/virt_wifi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/virtual/virt_wifi.c b/drivers/net/wireless/virtual/virt_wifi.c index 885dc7243e8d..97bd39d89e98 100644 --- a/drivers/net/wireless/virtual/virt_wifi.c +++ b/drivers/net/wireless/virtual/virt_wifi.c @@ -557,7 +557,6 @@ static int virt_wifi_newlink(struct net_device *dev, eth_hw_addr_inherit(dev, priv->lowerdev); netif_stacked_transfer_operstate(priv->lowerdev, dev); - SET_NETDEV_DEV(dev, &priv->lowerdev->dev); dev->ieee80211_ptr = kzalloc_obj(*dev->ieee80211_ptr); if (!dev->ieee80211_ptr) { -- cgit v1.2.3 From 629ec78ef8608d955ce217880cdc3e1873af3a15 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 24 Mar 2026 00:25:57 +0100 Subject: mpls: add seqcount to protect the platform_label{,s} pair The RCU-protected codepaths (mpls_forward, mpls_dump_routes) can have an inconsistent view of platform_labels vs platform_label in case of a concurrent resize (resize_platform_label_table, under platform_mutex). This can lead to OOB accesses. This patch adds a seqcount, so that we get a consistent snapshot. Note that mpls_label_ok is also susceptible to this, so the check against RTA_DST in rtm_to_route_config, done outside platform_mutex, is not sufficient. This value gets passed to mpls_label_ok once more in both mpls_route_add and mpls_route_del, so there is no issue, but that additional check must not be removed. Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Fixes: 7720c01f3f590 ("mpls: Add a sysctl to control the size of the mpls label table") Fixes: dde1b38e873c ("mpls: Convert mpls_dump_routes() to RCU.") Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/cd8fca15e3eb7e212b094064cd83652e20fd9d31.1774284088.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- include/net/netns/mpls.h | 1 + net/mpls/af_mpls.c | 29 +++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index 6682e51513ef..2073cbac2afb 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -17,6 +17,7 @@ struct netns_mpls { size_t platform_labels; struct mpls_route __rcu * __rcu *platform_label; struct mutex platform_mutex; + seqcount_mutex_t platform_label_seq; struct ctl_table_header *ctl; }; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index d5417688f69e..18d3da8ab384 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -83,14 +83,30 @@ static struct mpls_route *mpls_route_input(struct net *net, unsigned int index) return mpls_dereference(net, platform_label[index]); } +static struct mpls_route __rcu **mpls_platform_label_rcu(struct net *net, size_t *platform_labels) +{ + struct mpls_route __rcu **platform_label; + unsigned int sequence; + + do { + sequence = read_seqcount_begin(&net->mpls.platform_label_seq); + platform_label = rcu_dereference(net->mpls.platform_label); + *platform_labels = net->mpls.platform_labels; + } while (read_seqcount_retry(&net->mpls.platform_label_seq, sequence)); + + return platform_label; +} + static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned int index) { struct mpls_route __rcu **platform_label; + size_t platform_labels; + + platform_label = mpls_platform_label_rcu(net, &platform_labels); - if (index >= net->mpls.platform_labels) + if (index >= platform_labels) return NULL; - platform_label = rcu_dereference(net->mpls.platform_label); return rcu_dereference(platform_label[index]); } @@ -2240,8 +2256,7 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) if (index < MPLS_LABEL_FIRST_UNRESERVED) index = MPLS_LABEL_FIRST_UNRESERVED; - platform_label = rcu_dereference(net->mpls.platform_label); - platform_labels = net->mpls.platform_labels; + platform_label = mpls_platform_label_rcu(net, &platform_labels); if (filter.filter_set) flags |= NLM_F_DUMP_FILTERED; @@ -2645,8 +2660,12 @@ static int resize_platform_label_table(struct net *net, size_t limit) } /* Update the global pointers */ + local_bh_disable(); + write_seqcount_begin(&net->mpls.platform_label_seq); net->mpls.platform_labels = limit; rcu_assign_pointer(net->mpls.platform_label, labels); + write_seqcount_end(&net->mpls.platform_label_seq); + local_bh_enable(); mutex_unlock(&net->mpls.platform_mutex); @@ -2728,6 +2747,8 @@ static __net_init int mpls_net_init(struct net *net) int i; mutex_init(&net->mpls.platform_mutex); + seqcount_mutex_init(&net->mpls.platform_label_seq, &net->mpls.platform_mutex); + net->mpls.platform_labels = 0; net->mpls.platform_label = NULL; net->mpls.ip_ttl_propagate = 1; -- cgit v1.2.3 From f73896b4197ed53cf0894657c899265ef7c86b7a Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Tue, 24 Mar 2026 11:14:28 -0700 Subject: net: mana: Fix RX skb truesize accounting MANA passes rxq->alloc_size to napi_build_skb() for all RX buffers. It is correct for fragment-backed RX buffers, where alloc_size matches the actual backing allocation used for each packet buffer. However, in the non-fragment RX path mana allocates a full page, or a higher-order page, per RX buffer. In that case alloc_size only reflects the usable packet area and not the actual backing memory. This causes napi_build_skb() to underestimate the skb backing allocation in the single-buffer RX path, so skb->truesize is derived from a value smaller than the real RX buffer allocation. Fix this by updating alloc_size in the non-fragment RX path to the actual backing allocation size before it is passed to napi_build_skb(). Fixes: 730ff06d3f5c ("net: mana: Use page pool fragments for RX buffers instead of full pages to improve memory efficiency.") Signed-off-by: Dipayaan Roy Reviewed-by: Haiyang Zhang Link: https://patch.msgid.link/acLUhLpLum6qrD/N@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/mana_en.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index dca62fb9a3a9..09a53c977545 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -766,6 +766,13 @@ static void mana_get_rxbuf_cfg(struct mana_port_context *apc, } *frag_count = 1; + + /* In the single-buffer path, napi_build_skb() must see the + * actual backing allocation size so skb->truesize reflects + * the full page (or higher-order page), not just the usable + * packet area. + */ + *alloc_size = PAGE_SIZE << get_order(*alloc_size); return; } -- cgit v1.2.3 From 976ff48c2ac6e6b25b01428c9d7997bcd0fb2949 Mon Sep 17 00:00:00 2001 From: "Sven Eckelmann (Plasma Cloud)" Date: Tue, 24 Mar 2026 09:36:01 +0100 Subject: net: ethernet: mtk_ppe: avoid NULL deref when gmac0 is disabled If the gmac0 is disabled, the precheck for a valid ingress device will cause a NULL pointer deref and crash the system. This happens because eth->netdev[0] will be NULL but the code will directly try to access netdev_ops. Instead of just checking for the first net_device, it must be checked if any of the mtk_eth net_devices is matching the netdev_ops of the ingress device. Cc: stable@vger.kernel.org Fixes: 73cfd947dbdb ("net: ethernet: mtk_eth_soc: ppe: prevent ppe update for non-mtk devices") Signed-off-by: Sven Eckelmann (Plasma Cloud) Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260324-wed-crash-gmac0-disabled-v1-1-3bc388aee565@simonwunderlich.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index cb30108f2bf6..cc8c4ef8038f 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -244,6 +244,25 @@ out: return 0; } +static bool +mtk_flow_is_valid_idev(const struct mtk_eth *eth, const struct net_device *idev) +{ + size_t i; + + if (!idev) + return false; + + for (i = 0; i < ARRAY_SIZE(eth->netdev); i++) { + if (!eth->netdev[i]) + continue; + + if (idev->netdev_ops == eth->netdev[i]->netdev_ops) + return true; + } + + return false; +} + static int mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f, int ppe_index) @@ -270,7 +289,7 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f, flow_rule_match_meta(rule, &match); if (mtk_is_netsys_v2_or_greater(eth)) { idev = __dev_get_by_index(&init_net, match.key->ingress_ifindex); - if (idev && idev->netdev_ops == eth->netdev[0]->netdev_ops) { + if (mtk_flow_is_valid_idev(eth, idev)) { struct mtk_mac *mac = netdev_priv(idev); if (WARN_ON(mac->ppe_idx >= eth->soc->ppe_num)) -- cgit v1.2.3 From 57a04a13aac1f247d171c3f3aef93efc69e6979e Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Tue, 24 Mar 2026 22:08:56 +0800 Subject: netdevsim: fix build if SKB_EXTENSIONS=n __skb_ext_put() is not declared if SKB_EXTENSIONS is not enabled, which causes a build error: drivers/net/netdevsim/netdev.c: In function 'nsim_forward_skb': drivers/net/netdevsim/netdev.c:114:25: error: implicit declaration of function '__skb_ext_put'; did you mean 'skb_ext_put'? [-Werror=implicit-function-declaration] 114 | __skb_ext_put(psp_ext); | ^~~~~~~~~~~~~ | skb_ext_put cc1: some warnings being treated as errors Add a stub to fix the build. Fixes: 7d9351435ebb ("netdevsim: drop PSP ext ref on forward failure") Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260324140857.783-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index daa4e4944ce3..2f278ce376b7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -5097,6 +5097,7 @@ static inline bool skb_has_extensions(struct sk_buff *skb) return unlikely(skb->active_extensions); } #else +static inline void __skb_ext_put(struct skb_ext *ext) {} static inline void skb_ext_put(struct sk_buff *skb) {} static inline void skb_ext_reset(struct sk_buff *skb) {} static inline void skb_ext_del(struct sk_buff *skb, int unused) {} -- cgit v1.2.3 From e8e44c98f789dee45cfd24ffb9d4936e0606d7c6 Mon Sep 17 00:00:00 2001 From: Buday Csaba Date: Tue, 24 Mar 2026 14:32:30 +0100 Subject: net: fec: fix the PTP periodic output sysfs interface When the PPS channel configuration was implemented, the channel index for the periodic outputs was configured as the hardware channel number. The sysfs interface uses a logical channel index, and rejects numbers greater than `n_per_out` (see period_store() in ptp_sysfs.c). That property was left at 1, since the driver implements channel selection, not simultaneous operation of multiple PTP hardware timer channels. A second check in fec_ptp_enable() returns -EOPNOTSUPP when the two channel numbers disagree, making channels 1..3 unusable from sysfs. Fix by removing this redundant check in the FEC PTP driver. Fixes: 566c2d83887f ("net: fec: make PPS channel configurable") Signed-off-by: Buday Csaba Link: https://patch.msgid.link/8ec2afe88423c2231f9cf8044d212ce57846670e.1774359059.git.buday.csaba@prolan.hu Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_ptp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 4b7bad9a485d..56801c2009d5 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -545,9 +545,6 @@ static int fec_ptp_enable(struct ptp_clock_info *ptp, if (rq->perout.flags) return -EOPNOTSUPP; - if (rq->perout.index != fep->pps_channel) - return -EOPNOTSUPP; - period.tv_sec = rq->perout.period.sec; period.tv_nsec = rq->perout.period.nsec; period_ns = timespec64_to_ns(&period); -- cgit v1.2.3 From 0239fd701d33475a39428daa3dc627407cd417a6 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 24 Mar 2026 14:21:19 +0800 Subject: net: enetc: reset PIR and CIR if they are not equal when initializing TX ring Currently the driver does not reset the producer index register (PIR) and consumer index register (CIR) when initializing a TX BD ring. The driver only reads the PIR and CIR and initializes the software indexes. If the TX BD ring is reinitialized when it still contains unsent frames, its PIR and CIR will not be equal after the reinitialization. However, the BDs between CIR and PIR have been freed and become invalid and this can lead to a hardware malfunction, causing the TX BD ring will not work properly. For ENETC v4, it supports software to set the PIR and CIR, so the driver can reset these two registers if they are not equal when reinitializing the TX BD ring. Therefore, add this solution for ENETC v4. Note that this patch does not work for ENETC v1 because it does not support software to set the PIR and CIR. Fixes: 99100d0d9922 ("net: enetc: add preliminary support for i.MX95 ENETC PF") Signed-off-by: Wei Fang Reviewed-by: Claudiu Manoil Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260324062121.2745033-2-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index a146ceaf2ed6..aa8a87124b10 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2578,6 +2578,7 @@ EXPORT_SYMBOL_GPL(enetc_free_si_resources); static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) { + struct enetc_si *si = container_of(hw, struct enetc_si, hw); int idx = tx_ring->index; u32 tbmr; @@ -2591,10 +2592,20 @@ static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) enetc_txbdr_wr(hw, idx, ENETC_TBLENR, ENETC_RTBLENR_LEN(tx_ring->bd_count)); - /* clearing PI/CI registers for Tx not supported, adjust sw indexes */ + /* For ENETC v1, clearing PI/CI registers for Tx not supported, + * adjust sw indexes + */ tx_ring->next_to_use = enetc_txbdr_rd(hw, idx, ENETC_TBPIR); tx_ring->next_to_clean = enetc_txbdr_rd(hw, idx, ENETC_TBCIR); + if (tx_ring->next_to_use != tx_ring->next_to_clean && + !is_enetc_rev1(si)) { + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + enetc_txbdr_wr(hw, idx, ENETC_TBPIR, 0); + enetc_txbdr_wr(hw, idx, ENETC_TBCIR, 0); + } + /* enable Tx ints by setting pkt thr to 1 */ enetc_txbdr_wr(hw, idx, ENETC_TBICR0, ENETC_TBICR0_ICEN | 0x1); -- cgit v1.2.3 From 2725d84efe2582c0a4b907e74a689d26b2dbd382 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 24 Mar 2026 14:21:20 +0800 Subject: net: enetc: add graceful stop to safely reinitialize the TX Ring For ENETC v4, the PIR and CIR will be reset if they are not equal when reinitializing the TX BD ring. However, resetting the PIR and CIR alone is insufficient. When a link-down event occurs while the TX BD ring is transmitting frames, subsequent reinitialization of the TX BD ring may cause it to malfunction. For example, the below steps can reproduce the problem. 1. Unplug the cable when the TX BD ring is busy transmitting frames. 2. Disable the network interface (ifconfig eth0 down). 3. Re-enable the network interface (ifconfig eth0 up). 4. Plug in the cable, the TX BD ring may fail to transmit packets. When the link-down event occurs, enetc4_pl_mac_link_down() only clears PMa_COMMAND_CONFIG[TX_EN] to disable MAC transmit data path. It doesn't set PORT[TXDIS] to 1 to flush the TX BD ring. Therefore, reinitializing the TX BD ring at this point is unsafe. To safely reinitialize the TX BD ring after a link-down event, we checked with the NETC IP team, a proper Ethernet MAC graceful stop is necessary. Therefore, add the Ethernet MAC graceful stop to the link-down event handler enetc4_pl_mac_link_down(). Fixes: 99100d0d9922 ("net: enetc: add preliminary support for i.MX95 ENETC PF") Signed-off-by: Wei Fang Reviewed-by: Claudiu Manoil Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260324062121.2745033-3-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc4_hw.h | 11 +++ drivers/net/ethernet/freescale/enetc/enetc4_pf.c | 111 ++++++++++++++++++++--- 2 files changed, 108 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h index 3ed0f7a02767..719c88ceb801 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h @@ -134,6 +134,12 @@ /* Port operational register */ #define ENETC4_POR 0x4100 +#define POR_TXDIS BIT(0) +#define POR_RXDIS BIT(1) + +/* Port status register */ +#define ENETC4_PSR 0x4104 +#define PSR_RX_BUSY BIT(1) /* Port traffic class a transmit maximum SDU register */ #define ENETC4_PTCTMSDUR(a) ((a) * 0x20 + 0x4208) @@ -173,6 +179,11 @@ /* Port internal MDIO base address, use to access PCS */ #define ENETC4_PM_IMDIO_BASE 0x5030 +/* Port MAC 0/1 Interrupt Event Register */ +#define ENETC4_PM_IEVENT(mac) (0x5040 + (mac) * 0x400) +#define PM_IEVENT_TX_EMPTY BIT(5) +#define PM_IEVENT_RX_EMPTY BIT(6) + /* Port MAC 0/1 Pause Quanta Register */ #define ENETC4_PM_PAUSE_QUANTA(mac) (0x5054 + (mac) * 0x400) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index 689b9f13c5eb..53cecbb23a97 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -444,20 +444,11 @@ static void enetc4_set_trx_frame_size(struct enetc_pf *pf) enetc4_pf_reset_tc_msdu(&si->hw); } -static void enetc4_enable_trx(struct enetc_pf *pf) -{ - struct enetc_hw *hw = &pf->si->hw; - - /* Enable port transmit/receive */ - enetc_port_wr(hw, ENETC4_POR, 0); -} - static void enetc4_configure_port(struct enetc_pf *pf) { enetc4_configure_port_si(pf); enetc4_set_trx_frame_size(pf); enetc_set_default_rss_key(pf); - enetc4_enable_trx(pf); } static int enetc4_init_ntmp_user(struct enetc_si *si) @@ -801,15 +792,105 @@ static void enetc4_set_tx_pause(struct enetc_pf *pf, int num_rxbdr, bool tx_paus enetc_port_wr(hw, ENETC4_PPAUOFFTR, pause_off_thresh); } -static void enetc4_enable_mac(struct enetc_pf *pf, bool en) +static void enetc4_mac_wait_tx_empty(struct enetc_si *si, int mac) +{ + u32 val; + + if (read_poll_timeout(enetc_port_rd, val, + val & PM_IEVENT_TX_EMPTY, + 100, 10000, false, &si->hw, + ENETC4_PM_IEVENT(mac))) + dev_warn(&si->pdev->dev, + "MAC %d TX is not empty\n", mac); +} + +static void enetc4_mac_tx_graceful_stop(struct enetc_pf *pf) +{ + struct enetc_hw *hw = &pf->si->hw; + struct enetc_si *si = pf->si; + u32 val; + + val = enetc_port_rd(hw, ENETC4_POR); + val |= POR_TXDIS; + enetc_port_wr(hw, ENETC4_POR, val); + + enetc4_mac_wait_tx_empty(si, 0); + if (si->hw_features & ENETC_SI_F_QBU) + enetc4_mac_wait_tx_empty(si, 1); + + val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0)); + val &= ~PM_CMD_CFG_TX_EN; + enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val); +} + +static void enetc4_mac_tx_enable(struct enetc_pf *pf) { + struct enetc_hw *hw = &pf->si->hw; struct enetc_si *si = pf->si; u32 val; val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0)); - val &= ~(PM_CMD_CFG_TX_EN | PM_CMD_CFG_RX_EN); - val |= en ? (PM_CMD_CFG_TX_EN | PM_CMD_CFG_RX_EN) : 0; + val |= PM_CMD_CFG_TX_EN; + enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val); + + val = enetc_port_rd(hw, ENETC4_POR); + val &= ~POR_TXDIS; + enetc_port_wr(hw, ENETC4_POR, val); +} + +static void enetc4_mac_wait_rx_empty(struct enetc_si *si, int mac) +{ + u32 val; + + if (read_poll_timeout(enetc_port_rd, val, + val & PM_IEVENT_RX_EMPTY, + 100, 10000, false, &si->hw, + ENETC4_PM_IEVENT(mac))) + dev_warn(&si->pdev->dev, + "MAC %d RX is not empty\n", mac); +} + +static void enetc4_mac_rx_graceful_stop(struct enetc_pf *pf) +{ + struct enetc_hw *hw = &pf->si->hw; + struct enetc_si *si = pf->si; + u32 val; + + if (si->hw_features & ENETC_SI_F_QBU) { + val = enetc_port_rd(hw, ENETC4_PM_CMD_CFG(1)); + val &= ~PM_CMD_CFG_RX_EN; + enetc_port_wr(hw, ENETC4_PM_CMD_CFG(1), val); + enetc4_mac_wait_rx_empty(si, 1); + } + + val = enetc_port_rd(hw, ENETC4_PM_CMD_CFG(0)); + val &= ~PM_CMD_CFG_RX_EN; + enetc_port_wr(hw, ENETC4_PM_CMD_CFG(0), val); + enetc4_mac_wait_rx_empty(si, 0); + + if (read_poll_timeout(enetc_port_rd, val, + !(val & PSR_RX_BUSY), + 100, 10000, false, hw, + ENETC4_PSR)) + dev_warn(&si->pdev->dev, "Port RX busy\n"); + + val = enetc_port_rd(hw, ENETC4_POR); + val |= POR_RXDIS; + enetc_port_wr(hw, ENETC4_POR, val); +} + +static void enetc4_mac_rx_enable(struct enetc_pf *pf) +{ + struct enetc_hw *hw = &pf->si->hw; + struct enetc_si *si = pf->si; + u32 val; + + val = enetc_port_rd(hw, ENETC4_POR); + val &= ~POR_RXDIS; + enetc_port_wr(hw, ENETC4_POR, val); + val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0)); + val |= PM_CMD_CFG_RX_EN; enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val); } @@ -853,7 +934,8 @@ static void enetc4_pl_mac_link_up(struct phylink_config *config, enetc4_set_hd_flow_control(pf, hd_fc); enetc4_set_tx_pause(pf, priv->num_rx_rings, tx_pause); enetc4_set_rx_pause(pf, rx_pause); - enetc4_enable_mac(pf, true); + enetc4_mac_tx_enable(pf); + enetc4_mac_rx_enable(pf); } static void enetc4_pl_mac_link_down(struct phylink_config *config, @@ -862,7 +944,8 @@ static void enetc4_pl_mac_link_down(struct phylink_config *config, { struct enetc_pf *pf = phylink_to_enetc_pf(config); - enetc4_enable_mac(pf, false); + enetc4_mac_rx_graceful_stop(pf); + enetc4_mac_tx_graceful_stop(pf); } static const struct phylink_mac_ops enetc_pl_mac_ops = { -- cgit v1.2.3 From f2df9567b123145a07ee4ea7440e233f5d0232cc Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 24 Mar 2026 14:21:21 +0800 Subject: net: enetc: do not access non-existent registers on pseudo MAC The ENETC4_PM_IEVENT and ENETC4_PM_CMD_CFG registers do not exist on the ENETC pseudo MAC, so the driver should prevent from accessing them. Fixes: 5175c1e4adca ("net: enetc: add basic support for the ENETC with pseudo MAC for i.MX94") Signed-off-by: Wei Fang Tested-by: Claudiu Manoil Reviewed-by: Claudiu Manoil Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260324062121.2745033-4-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc4_pf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index 53cecbb23a97..56899f2254aa 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -814,6 +814,9 @@ static void enetc4_mac_tx_graceful_stop(struct enetc_pf *pf) val |= POR_TXDIS; enetc_port_wr(hw, ENETC4_POR, val); + if (enetc_is_pseudo_mac(si)) + return; + enetc4_mac_wait_tx_empty(si, 0); if (si->hw_features & ENETC_SI_F_QBU) enetc4_mac_wait_tx_empty(si, 1); @@ -856,6 +859,9 @@ static void enetc4_mac_rx_graceful_stop(struct enetc_pf *pf) struct enetc_si *si = pf->si; u32 val; + if (enetc_is_pseudo_mac(si)) + goto check_rx_busy; + if (si->hw_features & ENETC_SI_F_QBU) { val = enetc_port_rd(hw, ENETC4_PM_CMD_CFG(1)); val &= ~PM_CMD_CFG_RX_EN; @@ -868,6 +874,7 @@ static void enetc4_mac_rx_graceful_stop(struct enetc_pf *pf) enetc_port_wr(hw, ENETC4_PM_CMD_CFG(0), val); enetc4_mac_wait_rx_empty(si, 0); +check_rx_busy: if (read_poll_timeout(enetc_port_rd, val, !(val & PSR_RX_BUSY), 100, 10000, false, hw, -- cgit v1.2.3 From 2428083101f6883f979cceffa76cd8440751ffe6 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Tue, 24 Mar 2026 16:06:44 +0800 Subject: net: qrtr: replace qrtr_tx_flow radix_tree with xarray to fix memory leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __radix_tree_create() allocates and links intermediate nodes into the tree one by one. If a subsequent allocation fails, the already-linked nodes remain in the tree with no corresponding leaf entry. These orphaned internal nodes are never reclaimed because radix_tree_for_each_slot() only visits slots containing leaf values. The radix_tree API is deprecated in favor of xarray. As suggested by Matthew Wilcox, migrate qrtr_tx_flow from radix_tree to xarray instead of fixing the radix_tree itself [1]. xarray properly handles cleanup of internal nodes — xa_destroy() frees all internal xarray nodes when the qrtr_node is released, preventing the leak. [1] https://lore.kernel.org/all/20260225071623.41275-1-jiayuan.chen@linux.dev/T/ Reported-by: syzbot+006987d1be3586e13555@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000bfba3a060bf4ffcf@google.com/T/ Fixes: 5fdeb0d372ab ("net: qrtr: Implement outgoing flow control") Signed-off-by: Jiayuan Chen Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260324080645.290197-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/qrtr/af_qrtr.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 55fd2dd37588..d77e9c8212da 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -118,7 +118,7 @@ static DEFINE_XARRAY_ALLOC(qrtr_ports); * @ep: endpoint * @ref: reference count for node * @nid: node id - * @qrtr_tx_flow: tree of qrtr_tx_flow, keyed by node << 32 | port + * @qrtr_tx_flow: xarray of qrtr_tx_flow, keyed by node << 32 | port * @qrtr_tx_lock: lock for qrtr_tx_flow inserts * @rx_queue: receive queue * @item: list item for broadcast list @@ -129,7 +129,7 @@ struct qrtr_node { struct kref ref; unsigned int nid; - struct radix_tree_root qrtr_tx_flow; + struct xarray qrtr_tx_flow; struct mutex qrtr_tx_lock; /* for qrtr_tx_flow */ struct sk_buff_head rx_queue; @@ -172,6 +172,7 @@ static void __qrtr_node_release(struct kref *kref) struct qrtr_tx_flow *flow; unsigned long flags; void __rcu **slot; + unsigned long index; spin_lock_irqsave(&qrtr_nodes_lock, flags); /* If the node is a bridge for other nodes, there are possibly @@ -189,11 +190,9 @@ static void __qrtr_node_release(struct kref *kref) skb_queue_purge(&node->rx_queue); /* Free tx flow counters */ - radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) { - flow = *slot; - radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot); + xa_for_each(&node->qrtr_tx_flow, index, flow) kfree(flow); - } + xa_destroy(&node->qrtr_tx_flow); kfree(node); } @@ -228,9 +227,7 @@ static void qrtr_tx_resume(struct qrtr_node *node, struct sk_buff *skb) key = remote_node << 32 | remote_port; - rcu_read_lock(); - flow = radix_tree_lookup(&node->qrtr_tx_flow, key); - rcu_read_unlock(); + flow = xa_load(&node->qrtr_tx_flow, key); if (flow) { spin_lock(&flow->resume_tx.lock); flow->pending = 0; @@ -269,12 +266,13 @@ static int qrtr_tx_wait(struct qrtr_node *node, int dest_node, int dest_port, return 0; mutex_lock(&node->qrtr_tx_lock); - flow = radix_tree_lookup(&node->qrtr_tx_flow, key); + flow = xa_load(&node->qrtr_tx_flow, key); if (!flow) { flow = kzalloc_obj(*flow); if (flow) { init_waitqueue_head(&flow->resume_tx); - if (radix_tree_insert(&node->qrtr_tx_flow, key, flow)) { + if (xa_err(xa_store(&node->qrtr_tx_flow, key, flow, + GFP_KERNEL))) { kfree(flow); flow = NULL; } @@ -326,9 +324,7 @@ static void qrtr_tx_flow_failed(struct qrtr_node *node, int dest_node, unsigned long key = (u64)dest_node << 32 | dest_port; struct qrtr_tx_flow *flow; - rcu_read_lock(); - flow = radix_tree_lookup(&node->qrtr_tx_flow, key); - rcu_read_unlock(); + flow = xa_load(&node->qrtr_tx_flow, key); if (flow) { spin_lock_irq(&flow->resume_tx.lock); flow->tx_failed = 1; @@ -599,7 +595,7 @@ int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid) node->nid = QRTR_EP_NID_AUTO; node->ep = ep; - INIT_RADIX_TREE(&node->qrtr_tx_flow, GFP_KERNEL); + xa_init(&node->qrtr_tx_flow); mutex_init(&node->qrtr_tx_lock); qrtr_node_assign(node, nid); @@ -627,6 +623,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) struct qrtr_tx_flow *flow; struct sk_buff *skb; unsigned long flags; + unsigned long index; void __rcu **slot; mutex_lock(&node->ep_lock); @@ -649,10 +646,8 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) /* Wake up any transmitters waiting for resume-tx from the node */ mutex_lock(&node->qrtr_tx_lock); - radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) { - flow = *slot; + xa_for_each(&node->qrtr_tx_flow, index, flow) wake_up_interruptible_all(&flow->resume_tx); - } mutex_unlock(&node->qrtr_tx_lock); qrtr_node_release(node); -- cgit v1.2.3 From ae05340ccaa9d347fe85415609e075545bec589f Mon Sep 17 00:00:00 2001 From: Yochai Eisenrich Date: Wed, 25 Mar 2026 00:49:25 +0200 Subject: net: ipv6: ndisc: fix ndisc_ra_useropt to initialize nduseropt_padX fields to zero to prevent an info-leak When processing Router Advertisements with user options the kernel builds an RTM_NEWNDUSEROPT netlink message. The nduseroptmsg struct has three padding fields that are never zeroed and can leak kernel data The fix is simple, just zeroes the padding fields. Fixes: 31910575a9de ("[IPv6]: Export userland ND options through netlink (RDNSS support)") Signed-off-by: Yochai Eisenrich Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260324224925.2437775-1-echelonh@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/ndisc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f6a5d8c73af9..186e60c79214 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1209,6 +1209,9 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type; ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code; ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3; + ndmsg->nduseropt_pad1 = 0; + ndmsg->nduseropt_pad2 = 0; + ndmsg->nduseropt_pad3 = 0; memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); -- cgit v1.2.3 From 5e67ba9bb531e1ec6599a82a065dea9040b9ce50 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Wed, 25 Mar 2026 15:41:52 +0800 Subject: net/ipv6: ioam6: prevent schema length wraparound in trace fill ioam6_fill_trace_data() stores the schema contribution to the trace length in a u8. With bit 22 enabled and the largest schema payload, sclen becomes 1 + 1020 / 4, wraps from 256 to 0, and bypasses the remaining-space check. __ioam6_fill_trace_data() then positions the write cursor without reserving the schema area but still copies the 4-byte schema header and the full schema payload, overrunning the trace buffer. Keep sclen in an unsigned int so the remaining-space check and the write cursor calculation both see the full schema length. Fixes: 8c6f6fa67726 ("ipv6: ioam: IOAM Generic Netlink API") Signed-off-by: Pengpeng Hou Reviewed-by: Justin Iurman Signed-off-by: David S. Miller --- net/ipv6/ioam6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index b76f89d92e7b..3978773bec42 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -708,7 +708,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_namespace *ns, struct ioam6_trace_hdr *trace, struct ioam6_schema *sc, - u8 sclen, bool is_input) + unsigned int sclen, bool is_input) { struct net_device *dev = skb_dst_dev(skb); struct timespec64 ts; @@ -939,7 +939,7 @@ void ioam6_fill_trace_data(struct sk_buff *skb, bool is_input) { struct ioam6_schema *sc; - u8 sclen = 0; + unsigned int sclen = 0; /* Skip if Overflow flag is set */ -- cgit v1.2.3 From bb417456c7814d1493d98b7dd9c040bf3ce3b4ed Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 25 Mar 2026 12:20:53 +0100 Subject: tg3: Fix race for querying speed/duplex When driver signals carrier up via netif_carrier_on() its internal link_up state isn't updated immediately. This leads to inconsistent speed/duplex in /proc/net/bonding/bondX where the speed and duplex is shown as unknown while ethtool shows correct values. Fix this by using netif_carrier_ok() for link checking in get_ksettings function. Fixes: 84421b99cedc ("tg3: Update link_up flag for phylib devices") Signed-off-by: Thomas Bogendoerfer Reviewed-by: Pavan Chebbi Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 21a5dd342724..73a4b569b03e 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12299,7 +12299,7 @@ static int tg3_get_link_ksettings(struct net_device *dev, ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, advertising); - if (netif_running(dev) && tp->link_up) { + if (netif_running(dev) && netif_carrier_ok(dev)) { cmd->base.speed = tp->link_config.active_speed; cmd->base.duplex = tp->link_config.active_duplex; ethtool_convert_legacy_u32_to_link_mode( -- cgit v1.2.3 From 5597dd284ff8c556c0b00f6a34473677426e3f81 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 25 Mar 2026 12:51:30 +0000 Subject: net: ti: icssg-prueth: fix missing data copy and wrong recycle in ZC RX dispatch emac_dispatch_skb_zc() allocates a new skb via napi_alloc_skb() but never copies the packet data from the XDP buffer into it. The skb is passed up the stack containing uninitialized heap memory instead of the actual received packet, leaking kernel heap contents to userspace. Copy the received packet data from the XDP buffer into the skb using skb_copy_to_linear_data(). Additionally, remove the skb_mark_for_recycle() call since the skb is backed by the NAPI page frag allocator, not page_pool. Marking a non-page_pool skb for recycle causes the free path to return pages to a page_pool that does not own them, corrupting page_pool state. The non-ZC path (emac_rx_packet) does not have these issues because it uses napi_build_skb() to wrap the existing page_pool page directly, requiring no copy, and correctly marks for recycle since the page comes from page_pool_dev_alloc_pages(). Fixes: 7a64bb388df3 ("net: ti: icssg-prueth: Add AF_XDP zero copy for RX") Signed-off-by: David Carlier Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/icssg/icssg_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index fd4e7622f123..a28a608f9bf4 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -902,6 +902,7 @@ static void emac_dispatch_skb_zc(struct prueth_emac *emac, struct xdp_buff *xdp, skb_reserve(skb, headroom); skb_put(skb, pkt_len); + skb_copy_to_linear_data(skb, xdp->data, pkt_len); skb->dev = ndev; /* RX HW timestamp */ @@ -912,7 +913,6 @@ static void emac_dispatch_skb_zc(struct prueth_emac *emac, struct xdp_buff *xdp, skb->offload_fwd_mark = emac->offload_fwd_mark; skb->protocol = eth_type_trans(skb, ndev); - skb_mark_for_recycle(skb); napi_gro_receive(&emac->napi_rx, skb); ndev->stats.rx_bytes += pkt_len; ndev->stats.rx_packets++; -- cgit v1.2.3 From 86ab3e55673a7a49a841838776f1ab18d23a67b5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Mar 2026 20:26:08 +0000 Subject: ipv6: icmp: clear skb2->cb[] in ip6_err_gen_icmpv6_unreach() Sashiko AI-review observed: In ip6_err_gen_icmpv6_unreach(), the skb is an outer IPv4 ICMP error packet where its cb contains an IPv4 inet_skb_parm. When skb is cloned into skb2 and passed to icmp6_send(), it uses IP6CB(skb2). IP6CB interprets the IPv4 inet_skb_parm as an inet6_skb_parm. The cipso offset in inet_skb_parm.opt directly overlaps with dsthao in inet6_skb_parm at offset 18. If an attacker sends a forged ICMPv4 error with a CIPSO IP option, dsthao would be a non-zero offset. Inside icmp6_send(), mip6_addr_swap() is called and uses ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO). This would scan the inner, attacker-controlled IPv6 packet starting at that offset, potentially returning a fake TLV without checking if the remaining packet length can hold the full 18-byte struct ipv6_destopt_hao. Could mip6_addr_swap() then perform a 16-byte swap that extends past the end of the packet data into skb_shared_info? Should the cb array also be cleared in ip6_err_gen_icmpv6_unreach() and ip6ip6_err() to prevent this? This patch implements the first suggestion. I am not sure if ip6ip6_err() needs to be changed. A separate patch would be better anyway. Fixes: ca15a078bd90 ("sit: generate icmpv6 error when receiving icmpv4 error") Reported-by: Ido Schimmel Closes: https://sashiko.dev/#/patchset/20260326155138.2429480-1-edumazet%40google.com Signed-off-by: Eric Dumazet Cc: Oskar Kjos Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260326202608.2976021-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/icmp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 813d2e9edb8b..d5d23a9296ea 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -875,6 +875,9 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, if (!skb2) return 1; + /* Remove debris left by IPv4 stack. */ + memset(IP6CB(skb2), 0, sizeof(*IP6CB(skb2))); + skb_dst_drop(skb2); skb_pull(skb2, nhs); skb_reset_network_header(skb2); -- cgit v1.2.3 From 2edfa31769a4add828a7e604b21cb82aaaa05925 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Mar 2026 15:51:38 +0000 Subject: ip6_tunnel: clear skb2->cb[] in ip4ip6_err() Oskar Kjos reported the following problem. ip4ip6_err() calls icmp_send() on a cloned skb whose cb[] was written by the IPv6 receive path as struct inet6_skb_parm. icmp_send() passes IPCB(skb2) to __ip_options_echo(), which interprets that cb[] region as struct inet_skb_parm (IPv4). The layouts differ: inet6_skb_parm.nhoff at offset 14 overlaps inet_skb_parm.opt.rr, producing a non-zero rr value. __ip_options_echo() then reads optlen from attacker-controlled packet data at sptr[rr+1] and copies that many bytes into dopt->__data, a fixed 40-byte stack buffer (IP_OPTIONS_DATA_FIXED_SIZE). To fix this we clear skb2->cb[], as suggested by Oskar Kjos. Also add minimal IPv4 header validation (version == 4, ihl >= 5). Fixes: c4d3efafcc93 ("[IPV6] IP6TUNNEL: Add support to IPv4 over IPv6 tunnel.") Reported-by: Oskar Kjos Signed-off-by: Eric Dumazet Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260326155138.2429480-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_tunnel.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4c29aa94e86e..0b53488a9229 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -601,11 +601,16 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!skb2) return 0; + /* Remove debris left by IPv6 stack. */ + memset(IPCB(skb2), 0, sizeof(*IPCB(skb2))); + skb_dst_drop(skb2); skb_pull(skb2, offset); skb_reset_network_header(skb2); eiph = ip_hdr(skb2); + if (eiph->version != 4 || eiph->ihl < 5) + goto out; /* Try to guess incoming interface */ rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr, -- cgit v1.2.3 From b38c55320bf85a84a4f04803c57b261fc87e9b4b Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Tue, 24 Mar 2026 12:51:22 -0700 Subject: eth: fbnic: Account for page fragments when updating BDQ tail FBNIC supports fixed size buffers of 4K. When PAGE_SIZE > 4K, we fragment the page across multiple descriptors (FBNIC_BD_FRAG_COUNT). When refilling the BDQ, the correct number of entries are populated, but tail was only incremented by one. So on a system with 64K pages, HW would get one descriptor refilled for every 16 we populate. Additionally, we program the ring size in the HW when enabling the BDQ. This was not accounting for page fragments, so on systems with 64K pages, the HW used 1/16th of the ring. Fixes: 0cb4c0a13723 ("eth: fbnic: Implement Rx queue alloc/start/stop/free") Signed-off-by: Dimitri Daskalakis Link: https://patch.msgid.link/20260324195123.3486219-2-dimitri.daskalakis1@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index 9fb91d4f3971..9cd85a0d0c3a 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -927,7 +927,7 @@ static void fbnic_fill_bdq(struct fbnic_ring *bdq) /* Force DMA writes to flush before writing to tail */ dma_wmb(); - writel(i, bdq->doorbell); + writel(i * FBNIC_BD_FRAG_COUNT, bdq->doorbell); } } @@ -2564,7 +2564,7 @@ static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq) hpq->tail = 0; hpq->head = 0; - log_size = fls(hpq->size_mask); + log_size = fls(hpq->size_mask) + ilog2(FBNIC_BD_FRAG_COUNT); /* Store descriptor ring address and size */ fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAL, lower_32_bits(hpq->dma)); @@ -2576,7 +2576,7 @@ static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq) if (!ppq->size_mask) goto write_ctl; - log_size = fls(ppq->size_mask); + log_size = fls(ppq->size_mask) + ilog2(FBNIC_BD_FRAG_COUNT); /* Add enabling of PPQ to BDQ control */ bdq_ctl |= FBNIC_QUEUE_BDQ_CTL_PPQ_ENABLE; -- cgit v1.2.3 From f3567dd428b264b3f06f881e5e85a738c7c910df Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Tue, 24 Mar 2026 12:51:23 -0700 Subject: eth: fbnic: Fix debugfs output for BDQ's with page frags The rings size_mask represents the number of pages, so we need to determine the number of page frags when dumping the descriptors. Fixes: df04373b0dab ("eth fbnic: Add debugfs hooks for tx/rx rings") Signed-off-by: Dimitri Daskalakis Link: https://patch.msgid.link/20260324195123.3486219-3-dimitri.daskalakis1@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c index 08270db2dee8..3c4563c8f403 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c @@ -197,7 +197,7 @@ static int fbnic_dbg_bdq_desc_seq_show(struct seq_file *s, void *v) return 0; } - for (i = 0; i <= ring->size_mask; i++) { + for (i = 0; i < (ring->size_mask + 1) * FBNIC_BD_FRAG_COUNT; i++) { u64 bd = le64_to_cpu(ring->desc[i]); seq_printf(s, "%04x %#04llx %#014llx\n", i, -- cgit v1.2.3 From a01aee7cafc575bb82f5529e8734e7052f9b16ea Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Thu, 26 Mar 2026 03:44:39 +0000 Subject: bridge: br_nd_send: linearize skb before parsing ND options br_nd_send() parses neighbour discovery options from ns->opt[] and assumes that these options are in the linear part of request. Its callers only guarantee that the ICMPv6 header and target address are available, so the option area can still be non-linear. Parsing ns->opt[] in that case can access data past the linear buffer. Linearize request before option parsing and derive ns from the linear network header. Fixes: ed842faeb2bd ("bridge: suppress nd pkts on BR_NEIGH_SUPPRESS ports") Reported-by: Yifan Wu Reported-by: Juefei Pu Tested-by: Ao Zhou Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Signed-off-by: Yang Yang Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260326034441.2037420-2-n05ec@lzu.edu.cn Signed-off-by: Jakub Kicinski --- net/bridge/br_arp_nd_proxy.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index 1e2b51769eec..af3d1e33f50b 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -251,12 +251,12 @@ struct nd_msg *br_is_nd_neigh_msg(const struct sk_buff *skb, struct nd_msg *msg) static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, struct sk_buff *request, struct neighbour *n, - __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns) + __be16 vlan_proto, u16 vlan_tci) { struct net_device *dev = request->dev; struct net_bridge_vlan_group *vg; + struct nd_msg *na, *ns; struct sk_buff *reply; - struct nd_msg *na; struct ipv6hdr *pip6; int na_olen = 8; /* opt hdr + ETH_ALEN for target */ int ns_olen; @@ -264,7 +264,7 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, u8 *daddr; u16 pvid; - if (!dev) + if (!dev || skb_linearize(request)) return; len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) + @@ -281,6 +281,8 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, skb_set_mac_header(reply, 0); daddr = eth_hdr(request)->h_source; + ns = (struct nd_msg *)(skb_network_header(request) + + sizeof(struct ipv6hdr)); /* Do we need option processing ? */ ns_olen = request->len - (skb_network_offset(request) + @@ -472,9 +474,9 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, if (vid != 0) br_nd_send(br, p, skb, n, skb->vlan_proto, - skb_vlan_tag_get(skb), msg); + skb_vlan_tag_get(skb)); else - br_nd_send(br, p, skb, n, 0, 0, msg); + br_nd_send(br, p, skb, n, 0, 0); replied = true; } -- cgit v1.2.3 From 850837965af15707fd3142c1cf3c5bfaf022299b Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Thu, 26 Mar 2026 03:44:40 +0000 Subject: bridge: br_nd_send: validate ND option lengths br_nd_send() walks ND options according to option-provided lengths. A malformed option can make the parser advance beyond the computed option span or use a too-short source LLADDR option payload. Validate option lengths against the remaining NS option area before advancing, and only read source LLADDR when the option is large enough for an Ethernet address. Fixes: ed842faeb2bd ("bridge: suppress nd pkts on BR_NEIGH_SUPPRESS ports") Cc: stable@vger.kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Tested-by: Ao Zhou Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Signed-off-by: Yang Yang Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260326034441.2037420-3-n05ec@lzu.edu.cn Signed-off-by: Jakub Kicinski --- net/bridge/br_arp_nd_proxy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index af3d1e33f50b..6b5595868a39 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -288,12 +288,14 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, ns_olen = request->len - (skb_network_offset(request) + sizeof(struct ipv6hdr)) - sizeof(*ns); for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) { - if (!ns->opt[i + 1]) { + if (!ns->opt[i + 1] || i + (ns->opt[i + 1] << 3) > ns_olen) { kfree_skb(reply); return; } if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) { - daddr = ns->opt + i + sizeof(struct nd_opt_hdr); + if ((ns->opt[i + 1] << 3) >= + sizeof(struct nd_opt_hdr) + ETH_ALEN) + daddr = ns->opt + i + sizeof(struct nd_opt_hdr); break; } } -- cgit v1.2.3 From afa9a05e6c4971bd5586f1b304e14d61fb3d9385 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Thu, 26 Mar 2026 03:44:41 +0000 Subject: vxlan: validate ND option lengths in vxlan_na_create vxlan_na_create() walks ND options according to option-provided lengths. A malformed option can make the parser advance beyond the computed option span or use a too-short source LLADDR option payload. Validate option lengths against the remaining NS option area before advancing, and only read source LLADDR when the option is large enough for an Ethernet address. Fixes: 4b29dba9c085 ("vxlan: fix nonfunctional neigh_reduce()") Cc: stable@vger.kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Tested-by: Ao Zhou Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Signed-off-by: Yang Yang Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260326034441.2037420-4-n05ec@lzu.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 17c941aac32d..a94ac82a6136 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1965,12 +1965,14 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request, ns_olen = request->len - skb_network_offset(request) - sizeof(struct ipv6hdr) - sizeof(*ns); for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) { - if (!ns->opt[i + 1]) { + if (!ns->opt[i + 1] || i + (ns->opt[i + 1] << 3) > ns_olen) { kfree_skb(reply); return NULL; } if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) { - daddr = ns->opt + i + sizeof(struct nd_opt_hdr); + if ((ns->opt[i + 1] << 3) >= + sizeof(struct nd_opt_hdr) + ETH_ALEN) + daddr = ns->opt + i + sizeof(struct nd_opt_hdr); break; } } -- cgit v1.2.3 From 4576100b8cd03118267513cafacde164b498b322 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Thu, 26 Mar 2026 13:43:09 -0700 Subject: net/sched: sch_hfsc: fix divide-by-zero in rtsc_min() m2sm() converts a u32 slope to a u64 scaled value. For large inputs (e.g. m1=4000000000), the result can reach 2^32. rtsc_min() stores the difference of two such u64 values in a u32 variable `dsm` and uses it as a divisor. When the difference is exactly 2^32 the truncation yields zero, causing a divide-by-zero oops in the concave-curve intersection path: Oops: divide error: 0000 RIP: 0010:rtsc_min (net/sched/sch_hfsc.c:601) Call Trace: init_ed (net/sched/sch_hfsc.c:629) hfsc_enqueue (net/sched/sch_hfsc.c:1569) [...] Widen `dsm` to u64 and replace do_div() with div64_u64() so the full difference is preserved. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Weiming Shi Signed-off-by: Xiang Mei Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260326204310.1549327-1-xmei5@asu.edu Signed-off-by: Jakub Kicinski --- net/sched/sch_hfsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b5657ffbbf84..83b2ca2e37fc 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -555,7 +555,7 @@ static void rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) { u64 y1, y2, dx, dy; - u32 dsm; + u64 dsm; if (isc->sm1 <= isc->sm2) { /* service curve is convex */ @@ -598,7 +598,7 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) */ dx = (y1 - y) << SM_SHIFT; dsm = isc->sm1 - isc->sm2; - do_div(dx, dsm); + dx = div64_u64(dx, dsm); /* * check if (x, y1) belongs to the 1st segment of rtsc. * if so, add the offset. -- cgit v1.2.3 From 5d17af9eb2dd3de6846ed344c883de7812e6cc09 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Thu, 26 Mar 2026 13:43:10 -0700 Subject: selftests/tc-testing: add test for HFSC divide-by-zero in rtsc_min() Add a regression test for the divide-by-zero in rtsc_min() triggered when m2sm() converts a large m1 value (e.g. 32gbit) to a u64 scaled slope reaching 2^32. rtsc_min() stores the difference of two such u64 values (sm1 - sm2) in a u32 variable `dsm`, truncating 2^32 to zero and causing a divide-by-zero oops in the concave-curve intersection path. The test configures an HFSC class with m1=32gbit d=1ms m2=0bit, sends a packet to activate the class, waits for it to drain and go idle, then sends another packet to trigger reactivation through rtsc_min(). Signed-off-by: Xiang Mei Acked-by: Jamal Hadi Salim Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20260326204310.1549327-2-xmei5@asu.edu Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 25 ++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 6a39640aa2a8..1e5efb2a31eb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -1111,5 +1111,30 @@ "teardown": [ "$TC qdisc del dev $DUMMY root handle 1:" ] + }, + { + "id": "a3d7", + "name": "HFSC with large m1 - no divide-by-zero on class reactivation", + "category": [ + "qdisc", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc replace dev $DUMMY root handle 1: hfsc default 1", + "$TC class replace dev $DUMMY parent 1: classid 1:1 hfsc rt m1 32gbit d 1ms m2 0bit ls m1 32gbit d 1ms m2 0bit", + "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true", + "sleep 1" + ], + "cmdUnderTest": "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc hfsc 1: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] } ] -- cgit v1.2.3 From eeee5a710f26ce57807024ef330fe5a850eaecd8 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Thu, 26 Mar 2026 13:20:38 +0100 Subject: net: sfp: Fix Ubiquiti U-Fiber Instant SFP module on mvneta MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 8110633db49d7de2 ("net: sfp-bus: allow SFP quirks to override Autoneg and pause bits") we moved the setting of Autoneg and pause bits before the call to SFP quirk when parsing SFP module support. Since the quirk for Ubiquiti U-Fiber Instant SFP module zeroes the support bits and sets 1000baseX_Full only, the above mentioned commit changed the overall computed support from 1000baseX_Full, Autoneg, Pause, Asym_Pause to just 1000baseX_Full. This broke the SFP module for mvneta, which requires Autoneg for 1000baseX since commit c762b7fac1b249a9 ("net: mvneta: deny disabling autoneg for 802.3z modes"). Fix this by setting back the Autoneg, Pause and Asym_Pause bits in the quirk. Fixes: 8110633db49d7de2 ("net: sfp-bus: allow SFP quirks to override Autoneg and pause bits") Signed-off-by: Marek Behún Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260326122038.2489589-1-kabel@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 5db841377199..ce8924613363 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -480,11 +480,16 @@ static void sfp_quirk_ubnt_uf_instant(const struct sfp_eeprom_id *id, { /* Ubiquiti U-Fiber Instant module claims that support all transceiver * types including 10G Ethernet which is not truth. So clear all claimed - * modes and set only one mode which module supports: 1000baseX_Full. + * modes and set only one mode which module supports: 1000baseX_Full, + * along with the Autoneg and pause bits. */ linkmode_zero(caps->link_modes); linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, caps->link_modes); + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, caps->link_modes); + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, caps->link_modes); + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, caps->link_modes); + phy_interface_zero(caps->interfaces); __set_bit(PHY_INTERFACE_MODE_1000BASEX, caps->interfaces); } -- cgit v1.2.3 From d389954a6cae7bf76b7b082ac3511d177b77ef2d Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 26 Mar 2026 15:52:32 +0800 Subject: net: enetc: check whether the RSS algorithm is Toeplitz Both ENETC v1 and v4 only provide Toeplitz RSS support. This patch adds a validation check to reject attempts to configure other RSS algorithms, avoiding misleading configuration options for users. Fixes: d382563f541b ("enetc: Add RFS and RSS support") Signed-off-by: Wei Fang Reviewed-by: Clark Wang Reviewed-by: Claudiu Manoil Link: https://patch.msgid.link/20260326075233.3628047-2-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 2fe140ddebb2..a393647e6062 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -795,6 +795,10 @@ static int enetc_set_rxfh(struct net_device *ndev, struct enetc_si *si = priv->si; int err = 0; + if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && + rxfh->hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + /* set hash key, if PF */ if (rxfh->key && enetc_si_is_pf(si)) enetc_set_rss_key(si, rxfh->key); -- cgit v1.2.3 From a142d139168cce8d5776245b5494c7f7f5d7fb7d Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 26 Mar 2026 15:52:33 +0800 Subject: net: enetc: do not allow VF to configure the RSS key VFs do not have privilege to configure the RSS key because the registers are owned by the PF. Currently, if VF attempts to configure the RSS key, enetc_set_rxfh() simply skips the configuration and does not generate a warning, which may mislead users into thinking the feature is supported. To improve this situation, add a check to reject RSS key configuration on VFs. Fixes: d382563f541b ("enetc: Add RFS and RSS support") Signed-off-by: Wei Fang Reviewed-by: Clark Wang Reviewed-by: Claudiu Manoil Link: https://patch.msgid.link/20260326075233.3628047-3-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_ethtool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index a393647e6062..7c17acaf7a38 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -800,8 +800,12 @@ static int enetc_set_rxfh(struct net_device *ndev, return -EOPNOTSUPP; /* set hash key, if PF */ - if (rxfh->key && enetc_si_is_pf(si)) + if (rxfh->key) { + if (!enetc_si_is_pf(si)) + return -EOPNOTSUPP; + enetc_set_rss_key(si, rxfh->key); + } /* set RSS table */ if (rxfh->indir) -- cgit v1.2.3 From fd63f185979b047fb22a0dfc6bd94d0cab6a6a70 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 27 Mar 2026 10:52:57 +0100 Subject: ipv6: prevent possible UaF in addrconf_permanent_addr() The mentioned helper try to warn the user about an exceptional condition, but the message is delivered too late, accessing the ipv6 after its possible deletion. Reorder the statement to avoid the possible UaF; while at it, place the warning outside the idev->lock as it needs no protection. Reported-by: Jakub Kicinski Closes: https://sashiko.dev/#/patchset/8c8bfe2e1a324e501f0e15fef404a77443fd8caf.1774365668.git.pabeni%40redhat.com Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/ef973c3a8cb4f8f1787ed469f3e5391b9fe95aa0.1774601542.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f4e23b543585..dd0b4d80e0f8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3625,12 +3625,12 @@ static void addrconf_permanent_addr(struct net *net, struct net_device *dev) if ((ifp->flags & IFA_F_PERMANENT) && fixup_permanent_addr(net, idev, ifp) < 0) { write_unlock_bh(&idev->lock); - in6_ifa_hold(ifp); - ipv6_del_addr(ifp); - write_lock_bh(&idev->lock); net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n", idev->dev->name, &ifp->addr); + in6_ifa_hold(ifp); + ipv6_del_addr(ifp); + write_lock_bh(&idev->lock); } } -- cgit v1.2.3 From 514aac3599879a7ed48b7dc19e31145beb6958ac Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 27 Mar 2026 10:48:21 +0100 Subject: net: airoha: Add missing cleanup bits in airoha_qdma_cleanup_rx_queue() In order to properly cleanup hw rx QDMA queues and bring the device to the initial state, reset rx DMA queue head/tail index. Moreover, reset queued DMA descriptor fields. Fixes: 23020f049327 ("net: airoha: Introduce ethernet support for EN7581 SoC") Tested-by: Madhur Agrawal Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260327-airoha_qdma_cleanup_rx_queue-fix-v1-1-369d6ab1511a@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 56cf9a926a83..c2a54dbcbb0d 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -794,18 +794,34 @@ static int airoha_qdma_init_rx_queue(struct airoha_queue *q, static void airoha_qdma_cleanup_rx_queue(struct airoha_queue *q) { - struct airoha_eth *eth = q->qdma->eth; + struct airoha_qdma *qdma = q->qdma; + struct airoha_eth *eth = qdma->eth; + int qid = q - &qdma->q_rx[0]; while (q->queued) { struct airoha_queue_entry *e = &q->entry[q->tail]; + struct airoha_qdma_desc *desc = &q->desc[q->tail]; struct page *page = virt_to_head_page(e->buf); dma_sync_single_for_cpu(eth->dev, e->dma_addr, e->dma_len, page_pool_get_dma_dir(q->page_pool)); page_pool_put_full_page(q->page_pool, page, false); + /* Reset DMA descriptor */ + WRITE_ONCE(desc->ctrl, 0); + WRITE_ONCE(desc->addr, 0); + WRITE_ONCE(desc->data, 0); + WRITE_ONCE(desc->msg0, 0); + WRITE_ONCE(desc->msg1, 0); + WRITE_ONCE(desc->msg2, 0); + WRITE_ONCE(desc->msg3, 0); + q->tail = (q->tail + 1) % q->ndesc; q->queued--; } + + q->head = q->tail; + airoha_qdma_rmw(qdma, REG_RX_DMA_IDX(qid), RX_RING_DMA_IDX_MASK, + FIELD_PREP(RX_RING_DMA_IDX_MASK, q->tail)); } static int airoha_qdma_init_rx(struct airoha_qdma *qdma) -- cgit v1.2.3 From ddc748a391dd8642ba6b2e4fe22e7f2ddf84b7f0 Mon Sep 17 00:00:00 2001 From: Guoyu Su Date: Fri, 27 Mar 2026 23:35:07 +0800 Subject: net: use skb_header_pointer() for TCPv4 GSO frag_off check Syzbot reported a KMSAN uninit-value warning in gso_features_check() called from netif_skb_features() [1]. gso_features_check() reads iph->frag_off to decide whether to clear mangleid_features. Accessing the IPv4 header via ip_hdr()/inner_ip_hdr() can rely on skb header offsets that are not always safe for direct dereference on packets injected from PF_PACKET paths. Use skb_header_pointer() for the TCPv4 frag_off check so the header read is robust whether data is already linear or needs copying. [1] https://syzkaller.appspot.com/bug?extid=1543a7d954d9c6d00407 Link: https://lore.kernel.org/netdev/willemdebruijn.kernel.1a9f35039caab@gmail.com/ Fixes: cbc53e08a793 ("GSO: Add GSO type for fixed IPv4 ID") Reported-by: syzbot+1543a7d954d9c6d00407@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=1543a7d954d9c6d00407 Tested-by: syzbot+1543a7d954d9c6d00407@syzkaller.appspotmail.com Signed-off-by: Guoyu Su Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260327153507.39742-1-yss2813483011xxl@gmail.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index fc5557062414..831129f2a69b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3821,10 +3821,15 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, * segmentation-offloads.rst). */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { - struct iphdr *iph = skb->encapsulation ? - inner_ip_hdr(skb) : ip_hdr(skb); + const struct iphdr *iph; + struct iphdr _iph; + int nhoff = skb->encapsulation ? + skb_inner_network_offset(skb) : + skb_network_offset(skb); - if (!(iph->frag_off & htons(IP_DF))) + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + + if (!iph || !(iph->frag_off & htons(IP_DF))) features &= ~dev->mangleid_features; } -- cgit v1.2.3 From e6e3eb5ee89ac4c163d46429391c889a1bb5e404 Mon Sep 17 00:00:00 2001 From: Yochai Eisenrich Date: Sun, 29 Mar 2026 00:14:36 +0300 Subject: net: sched: cls_api: fix tc_chain_fill_node to initialize tcm_info to zero to prevent an info-leak When building netlink messages, tc_chain_fill_node() never initializes the tcm_info field of struct tcmsg. Since the allocation is not zeroed, kernel heap memory is leaked to userspace through this 4-byte field. The fix simply zeroes tcm_info alongside the other fields that are already initialized. Fixes: 32a4f5ecd738 ("net: sched: introduce chain object to uapi") Signed-off-by: Yochai Eisenrich Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260328211436.1010152-1-echelonh@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 4829c27446e3..20f7f9ee0b35 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2969,6 +2969,7 @@ static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops, tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; tcm->tcm_handle = 0; + tcm->tcm_info = 0; if (block->q) { tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex; tcm->tcm_parent = block->q->handle; -- cgit v1.2.3 From cedc1bf327de62ec30af9743bd1f601c2de30553 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 29 Mar 2026 12:32:27 +0200 Subject: net: airoha: Delay offloading until all net_devices are fully registered Netfilter flowtable can theoretically try to offload flower rules as soon as a net_device is registered while all the other ones are not registered or initialized, triggering a possible NULL pointer dereferencing of qdma pointer in airoha_ppe_set_cpu_port routine. Moreover, if register_netdev() fails for a particular net_device, there is a small race if Netfilter tries to offload flowtable rules before all the net_devices are properly unregistered in airoha_probe() error patch, triggering a NULL pointer dereferencing in airoha_ppe_set_cpu_port routine. In order to avoid any possible race, delay offloading until all net_devices are registered in the networking subsystem. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260329-airoha-regiser-race-fix-v2-1-f4ebb139277b@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 2 ++ drivers/net/ethernet/airoha/airoha_eth.h | 1 + drivers/net/ethernet/airoha/airoha_ppe.c | 7 +++++++ 3 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index c2a54dbcbb0d..95ba99b89428 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2962,6 +2962,8 @@ static int airoha_register_gdm_devices(struct airoha_eth *eth) return err; } + set_bit(DEV_STATE_REGISTERED, ð->state); + return 0; } diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index 20e602d61e61..a97903569335 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -88,6 +88,7 @@ enum { enum { DEV_STATE_INITIALIZED, + DEV_STATE_REGISTERED, }; enum { diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 5724f8f2defd..c2c32b6833df 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -1368,6 +1368,13 @@ int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data) struct airoha_eth *eth = ppe->eth; int err = 0; + /* Netfilter flowtable can try to offload flower rules while not all + * the net_devices are registered or initialized. Delay offloading + * until all net_devices are registered in the system. + */ + if (!test_bit(DEV_STATE_REGISTERED, ð->state)) + return -EBUSY; + mutex_lock(&flow_offload_mutex); if (!eth->npu) -- cgit v1.2.3 From 4ee937107d52f9e5c350e4b5e629760e328b3d9f Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Sun, 29 Mar 2026 07:43:56 +0800 Subject: bnxt_en: set backing store type from query type bnxt_hwrm_func_backing_store_qcaps_v2() stores resp->type from the firmware response in ctxm->type and later uses that value to index fixed backing-store metadata arrays such as ctx_arr[] and bnxt_bstore_to_trace[]. ctxm->type is fixed by the current backing-store query type and matches the array index of ctx->ctx_arr. Set ctxm->type from the current loop variable instead of depending on resp->type. Also update the loop to advance type from next_valid_type in the for statement, which keeps the control flow simpler for non-valid and unchanged entries. Fixes: 6a4d0774f02d ("bnxt_en: Add support for new backing store query firmware API") Signed-off-by: Pengpeng Hou Reviewed-by: Michael Chan Tested-by: Michael Chan Link: https://patch.msgid.link/20260328234357.43669-1-pengpeng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0751c0e4581a..7ed805713fbb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8671,7 +8671,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) struct hwrm_func_backing_store_qcaps_v2_output *resp; struct hwrm_func_backing_store_qcaps_v2_input *req; struct bnxt_ctx_mem_info *ctx = bp->ctx; - u16 type; + u16 type, next_type = 0; int rc; rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_QCAPS_V2); @@ -8687,7 +8687,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) resp = hwrm_req_hold(bp, req); - for (type = 0; type < BNXT_CTX_V2_MAX; ) { + for (type = 0; type < BNXT_CTX_V2_MAX; type = next_type) { struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; u8 init_val, init_off, i; u32 max_entries; @@ -8700,7 +8700,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) if (rc) goto ctx_done; flags = le32_to_cpu(resp->flags); - type = le16_to_cpu(resp->next_valid_type); + next_type = le16_to_cpu(resp->next_valid_type); if (!(flags & BNXT_CTX_MEM_TYPE_VALID)) { bnxt_free_one_ctx_mem(bp, ctxm, true); continue; @@ -8715,7 +8715,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) else continue; } - ctxm->type = le16_to_cpu(resp->type); + ctxm->type = type; ctxm->entry_size = entry_size; ctxm->flags = flags; ctxm->instance_bmap = le32_to_cpu(resp->instance_bit_map); -- cgit v1.2.3 From 2884bf72fb8f03409e423397319205de48adca16 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Thu, 26 Mar 2026 00:55:53 -0700 Subject: net: bonding: fix use-after-free in bond_xmit_broadcast() bond_xmit_broadcast() reuses the original skb for the last slave (determined by bond_is_last_slave()) and clones it for others. Concurrent slave enslave/release can mutate the slave list during RCU-protected iteration, changing which slave is "last" mid-loop. This causes the original skb to be double-consumed (double-freed). Replace the racy bond_is_last_slave() check with a simple index comparison (i + 1 == slaves_count) against the pre-snapshot slave count taken via READ_ONCE() before the loop. This preserves the zero-copy optimization for the last slave while making the "last" determination stable against concurrent list mutations. The UAF can trigger the following crash: ================================================================== BUG: KASAN: slab-use-after-free in skb_clone Read of size 8 at addr ffff888100ef8d40 by task exploit/147 CPU: 1 UID: 0 PID: 147 Comm: exploit Not tainted 7.0.0-rc3+ #4 PREEMPTLAZY Call Trace: dump_stack_lvl (lib/dump_stack.c:123) print_report (mm/kasan/report.c:379 mm/kasan/report.c:482) kasan_report (mm/kasan/report.c:597) skb_clone (include/linux/skbuff.h:1724 include/linux/skbuff.h:1792 include/linux/skbuff.h:3396 net/core/skbuff.c:2108) bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5334) bond_start_xmit (drivers/net/bonding/bond_main.c:5567 drivers/net/bonding/bond_main.c:5593) dev_hard_start_xmit (include/linux/netdevice.h:5325 include/linux/netdevice.h:5334 net/core/dev.c:3871 net/core/dev.c:3887) __dev_queue_xmit (include/linux/netdevice.h:3601 net/core/dev.c:4838) ip6_finish_output2 (include/net/neighbour.h:540 include/net/neighbour.h:554 net/ipv6/ip6_output.c:136) ip6_finish_output (net/ipv6/ip6_output.c:208 net/ipv6/ip6_output.c:219) ip6_output (net/ipv6/ip6_output.c:250) ip6_send_skb (net/ipv6/ip6_output.c:1985) udp_v6_send_skb (net/ipv6/udp.c:1442) udpv6_sendmsg (net/ipv6/udp.c:1733) __sys_sendto (net/socket.c:730 net/socket.c:742 net/socket.c:2206) __x64_sys_sendto (net/socket.c:2209) do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Allocated by task 147: Freed by task 147: The buggy address belongs to the object at ffff888100ef8c80 which belongs to the cache skbuff_head_cache of size 224 The buggy address is located 192 bytes inside of freed 224-byte region [ffff888100ef8c80, ffff888100ef8d60) Memory state around the buggy address: ffff888100ef8c00: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc ffff888100ef8c80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888100ef8d00: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ^ ffff888100ef8d80: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb ffff888100ef8e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 4e5bd03ae346 ("net: bonding: fix bond_xmit_broadcast return value error bug") Reported-by: Weiming Shi Signed-off-by: Xiang Mei Link: https://patch.msgid.link/20260326075553.3960562-1-xmei5@asu.edu Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 33f414d03ab9..a5484d11553d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5326,7 +5326,7 @@ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb, if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) continue; - if (bond_is_last_slave(bond, slave)) { + if (i + 1 == slaves_count) { skb2 = skb; skb_used = true; } else { -- cgit v1.2.3 From 30fe3f5f6494f827d812ff179f295a8e532709d6 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Thu, 26 Mar 2026 22:20:33 +0800 Subject: NFC: pn533: bound the UART receive buffer pn532_receive_buf() appends every incoming byte to dev->recv_skb and only resets the buffer after pn532_uart_rx_is_frame() recognizes a complete frame. A continuous stream of bytes without a valid PN532 frame header therefore keeps growing the skb until skb_put_u8() hits the tail limit. Drop the accumulated partial frame once the fixed receive buffer is full so malformed UART traffic cannot grow the skb past PN532_UART_SKB_BUFF_LEN. Fixes: c656aa4c27b1 ("nfc: pn533: add UART phy driver") Signed-off-by: Pengpeng Hou Link: https://patch.msgid.link/20260326142033.82297-1-pengpeng@iscas.ac.cn Signed-off-by: Paolo Abeni --- drivers/nfc/pn533/uart.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c index 6d2f520a5bc8..1b82b7b2a5fa 100644 --- a/drivers/nfc/pn533/uart.c +++ b/drivers/nfc/pn533/uart.c @@ -211,6 +211,9 @@ static size_t pn532_receive_buf(struct serdev_device *serdev, timer_delete(&dev->cmd_timeout); for (i = 0; i < count; i++) { + if (unlikely(!skb_tailroom(dev->recv_skb))) + skb_trim(dev->recv_skb, 0); + skb_put_u8(dev->recv_skb, *data++); if (!pn532_uart_rx_is_frame(dev->recv_skb)) continue; -- cgit v1.2.3 From 393e0b4f178ec7fce1141dacc3304e3607a92ee9 Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Fri, 27 Mar 2026 13:02:37 +0530 Subject: net: xilinx: axienet: Correct BD length masks to match AXIDMA IP spec The XAXIDMA_BD_CTRL_LENGTH_MASK and XAXIDMA_BD_STS_ACTUAL_LEN_MASK macros were defined as 0x007FFFFF (23 bits), but the AXI DMA IP product guide (PG021) specifies the buffer length field as bits 25:0 (26 bits). Update both masks to match the IP documentation. In practice this had no functional impact, since Ethernet frames are far smaller than 2^23 bytes and the extra bits were always zero, but the masks should still reflect the hardware specification. Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Suraj Gupta Reviewed-by: Sean Anderson Link: https://patch.msgid.link/20260327073238.134948-2-suraj.gupta2@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/xilinx/xilinx_axienet.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 5ff742103beb..fcd3aaef27fc 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -105,7 +105,7 @@ #define XAXIDMA_BD_HAS_DRE_MASK 0xF00 /* Whether has DRE mask */ #define XAXIDMA_BD_WORDLEN_MASK 0xFF /* Whether has DRE mask */ -#define XAXIDMA_BD_CTRL_LENGTH_MASK 0x007FFFFF /* Requested len */ +#define XAXIDMA_BD_CTRL_LENGTH_MASK GENMASK(25, 0) /* Requested len */ #define XAXIDMA_BD_CTRL_TXSOF_MASK 0x08000000 /* First tx packet */ #define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */ #define XAXIDMA_BD_CTRL_ALL_MASK 0x0C000000 /* All control bits */ @@ -130,7 +130,7 @@ #define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */ #define XAXIDMA_BD_CTRL_ALL_MASK 0x0C000000 /* All control bits */ -#define XAXIDMA_BD_STS_ACTUAL_LEN_MASK 0x007FFFFF /* Actual len */ +#define XAXIDMA_BD_STS_ACTUAL_LEN_MASK GENMASK(25, 0) /* Actual len */ #define XAXIDMA_BD_STS_COMPLETE_MASK 0x80000000 /* Completed */ #define XAXIDMA_BD_STS_DEC_ERR_MASK 0x40000000 /* Decode error */ #define XAXIDMA_BD_STS_SLV_ERR_MASK 0x20000000 /* Slave error */ -- cgit v1.2.3 From d1978d03e86785872871bff9c2623174b10740de Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Fri, 27 Mar 2026 13:02:38 +0530 Subject: net: xilinx: axienet: Fix BQL accounting for multi-BD TX packets When a TX packet spans multiple buffer descriptors (scatter-gather), axienet_free_tx_chain sums the per-BD actual length from descriptor status into a caller-provided accumulator. That sum is reset on each NAPI poll. If the BDs for a single packet complete across different polls, the earlier bytes are lost and never credited to BQL. This causes BQL to think bytes are permanently in-flight, eventually stalling the TX queue. The SKB pointer is stored only on the last BD of a packet. When that BD completes, use skb->len for the byte count instead of summing per-BD status lengths. This matches netdev_sent_queue(), which debits skb->len, and naturally survives across polls because no partial packet contributes to the accumulator. Fixes: c900e49d58eb ("net: xilinx: axienet: Implement BQL") Signed-off-by: Suraj Gupta Reviewed-by: Sean Anderson Link: https://patch.msgid.link/20260327073238.134948-3-suraj.gupta2@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index b06e4c37ff61..263c4b67fd5a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -770,8 +770,8 @@ static int axienet_device_reset(struct net_device *ndev) * @first_bd: Index of first descriptor to clean up * @nr_bds: Max number of descriptors to clean up * @force: Whether to clean descriptors even if not complete - * @sizep: Pointer to a u32 filled with the total sum of all bytes - * in all cleaned-up descriptors. Ignored if NULL. + * @sizep: Pointer to a u32 accumulating the total byte count of + * completed packets (using skb->len). Ignored if NULL. * @budget: NAPI budget (use 0 when not called from NAPI poll) * * Would either be called after a successful transmit operation, or after @@ -805,6 +805,8 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd, DMA_TO_DEVICE); if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) { + if (sizep) + *sizep += cur_p->skb->len; napi_consume_skb(cur_p->skb, budget); packets++; } @@ -818,9 +820,6 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd, wmb(); cur_p->cntrl = 0; cur_p->status = 0; - - if (sizep) - *sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK; } if (!force) { -- cgit v1.2.3 From fa6e24963342de4370e3a3c9af41e38277b74cf3 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Fri, 27 Mar 2026 23:30:00 -0700 Subject: bridge: mrp: reject zero test interval to avoid OOM panic br_mrp_start_test() and br_mrp_start_in_test() accept the user-supplied interval value from netlink without validation. When interval is 0, usecs_to_jiffies(0) yields 0, causing the delayed work (br_mrp_test_work_expired / br_mrp_in_test_work_expired) to reschedule itself with zero delay. This creates a tight loop on system_percpu_wq that allocates and transmits MRP test frames at maximum rate, exhausting all system memory and causing a kernel panic via OOM deadlock. The same zero-interval issue applies to br_mrp_start_in_test_parse() for interconnect test frames. Use NLA_POLICY_MIN(NLA_U32, 1) in the nla_policy tables for both IFLA_BRIDGE_MRP_START_TEST_INTERVAL and IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL, so zero is rejected at the netlink attribute parsing layer before the value ever reaches the workqueue scheduling code. This is consistent with how other bridge subsystems (br_fdb, br_mst) enforce range constraints on netlink attributes. Fixes: 20f6a05ef635 ("bridge: mrp: Rework the MRP netlink interface") Fixes: 7ab1748e4ce6 ("bridge: mrp: Extend MRP netlink interface for configuring MRP interconnect") Reported-by: Weiming Shi Signed-off-by: Xiang Mei Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260328063000.1845376-1-xmei5@asu.edu Signed-off-by: Paolo Abeni --- net/bridge/br_mrp_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c index ce6f63c77cc0..86f0e75d6e34 100644 --- a/net/bridge/br_mrp_netlink.c +++ b/net/bridge/br_mrp_netlink.c @@ -196,7 +196,7 @@ static const struct nla_policy br_mrp_start_test_policy[IFLA_BRIDGE_MRP_START_TEST_MAX + 1] = { [IFLA_BRIDGE_MRP_START_TEST_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_START_TEST_RING_ID] = { .type = NLA_U32 }, - [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = NLA_POLICY_MIN(NLA_U32, 1), [IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_TEST_PERIOD] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_TEST_MONITOR] = { .type = NLA_U32 }, @@ -316,7 +316,7 @@ static const struct nla_policy br_mrp_start_in_test_policy[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1] = { [IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] = { .type = NLA_U32 }, - [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = NLA_POLICY_MIN(NLA_U32, 1), [IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD] = { .type = NLA_U32 }, }; -- cgit v1.2.3 From 9ca562bb8e66978b53028fa32b1a190708e6a091 Mon Sep 17 00:00:00 2001 From: Zhengchuan Liang Date: Mon, 30 Mar 2026 16:46:24 +0800 Subject: net: ipv6: flowlabel: defer exclusive option free until RCU teardown `ip6fl_seq_show()` walks the global flowlabel hash under the seq-file RCU read-side lock and prints `fl->opt->opt_nflen` when an option block is present. Exclusive flowlabels currently free `fl->opt` as soon as `fl->users` drops to zero in `fl_release()`. However, the surrounding `struct ip6_flowlabel` remains visible in the global hash table until later garbage collection removes it and `fl_free_rcu()` finally tears it down. A concurrent `/proc/net/ip6_flowlabel` reader can therefore race that early `kfree()` and dereference freed option state, triggering a crash in `ip6fl_seq_show()`. Fix this by keeping `fl->opt` alive until `fl_free_rcu()`. That matches the lifetime already required for the enclosing flowlabel while readers can still reach it under RCU. Fixes: d3aedd5ebd4b ("ipv6 flowlabel: Convert hash list to RCU.") Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Ren Wei Signed-off-by: Zhengchuan Liang Signed-off-by: Ren Wei Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/07351f0ec47bcee289576f39f9354f4a64add6e4.1774855883.git.zcliangcn@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_flowlabel.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 7c12bf75beed..c92f98c6f6ec 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -133,11 +133,6 @@ static void fl_release(struct ip6_flowlabel *fl) if (time_after(ttd, fl->expires)) fl->expires = ttd; ttd = fl->expires; - if (fl->opt && fl->share == IPV6_FL_S_EXCL) { - struct ipv6_txoptions *opt = fl->opt; - fl->opt = NULL; - kfree(opt); - } if (!timer_pending(&ip6_fl_gc_timer) || time_after(ip6_fl_gc_timer.expires, ttd)) mod_timer(&ip6_fl_gc_timer, ttd); -- cgit v1.2.3 From 5dd8025a49c268ab6b94d978532af3ad341132a7 Mon Sep 17 00:00:00 2001 From: Li Xiasong Date: Mon, 30 Mar 2026 20:03:35 +0800 Subject: mptcp: fix soft lockup in mptcp_recvmsg() syzbot reported a soft lockup in mptcp_recvmsg() [0]. When receiving data with MSG_PEEK | MSG_WAITALL flags, the skb is not removed from the sk_receive_queue. This causes sk_wait_data() to always find available data and never perform actual waiting, leading to a soft lockup. Fix this by adding a 'last' parameter to track the last peeked skb. This allows sk_wait_data() to make informed waiting decisions and prevent infinite loops when MSG_PEEK is used. [0]: watchdog: BUG: soft lockup - CPU#2 stuck for 156s! [server:1963] Modules linked in: CPU: 2 UID: 0 PID: 1963 Comm: server Not tainted 6.19.0-rc8 #61 PREEMPT(none) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:sk_wait_data+0x15/0x190 Code: 80 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 41 56 41 55 41 54 49 89 f4 55 48 89 d5 53 48 89 fb <48> 83 ec 30 65 48 8b 05 17 a4 6b 01 48 89 44 24 28 31 c0 65 48 8b RSP: 0018:ffffc90000603ca0 EFLAGS: 00000246 RAX: 0000000000000000 RBX: ffff888102bf0800 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffc90000603d18 RDI: ffff888102bf0800 RBP: 0000000000000000 R08: 0000000000000002 R09: 0000000000000101 R10: 0000000000000000 R11: 0000000000000075 R12: ffffc90000603d18 R13: ffff888102bf0800 R14: ffff888102bf0800 R15: 0000000000000000 FS: 00007f6e38b8c4c0(0000) GS:ffff8881b877e000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055aa7bff1680 CR3: 0000000105cbe000 CR4: 00000000000006f0 Call Trace: mptcp_recvmsg+0x547/0x8c0 net/mptcp/protocol.c:2329 inet_recvmsg+0x11f/0x130 net/ipv4/af_inet.c:891 sock_recvmsg+0x94/0xc0 net/socket.c:1100 __sys_recvfrom+0xb2/0x130 net/socket.c:2256 __x64_sys_recvfrom+0x1f/0x30 net/socket.c:2267 do_syscall_64+0x59/0x2d0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x76/0x7e arch/x86/entry/entry_64.S:131 RIP: 0033:0x7f6e386a4a1d Code: 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8d 05 f1 de 2c 00 41 89 ca 8b 00 85 c0 75 20 45 31 c9 45 31 c0 b8 2d 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 6b f3 c3 66 0f 1f 84 00 00 00 00 00 41 56 41 RSP: 002b:00007ffc3c4bb078 EFLAGS: 00000246 ORIG_RAX: 000000000000002d RAX: ffffffffffffffda RBX: 000000000000861e RCX: 00007f6e386a4a1d RDX: 00000000000003ff RSI: 00007ffc3c4bb150 RDI: 0000000000000004 RBP: 00007ffc3c4bb570 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000103 R11: 0000000000000246 R12: 00005605dbc00be0 R13: 00007ffc3c4bb650 R14: 0000000000000000 R15: 0000000000000000 Fixes: 8e04ce45a8db ("mptcp: fix MSG_PEEK stream corruption") Signed-off-by: Li Xiasong Reviewed-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260330120335.659027-1-lixiasong1@huawei.com Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cf1852b99963..65c3bb8016f4 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2006,7 +2006,7 @@ static void mptcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, size_t len, int flags, int copied_total, struct scm_timestamping_internal *tss, - int *cmsg_flags) + int *cmsg_flags, struct sk_buff **last) { struct mptcp_sock *msk = mptcp_sk(sk); struct sk_buff *skb, *tmp; @@ -2023,6 +2023,7 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, /* skip already peeked skbs */ if (total_data_len + data_len <= copied_total) { total_data_len += data_len; + *last = skb; continue; } @@ -2058,6 +2059,8 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, } mptcp_eat_recv_skb(sk, skb); + } else { + *last = skb; } if (copied >= len) @@ -2288,10 +2291,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, cmsg_flags = MPTCP_CMSG_INQ; while (copied < len) { + struct sk_buff *last = NULL; int err, bytes_read; bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, - copied, &tss, &cmsg_flags); + copied, &tss, &cmsg_flags, + &last); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; @@ -2343,7 +2348,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, pr_debug("block timeout %ld\n", timeo); mptcp_cleanup_rbuf(msk, copied); - err = sk_wait_data(sk, &timeo, NULL); + err = sk_wait_data(sk, &timeo, last); if (err < 0) { err = copied ? : err; goto out_err; -- cgit v1.2.3 From c0fd0fe745f5e8c568d898cd1513d0083e46204a Mon Sep 17 00:00:00 2001 From: Yufan Chen Date: Sun, 29 Mar 2026 00:32:57 +0800 Subject: net: ftgmac100: fix ring allocation unwind on open failure ftgmac100_alloc_rings() allocates rx_skbs, tx_skbs, rxdes, txdes, and rx_scratch in stages. On intermediate failures it returned -ENOMEM directly, leaking resources allocated earlier in the function. Rework the failure path to use staged local unwind labels and free allocated resources in reverse order before returning -ENOMEM. This matches common netdev allocation cleanup style. Fixes: d72e01a0430f ("ftgmac100: Use a scratch buffer for failed RX allocations") Cc: stable@vger.kernel.org Signed-off-by: Yufan Chen Link: https://patch.msgid.link/20260328163257.60836-1-yufan.chen@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/faraday/ftgmac100.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 1e91e79c8134..6d2fe5c2f390 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -977,19 +977,19 @@ static int ftgmac100_alloc_rings(struct ftgmac100 *priv) priv->tx_skbs = kcalloc(MAX_TX_QUEUE_ENTRIES, sizeof(void *), GFP_KERNEL); if (!priv->tx_skbs) - return -ENOMEM; + goto err_free_rx_skbs; /* Allocate descriptors */ priv->rxdes = dma_alloc_coherent(priv->dev, MAX_RX_QUEUE_ENTRIES * sizeof(struct ftgmac100_rxdes), &priv->rxdes_dma, GFP_KERNEL); if (!priv->rxdes) - return -ENOMEM; + goto err_free_tx_skbs; priv->txdes = dma_alloc_coherent(priv->dev, MAX_TX_QUEUE_ENTRIES * sizeof(struct ftgmac100_txdes), &priv->txdes_dma, GFP_KERNEL); if (!priv->txdes) - return -ENOMEM; + goto err_free_rxdes; /* Allocate scratch packet buffer */ priv->rx_scratch = dma_alloc_coherent(priv->dev, @@ -997,9 +997,29 @@ static int ftgmac100_alloc_rings(struct ftgmac100 *priv) &priv->rx_scratch_dma, GFP_KERNEL); if (!priv->rx_scratch) - return -ENOMEM; + goto err_free_txdes; return 0; + +err_free_txdes: + dma_free_coherent(priv->dev, + MAX_TX_QUEUE_ENTRIES * + sizeof(struct ftgmac100_txdes), + priv->txdes, priv->txdes_dma); + priv->txdes = NULL; +err_free_rxdes: + dma_free_coherent(priv->dev, + MAX_RX_QUEUE_ENTRIES * + sizeof(struct ftgmac100_rxdes), + priv->rxdes, priv->rxdes_dma); + priv->rxdes = NULL; +err_free_tx_skbs: + kfree(priv->tx_skbs); + priv->tx_skbs = NULL; +err_free_rx_skbs: + kfree(priv->rx_skbs); + priv->rx_skbs = NULL; + return -ENOMEM; } static void ftgmac100_init_rings(struct ftgmac100 *priv) -- cgit v1.2.3 From 48b3cd69265f346f64b93064723492da46206e9b Mon Sep 17 00:00:00 2001 From: Michal Piekos Date: Sat, 28 Mar 2026 09:55:51 +0100 Subject: net: stmmac: skip VLAN restore when VLAN hash ops are missing stmmac_vlan_restore() unconditionally calls stmmac_vlan_update() when NETIF_F_VLAN_FEATURES is set. On platforms where priv->hw->vlan (or ->update_vlan_hash) is not provided, stmmac_update_vlan_hash() returns -EINVAL via stmmac_do_void_callback(), resulting in a spurious "Failed to restore VLANs" error even when no VLAN filtering is in use. Remove not needed comment. Remove not used return value from stmmac_vlan_restore(). Tested on Orange Pi Zero 3. Fixes: bd7ad51253a7 ("net: stmmac: Fix VLAN HW state restore") Signed-off-by: Michal Piekos Link: https://patch.msgid.link/20260328-vlan-restore-error-v4-1-f88624c530dc@mmpsystems.pl Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6827c99bde8c..13d3cac056be 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -156,7 +156,7 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue); static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue); static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, u32 rxmode, u32 chan); -static int stmmac_vlan_restore(struct stmmac_priv *priv); +static void stmmac_vlan_restore(struct stmmac_priv *priv); #ifdef CONFIG_DEBUG_FS static const struct net_device_ops stmmac_netdev_ops; @@ -6859,21 +6859,15 @@ del_vlan_error: return ret; } -static int stmmac_vlan_restore(struct stmmac_priv *priv) +static void stmmac_vlan_restore(struct stmmac_priv *priv) { - int ret; - if (!(priv->dev->features & NETIF_F_VLAN_FEATURES)) - return 0; + return; if (priv->hw->num_vlan) stmmac_restore_hw_vlan_rx_fltr(priv, priv->dev, priv->hw); - ret = stmmac_vlan_update(priv, priv->num_double_vlans); - if (ret) - netdev_err(priv->dev, "Failed to restore VLANs\n"); - - return ret; + stmmac_vlan_update(priv, priv->num_double_vlans); } static int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf) -- cgit v1.2.3 From 76522fcdbc3a02b568f5d957f7e66fc194abb893 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 26 Mar 2026 00:17:09 +0100 Subject: netfilter: flowtable: strictly check for maximum number of actions The maximum number of flowtable hardware offload actions in IPv6 is: * ethernet mangling (4 payload actions, 2 for each ethernet address) * SNAT (4 payload actions) * DNAT (4 payload actions) * Double VLAN (4 vlan actions, 2 for popping vlan, and 2 for pushing) for QinQ. * Redirect (1 action) Which makes 17, while the maximum is 16. But act_ct supports for tunnels actions too. Note that payload action operates at 32-bit word level, so mangling an IPv6 address takes 4 payload actions. Update flow_action_entry_next() calls to check for the maximum number of supported actions. While at it, rise the maximum number of actions per flow from 16 to 24 so this works fine with IPv6 setups. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Reported-by: Hyunwoo Kim Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 196 ++++++++++++++++++++++------------ 1 file changed, 130 insertions(+), 66 deletions(-) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 9b677e116487..93d0aa7f8fcc 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -14,6 +14,8 @@ #include #include +#define NF_FLOW_RULE_ACTION_MAX 24 + static struct workqueue_struct *nf_flow_offload_add_wq; static struct workqueue_struct *nf_flow_offload_del_wq; static struct workqueue_struct *nf_flow_offload_stats_wq; @@ -216,7 +218,12 @@ static void flow_offload_mangle(struct flow_action_entry *entry, static inline struct flow_action_entry * flow_action_entry_next(struct nf_flow_rule *flow_rule) { - int i = flow_rule->rule->action.num_entries++; + int i; + + if (unlikely(flow_rule->rule->action.num_entries >= NF_FLOW_RULE_ACTION_MAX)) + return NULL; + + i = flow_rule->rule->action.num_entries++; return &flow_rule->rule->action.entries[i]; } @@ -234,6 +241,9 @@ static int flow_offload_eth_src(struct net *net, u32 mask, val; u16 val16; + if (!entry0 || !entry1) + return -E2BIG; + this_tuple = &flow->tuplehash[dir].tuple; switch (this_tuple->xmit_type) { @@ -284,6 +294,9 @@ static int flow_offload_eth_dst(struct net *net, u8 nud_state; u16 val16; + if (!entry0 || !entry1) + return -E2BIG; + this_tuple = &flow->tuplehash[dir].tuple; switch (this_tuple->xmit_type) { @@ -325,16 +338,19 @@ static int flow_offload_eth_dst(struct net *net, return 0; } -static void flow_offload_ipv4_snat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv4_snat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffffffff); __be32 addr; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; @@ -345,23 +361,27 @@ static void flow_offload_ipv4_snat(struct net *net, offset = offsetof(struct iphdr, daddr); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, &addr, &mask); + return 0; } -static void flow_offload_ipv4_dnat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv4_dnat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffffffff); __be32 addr; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; @@ -372,14 +392,15 @@ static void flow_offload_ipv4_dnat(struct net *net, offset = offsetof(struct iphdr, saddr); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, &addr, &mask); + return 0; } -static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, +static int flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, unsigned int offset, const __be32 *addr, const __be32 *mask) { @@ -388,15 +409,20 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; + flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, offset + i * sizeof(u32), &addr[i], mask); } + + return 0; } -static void flow_offload_ipv6_snat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv6_snat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { u32 mask = ~htonl(0xffffffff); const __be32 *addr; @@ -412,16 +438,16 @@ static void flow_offload_ipv6_snat(struct net *net, offset = offsetof(struct ipv6hdr, daddr); break; default: - return; + return -EOPNOTSUPP; } - flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); + return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); } -static void flow_offload_ipv6_dnat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv6_dnat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { u32 mask = ~htonl(0xffffffff); const __be32 *addr; @@ -437,10 +463,10 @@ static void flow_offload_ipv6_dnat(struct net *net, offset = offsetof(struct ipv6hdr, saddr); break; default: - return; + return -EOPNOTSUPP; } - flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); + return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); } static int flow_offload_l4proto(const struct flow_offload *flow) @@ -462,15 +488,18 @@ static int flow_offload_l4proto(const struct flow_offload *flow) return type; } -static void flow_offload_port_snat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_port_snat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask, port; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port); @@ -485,22 +514,26 @@ static void flow_offload_port_snat(struct net *net, mask = ~htonl(0xffff); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, &port, &mask); + return 0; } -static void flow_offload_port_dnat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_port_dnat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask, port; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port); @@ -515,20 +548,24 @@ static void flow_offload_port_dnat(struct net *net, mask = ~htonl(0xffff0000); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, &port, &mask); + return 0; } -static void flow_offload_ipv4_checksum(struct net *net, - const struct flow_offload *flow, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv4_checksum(struct net *net, + const struct flow_offload *flow, + struct nf_flow_rule *flow_rule) { u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto; struct flow_action_entry *entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; + entry->id = FLOW_ACTION_CSUM; entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR; @@ -540,12 +577,14 @@ static void flow_offload_ipv4_checksum(struct net *net, entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP; break; } + + return 0; } -static void flow_offload_redirect(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_redirect(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *this_tuple, *other_tuple; struct flow_action_entry *entry; @@ -563,21 +602,28 @@ static void flow_offload_redirect(struct net *net, ifindex = other_tuple->iifidx; break; default: - return; + return -EOPNOTSUPP; } dev = dev_get_by_index(net, ifindex); if (!dev) - return; + return -ENODEV; entry = flow_action_entry_next(flow_rule); + if (!entry) { + dev_put(dev); + return -E2BIG; + } + entry->id = FLOW_ACTION_REDIRECT; entry->dev = dev; + + return 0; } -static void flow_offload_encap_tunnel(const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_encap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *this_tuple; struct flow_action_entry *entry; @@ -585,7 +631,7 @@ static void flow_offload_encap_tunnel(const struct flow_offload *flow, this_tuple = &flow->tuplehash[dir].tuple; if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) - return; + return 0; dst = this_tuple->dst_cache; if (dst && dst->lwtstate) { @@ -594,15 +640,19 @@ static void flow_offload_encap_tunnel(const struct flow_offload *flow, tun_info = lwt_tun_info(dst->lwtstate); if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; entry->id = FLOW_ACTION_TUNNEL_ENCAP; entry->tunnel = tun_info; } } + + return 0; } -static void flow_offload_decap_tunnel(const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_decap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *other_tuple; struct flow_action_entry *entry; @@ -610,7 +660,7 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow, other_tuple = &flow->tuplehash[!dir].tuple; if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) - return; + return 0; dst = other_tuple->dst_cache; if (dst && dst->lwtstate) { @@ -619,9 +669,13 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow, tun_info = lwt_tun_info(dst->lwtstate); if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; entry->id = FLOW_ACTION_TUNNEL_DECAP; } } + + return 0; } static int @@ -633,8 +687,9 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, const struct flow_offload_tuple *tuple; int i; - flow_offload_decap_tunnel(flow, dir, flow_rule); - flow_offload_encap_tunnel(flow, dir, flow_rule); + if (flow_offload_decap_tunnel(flow, dir, flow_rule) < 0 || + flow_offload_encap_tunnel(flow, dir, flow_rule) < 0) + return -1; if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) @@ -650,6 +705,8 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, if (tuple->encap[i].proto == htons(ETH_P_8021Q)) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -1; entry->id = FLOW_ACTION_VLAN_POP; } } @@ -663,6 +720,8 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, continue; entry = flow_action_entry_next(flow_rule); + if (!entry) + return -1; switch (other_tuple->encap[i].proto) { case htons(ETH_P_PPP_SES): @@ -688,18 +747,22 @@ int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow, return -1; if (test_bit(NF_FLOW_SNAT, &flow->flags)) { - flow_offload_ipv4_snat(net, flow, dir, flow_rule); - flow_offload_port_snat(net, flow, dir, flow_rule); + if (flow_offload_ipv4_snat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_snat(net, flow, dir, flow_rule) < 0) + return -1; } if (test_bit(NF_FLOW_DNAT, &flow->flags)) { - flow_offload_ipv4_dnat(net, flow, dir, flow_rule); - flow_offload_port_dnat(net, flow, dir, flow_rule); + if (flow_offload_ipv4_dnat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_dnat(net, flow, dir, flow_rule) < 0) + return -1; } if (test_bit(NF_FLOW_SNAT, &flow->flags) || test_bit(NF_FLOW_DNAT, &flow->flags)) - flow_offload_ipv4_checksum(net, flow, flow_rule); + if (flow_offload_ipv4_checksum(net, flow, flow_rule) < 0) + return -1; - flow_offload_redirect(net, flow, dir, flow_rule); + if (flow_offload_redirect(net, flow, dir, flow_rule) < 0) + return -1; return 0; } @@ -713,22 +776,23 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow, return -1; if (test_bit(NF_FLOW_SNAT, &flow->flags)) { - flow_offload_ipv6_snat(net, flow, dir, flow_rule); - flow_offload_port_snat(net, flow, dir, flow_rule); + if (flow_offload_ipv6_snat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_snat(net, flow, dir, flow_rule) < 0) + return -1; } if (test_bit(NF_FLOW_DNAT, &flow->flags)) { - flow_offload_ipv6_dnat(net, flow, dir, flow_rule); - flow_offload_port_dnat(net, flow, dir, flow_rule); + if (flow_offload_ipv6_dnat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_dnat(net, flow, dir, flow_rule) < 0) + return -1; } - flow_offload_redirect(net, flow, dir, flow_rule); + if (flow_offload_redirect(net, flow, dir, flow_rule) < 0) + return -1; return 0; } EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6); -#define NF_FLOW_RULE_ACTION_MAX 16 - static struct nf_flow_rule * nf_flow_offload_rule_alloc(struct net *net, const struct flow_offload_work *offload, -- cgit v1.2.3 From 6d52a4a0520a6696bdde51caa11f2d6821cd0c01 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 26 Mar 2026 16:17:24 +0100 Subject: netfilter: nfnetlink_log: account for netlink header size This is a followup to an old bug fix: NLMSG_DONE needs to account for the netlink header size, not just the attribute size. This can result in a WARN splat + drop of the netlink message, but other than this there are no ill effects. Fixes: 9dfa1dfe4d5e ("netfilter: nf_log: account for size of NLMSG_DONE attribute") Reported-by: Yiming Qian Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index fcbe54940b2e..f80978c06fa0 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -726,7 +726,7 @@ nfulnl_log_packet(struct net *net, + nla_total_size(plen) /* prefix */ + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)) - + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ + + nlmsg_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { size += nla_total_size(skb->dev->hard_header_len) -- cgit v1.2.3 From a958a4f90ddd7de0800b33ca9d7b886b7d40f74e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 31 Mar 2026 23:13:36 +0200 Subject: netfilter: x_tables: ensure names are nul-terminated Reject names that lack a \0 character before feeding them to functions that expect c-strings. Fixes tag is the most recent commit that needs this change. Fixes: c38c4597e4bf ("netfilter: implement xt_cgroup cgroup2 path match") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_cgroup.c | 6 ++++++ net/netfilter/xt_rateest.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index c437fbd59ec1..43d2ae2be628 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -65,6 +65,9 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) info->priv = NULL; if (info->has_path) { + if (strnlen(info->path, sizeof(info->path)) >= sizeof(info->path)) + return -ENAMETOOLONG; + cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { pr_info_ratelimited("invalid path, errno=%ld\n", @@ -102,6 +105,9 @@ static int cgroup_mt_check_v2(const struct xt_mtchk_param *par) info->priv = NULL; if (info->has_path) { + if (strnlen(info->path, sizeof(info->path)) >= sizeof(info->path)) + return -ENAMETOOLONG; + cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { pr_info_ratelimited("invalid path, errno=%ld\n", diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 72324bd976af..b1d736c15fcb 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -91,6 +91,11 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) goto err1; } + if (strnlen(info->name1, sizeof(info->name1)) >= sizeof(info->name1)) + return -ENAMETOOLONG; + if (strnlen(info->name2, sizeof(info->name2)) >= sizeof(info->name2)) + return -ENAMETOOLONG; + ret = -ENOENT; est1 = xt_rateest_lookup(par->net, info->name1); if (!est1) -- cgit v1.2.3 From b7e8590987aa94c9dc51518fad0e58cb887b1db5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 30 Mar 2026 14:16:34 +0200 Subject: netfilter: ipset: use nla_strcmp for IPSET_ATTR_NAME attr IPSET_ATTR_NAME and IPSET_ATTR_NAMEREF are of NLA_STRING type, they cannot be treated like a c-string. They either have to be switched to NLA_NUL_STRING, or the compare operations need to use the nla functions. Fixes: f830837f0eed ("netfilter: ipset: list:set set type support") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 2 +- net/netfilter/ipset/ip_set_core.c | 4 ++-- net/netfilter/ipset/ip_set_list_set.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e9f4f845d760..b98331572ad2 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -309,7 +309,7 @@ enum { /* register and unregister set references */ extern ip_set_id_t ip_set_get_byname(struct net *net, - const char *name, struct ip_set **set); + const struct nlattr *name, struct ip_set **set); extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name); extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index a2fe711cb5e3..d0c9fe59c67d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -821,7 +821,7 @@ EXPORT_SYMBOL_GPL(ip_set_del); * */ ip_set_id_t -ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) +ip_set_get_byname(struct net *net, const struct nlattr *name, struct ip_set **set) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; @@ -830,7 +830,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) rcu_read_lock(); for (i = 0; i < inst->ip_set_max; i++) { s = rcu_dereference(inst->ip_set_list)[i]; - if (s && STRNCMP(s->name, name)) { + if (s && nla_strcmp(name, s->name) == 0) { __ip_set_get(s); index = i; *set = s; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 98b91b34c314..1cef84f15e8c 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -367,7 +367,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s); + e.id = ip_set_get_byname(map->net, tb[IPSET_ATTR_NAME], &s); if (e.id == IPSET_INVALID_ID) return -IPSET_ERR_NAME; /* "Loop detection" */ @@ -389,7 +389,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_NAMEREF]) { e.refid = ip_set_get_byname(map->net, - nla_data(tb[IPSET_ATTR_NAMEREF]), + tb[IPSET_ATTR_NAMEREF], &s); if (e.refid == IPSET_INVALID_ID) { ret = -IPSET_ERR_NAMEREF; -- cgit v1.2.3 From a242a9ae58aa46ff7dae51ce64150a93957abe65 Mon Sep 17 00:00:00 2001 From: Qi Tang Date: Mon, 30 Mar 2026 00:50:36 +0800 Subject: netfilter: nf_conntrack_helper: pass helper to expect cleanup nf_conntrack_helper_unregister() calls nf_ct_expect_iterate_destroy() to remove expectations belonging to the helper being unregistered. However, it passes NULL instead of the helper pointer as the data argument, so expect_iter_me() never matches any expectation and all of them survive the cleanup. After unregister returns, nfnl_cthelper_del() frees the helper object immediately. Subsequent expectation dumps or packet-driven init_conntrack() calls then dereference the freed exp->helper, causing a use-after-free. Pass the actual helper pointer so expectations referencing it are properly destroyed before the helper object is freed. BUG: KASAN: slab-use-after-free in string+0x38f/0x430 Read of size 1 at addr ffff888003b14d20 by task poc/103 Call Trace: string+0x38f/0x430 vsnprintf+0x3cc/0x1170 seq_printf+0x17a/0x240 exp_seq_show+0x2e5/0x560 seq_read_iter+0x419/0x1280 proc_reg_read+0x1ac/0x270 vfs_read+0x179/0x930 ksys_read+0xef/0x1c0 Freed by task 103: The buggy address is located 32 bytes inside of freed 192-byte region [ffff888003b14d00, ffff888003b14dc0) Fixes: ac7b84839003 ("netfilter: expect: add and use nf_ct_expect_iterate helpers") Signed-off-by: Qi Tang Reviewed-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 1b330ba6613b..a715304a53d8 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -415,7 +415,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) */ synchronize_rcu(); - nf_ct_expect_iterate_destroy(expect_iter_me, NULL); + nf_ct_expect_iterate_destroy(expect_iter_me, me); nf_ct_iterate_destroy(unhelp, me); /* nf_ct_iterate_destroy() does an unconditional synchronize_rcu() as -- cgit v1.2.3 From 35177c6877134a21315f37d57a5577846225623e Mon Sep 17 00:00:00 2001 From: Qi Tang Date: Tue, 31 Mar 2026 14:17:12 +0800 Subject: netfilter: ctnetlink: zero expect NAT fields when CTA_EXPECT_NAT absent ctnetlink_alloc_expect() allocates expectations from a non-zeroing slab cache via nf_ct_expect_alloc(). When CTA_EXPECT_NAT is not present in the netlink message, saved_addr and saved_proto are never initialized. Stale data from a previous slab occupant can then be dumped to userspace by ctnetlink_exp_dump_expect(), which checks these fields to decide whether to emit CTA_EXPECT_NAT. The safe sibling nf_ct_expect_init(), used by the packet path, explicitly zeroes these fields. Zero saved_addr, saved_proto and dir in the else branch, guarded by IS_ENABLED(CONFIG_NF_NAT) since these fields only exist when NAT is enabled. Confirmed by priming the expect slab with NAT-bearing expectations, freeing them, creating a new expectation without CTA_EXPECT_NAT, and observing that the ctnetlink dump emits a spurious CTA_EXPECT_NAT containing stale data from the prior allocation. Fixes: 076a0ca02644 ("netfilter: ctnetlink: add NAT support for expectations") Reported-by: kernel test robot Signed-off-by: Qi Tang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 3f408f3713bb..38bd7124d9f7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3588,6 +3588,12 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, exp, nf_ct_l3num(ct)); if (err < 0) goto err_out; +#if IS_ENABLED(CONFIG_NF_NAT) + } else { + memset(&exp->saved_addr, 0, sizeof(exp->saved_addr)); + memset(&exp->saved_proto, 0, sizeof(exp->saved_proto)); + exp->dir = 0; +#endif } return exp; err_out: -- cgit v1.2.3 From 917b61fa2042f11e2af4c428e43f08199586633a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 30 Mar 2026 11:26:22 +0200 Subject: netfilter: ctnetlink: ignore explicit helper on new expectations Use the existing master conntrack helper, anything else is not really supported and it just makes validation more complicated, so just ignore what helper userspace suggests for this expectation. This was uncovered when validating CTA_EXPECT_CLASS via different helper provided by userspace than the existing master conntrack helper: BUG: KASAN: slab-out-of-bounds in nf_ct_expect_related_report+0x2479/0x27c0 Read of size 4 at addr ffff8880043fe408 by task poc/102 Call Trace: nf_ct_expect_related_report+0x2479/0x27c0 ctnetlink_create_expect+0x22b/0x3b0 ctnetlink_new_expect+0x4bd/0x5c0 nfnetlink_rcv_msg+0x67a/0x950 netlink_rcv_skb+0x120/0x350 Allowing to read kernel memory bytes off the expectation boundary. CTA_EXPECT_HELP_NAME is still used to offer the helper name to userspace via netlink dump. Fixes: bd0779370588 ("netfilter: nfnetlink_queue: allow to attach expectations to conntracks") Reported-by: Qi Tang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 54 ++++++------------------------------ 1 file changed, 9 insertions(+), 45 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 38bd7124d9f7..a20cd82446c5 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2636,7 +2636,6 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct, - struct nf_conntrack_helper *helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask); @@ -2865,7 +2864,6 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, { struct nlattr *cda[CTA_EXPECT_MAX+1]; struct nf_conntrack_tuple tuple, mask; - struct nf_conntrack_helper *helper = NULL; struct nf_conntrack_expect *exp; int err; @@ -2879,17 +2877,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, if (err < 0) return err; - if (cda[CTA_EXPECT_HELP_NAME]) { - const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); - - helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), - nf_ct_protonum(ct)); - if (helper == NULL) - return -EOPNOTSUPP; - } - exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct, - helper, &tuple, &mask); + &tuple, &mask); if (IS_ERR(exp)) return PTR_ERR(exp); @@ -3528,11 +3517,11 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, - struct nf_conntrack_helper *helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask) { struct net *net = read_pnet(&ct->ct_net); + struct nf_conntrack_helper *helper; struct nf_conntrack_expect *exp; struct nf_conn_help *help; u32 class = 0; @@ -3542,7 +3531,11 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, if (!help) return ERR_PTR(-EOPNOTSUPP); - if (cda[CTA_EXPECT_CLASS] && helper) { + helper = rcu_dereference(help->helper); + if (!helper) + return ERR_PTR(-EOPNOTSUPP); + + if (cda[CTA_EXPECT_CLASS]) { class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS])); if (class > helper->expect_class_max) return ERR_PTR(-EINVAL); @@ -3576,8 +3569,6 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, #ifdef CONFIG_NF_CONNTRACK_ZONES exp->zone = ct->zone; #endif - if (!helper) - helper = rcu_dereference(help->helper); rcu_assign_pointer(exp->helper, helper); exp->tuple = *tuple; exp->mask.src.u3 = mask->src.u3; @@ -3609,7 +3600,6 @@ ctnetlink_create_expect(struct net *net, { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; - struct nf_conntrack_helper *helper = NULL; struct nf_conntrack_expect *exp; struct nf_conn *ct; int err; @@ -3635,33 +3625,7 @@ ctnetlink_create_expect(struct net *net, ct = nf_ct_tuplehash_to_ctrack(h); rcu_read_lock(); - if (cda[CTA_EXPECT_HELP_NAME]) { - const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); - - helper = __nf_conntrack_helper_find(helpname, u3, - nf_ct_protonum(ct)); - if (helper == NULL) { - rcu_read_unlock(); -#ifdef CONFIG_MODULES - if (request_module("nfct-helper-%s", helpname) < 0) { - err = -EOPNOTSUPP; - goto err_ct; - } - rcu_read_lock(); - helper = __nf_conntrack_helper_find(helpname, u3, - nf_ct_protonum(ct)); - if (helper) { - err = -EAGAIN; - goto err_rcu; - } - rcu_read_unlock(); -#endif - err = -EOPNOTSUPP; - goto err_ct; - } - } - - exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask); + exp = ctnetlink_alloc_expect(cda, ct, &tuple, &mask); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto err_rcu; @@ -3671,8 +3635,8 @@ ctnetlink_create_expect(struct net *net, nf_ct_expect_put(exp); err_rcu: rcu_read_unlock(); -err_ct: nf_ct_put(ct); + return err; } -- cgit v1.2.3 From 9862ef9ab0a116c6dca98842aab7de13a252ae02 Mon Sep 17 00:00:00 2001 From: Yifan Wu Date: Mon, 30 Mar 2026 14:39:24 -0700 Subject: netfilter: ipset: drop logically empty buckets in mtype_del mtype_del() counts empty slots below n->pos in k, but it only drops the bucket when both n->pos and k are zero. This misses buckets whose live entries have all been removed while n->pos still points past deleted slots. Treat a bucket as empty when all positions below n->pos are unused and release it directly instead of shrinking it further. Fixes: 8af1c6fbd923 ("netfilter: ipset: Fix forceadd evaluation path") Cc: stable@vger.kernel.org Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Yifan Wu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Reviewed-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 181daa9c2019..b79e5dd2af03 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1098,7 +1098,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (!test_bit(i, n->used)) k++; } - if (n->pos == 0 && k == 0) { + if (k == n->pos) { t->hregion[r].ext_size -= ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, key), NULL); kfree_rcu(n, rcu); -- cgit v1.2.3 From 3d5d488f11776738deab9da336038add95d342d1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 31 Mar 2026 16:41:25 +0200 Subject: netfilter: x_tables: restrict xt_check_match/xt_check_target extensions for NFPROTO_ARP Weiming Shi says: xt_match and xt_target structs registered with NFPROTO_UNSPEC can be loaded by any protocol family through nft_compat. When such a match/target sets .hooks to restrict which hooks it may run on, the bitmask uses NF_INET_* constants. This is only correct for families whose hook layout matches NF_INET_*: IPv4, IPv6, INET, and bridge all share the same five hooks (PRE_ROUTING ... POST_ROUTING). ARP only has three hooks (IN=0, OUT=1, FORWARD=2) with different semantics. Because NF_ARP_OUT == 1 == NF_INET_LOCAL_IN, the .hooks validation silently passes for the wrong reasons, allowing matches to run on ARP chains where the hook assumptions (e.g. state->in being set on input hooks) do not hold. This leads to NULL pointer dereferences; xt_devgroup is one concrete example: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000044: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000220-0x0000000000000227] RIP: 0010:devgroup_mt+0xff/0x350 Call Trace: nft_match_eval (net/netfilter/nft_compat.c:407) nft_do_chain (net/netfilter/nf_tables_core.c:285) nft_do_chain_arp (net/netfilter/nft_chain_filter.c:61) nf_hook_slow (net/netfilter/core.c:623) arp_xmit (net/ipv4/arp.c:666) Kernel panic - not syncing: Fatal exception in interrupt Fix it by restricting arptables to NFPROTO_ARP extensions only. Note that arptables-legacy only supports: - arpt_CLASSIFY - arpt_mangle - arpt_MARK that provide explicit NFPROTO_ARP match/target declarations. Fixes: 9291747f118d ("netfilter: xtables: add device group match") Reported-by: Xiang Mei Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e594b3b7ad82..b39017c80548 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -501,6 +501,17 @@ int xt_check_match(struct xt_mtchk_param *par, par->match->table, par->table); return -EINVAL; } + + /* NFPROTO_UNSPEC implies NF_INET_* hooks which do not overlap with + * NF_ARP_IN,OUT,FORWARD, allow explicit extensions with NFPROTO_ARP + * support. + */ + if (par->family == NFPROTO_ARP && + par->match->family != NFPROTO_ARP) { + pr_info_ratelimited("%s_tables: %s match: not valid for this family\n", + xt_prefix[par->family], par->match->name); + return -EINVAL; + } if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) { char used[64], allow[64]; @@ -1016,6 +1027,18 @@ int xt_check_target(struct xt_tgchk_param *par, par->target->table, par->table); return -EINVAL; } + + /* NFPROTO_UNSPEC implies NF_INET_* hooks which do not overlap with + * NF_ARP_IN,OUT,FORWARD, allow explicit extensions with NFPROTO_ARP + * support. + */ + if (par->family == NFPROTO_ARP && + par->target->family != NFPROTO_ARP) { + pr_info_ratelimited("%s_tables: %s target: not valid for this family\n", + xt_prefix[par->family], par->target->name); + return -EINVAL; + } + if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) { char used[64], allow[64]; -- cgit v1.2.3 From da107398cbd4bbdb6bffecb2ce86d5c9384f4cec Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 31 Mar 2026 23:08:02 +0200 Subject: netfilter: nf_tables: reject immediate NF_QUEUE verdict nft_queue is always used from userspace nftables to deliver the NF_QUEUE verdict. Immediately emitting an NF_QUEUE verdict is never used by the userspace nft tools, so reject immediate NF_QUEUE verdicts. The arp family does not provide queue support, but such an immediate verdict is still reachable. Globally reject NF_QUEUE immediate verdicts to address this issue. Fixes: f342de4e2f33 ("netfilter: nf_tables: reject QUEUE/DROP verdict parameters") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3922cff1bb3d..8c42247a176c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -11667,8 +11667,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, switch (data->verdict.code) { case NF_ACCEPT: case NF_DROP: - case NF_QUEUE: - break; case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: @@ -11703,6 +11701,11 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, data->verdict.chain = chain; break; + case NF_QUEUE: + /* The nft_queue expression is used for this purpose, an + * immediate NF_QUEUE verdict should not ever be seen here. + */ + fallthrough; default: return -EINVAL; } -- cgit v1.2.3 From a834a0b66ec6fb743377201a0f4229bb2503f4ce Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Wed, 25 Mar 2026 21:07:46 +0200 Subject: Bluetooth: hci_sync: call destroy in hci_cmd_sync_run if immediate hci_cmd_sync_run() may run the work immediately if called from existing sync work (otherwise it queues a new sync work). In this case it fails to call the destroy() function. On immediate run, make it behave same way as if item was queued successfully: call destroy, and return 0. The only callsite is hci_abort_conn() via hci_cmd_sync_run_once(), and this changes its return value. However, its return value is not used except as the return value for hci_disconnect(), and nothing uses the return value of hci_disconnect(). Hence there should be no behavior change anywhere. Fixes: c898f6d7b093b ("Bluetooth: hci_sync: Introduce hci_cmd_sync_run/hci_cmd_sync_run_once") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 45d16639874a..6283a4df78b0 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -801,8 +801,15 @@ int hci_cmd_sync_run(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, return -ENETDOWN; /* If on cmd_sync_work then run immediately otherwise queue */ - if (current_work() == &hdev->cmd_sync_work) - return func(hdev, data); + if (current_work() == &hdev->cmd_sync_work) { + int err; + + err = func(hdev, data); + if (destroy) + destroy(hdev, data, err); + + return 0; + } return hci_cmd_sync_submit(hdev, func, data, destroy); } -- cgit v1.2.3 From 8a5b0135d4a5d9683203a3d9a12a711ccec5936b Mon Sep 17 00:00:00 2001 From: Cen Zhang Date: Thu, 26 Mar 2026 23:16:45 +0800 Subject: Bluetooth: SCO: fix race conditions in sco_sock_connect() sco_sock_connect() checks sk_state and sk_type without holding the socket lock. Two concurrent connect() syscalls on the same socket can both pass the check and enter sco_connect(), leading to use-after-free. The buggy scenario involves three participants and was confirmed with additional logging instrumentation: Thread A (connect): HCI disconnect: Thread B (connect): sco_sock_connect(sk) sco_sock_connect(sk) sk_state==BT_OPEN sk_state==BT_OPEN (pass, no lock) (pass, no lock) sco_connect(sk): sco_connect(sk): hci_dev_lock hci_dev_lock hci_connect_sco <- blocked -> hcon1 sco_conn_add->conn1 lock_sock(sk) sco_chan_add: conn1->sk = sk sk->conn = conn1 sk_state=BT_CONNECT release_sock hci_dev_unlock hci_dev_lock sco_conn_del: lock_sock(sk) sco_chan_del: sk->conn=NULL conn1->sk=NULL sk_state= BT_CLOSED SOCK_ZAPPED release_sock hci_dev_unlock (unblocked) hci_connect_sco -> hcon2 sco_conn_add -> conn2 lock_sock(sk) sco_chan_add: sk->conn=conn2 sk_state= BT_CONNECT // zombie sk! release_sock hci_dev_unlock Thread B revives a BT_CLOSED + SOCK_ZAPPED socket back to BT_CONNECT. Subsequent cleanup triggers double sock_put() and use-after-free. Meanwhile conn1 is leaked as it was orphaned when sco_conn_del() cleared the association. Fix this by: - Moving lock_sock() before the sk_state/sk_type checks in sco_sock_connect() to serialize concurrent connect attempts - Fixing the sk_type != SOCK_SEQPACKET check to actually return the error instead of just assigning it - Adding a state re-check in sco_connect() after lock_sock() to catch state changes during the window between the locks - Adding sco_pi(sk)->conn check in sco_chan_add() to prevent double-attach of a socket to multiple connections - Adding hci_conn_drop() on sco_chan_add failure to prevent HCI connection leaks Fixes: 9a8ec9e8ebb5 ("Bluetooth: SCO: Fix possible circular locking dependency on sco_connect_cfm") Signed-off-by: Cen Zhang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/sco.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 584e059de20a..b84587811ef4 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -298,7 +298,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk, int err = 0; sco_conn_lock(conn); - if (conn->sk) + if (conn->sk || sco_pi(sk)->conn) err = -EBUSY; else __sco_chan_add(conn, sk, parent); @@ -353,9 +353,20 @@ static int sco_connect(struct sock *sk) lock_sock(sk); + /* Recheck state after reacquiring the socket lock, as another + * thread may have changed it (e.g., closed the socket). + */ + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { + release_sock(sk); + hci_conn_drop(hcon); + err = -EBADFD; + goto unlock; + } + err = sco_chan_add(conn, sk, NULL); if (err) { release_sock(sk); + hci_conn_drop(hcon); goto unlock; } @@ -656,13 +667,18 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr_unsized *addr, addr->sa_family != AF_BLUETOOTH) return -EINVAL; - if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) + lock_sock(sk); + + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { + release_sock(sk); return -EBADFD; + } - if (sk->sk_type != SOCK_SEQPACKET) - err = -EINVAL; + if (sk->sk_type != SOCK_SEQPACKET) { + release_sock(sk); + return -EINVAL; + } - lock_sock(sk); /* Set destination address and psm */ bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr); release_sock(sk); -- cgit v1.2.3 From 2b2bf47cd75518c36fa2d41380e4a40641cc89cd Mon Sep 17 00:00:00 2001 From: Oleh Konko Date: Thu, 26 Mar 2026 17:31:24 +0000 Subject: Bluetooth: hci_event: move wake reason storage into validated event handlers hci_store_wake_reason() is called from hci_event_packet() immediately after stripping the HCI event header but before hci_event_func() enforces the per-event minimum payload length from hci_ev_table. This means a short HCI event frame can reach bacpy() before any bounds check runs. Rather than duplicating skb parsing and per-event length checks inside hci_store_wake_reason(), move wake-address storage into the individual event handlers after their existing event-length validation has succeeded. Convert hci_store_wake_reason() into a small helper that only stores an already-validated bdaddr while the caller holds hci_dev_lock(). Use the same helper after hci_event_func() with a NULL address to preserve the existing unexpected-wake fallback semantics when no validated event handler records a wake address. Annotate the helper with __must_hold(&hdev->lock) and add lockdep_assert_held(&hdev->lock) so future call paths keep the lock contract explicit. Call the helper from hci_conn_request_evt(), hci_conn_complete_evt(), hci_sync_conn_complete_evt(), le_conn_complete_evt(), hci_le_adv_report_evt(), hci_le_ext_adv_report_evt(), hci_le_direct_adv_report_evt(), hci_le_pa_sync_established_evt(), and hci_le_past_received_evt(). Fixes: 2f20216c1d6f ("Bluetooth: Emit controller suspend and resume events") Cc: stable@vger.kernel.org Signed-off-by: Oleh Konko Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 94 ++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 59 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 286529d2e554..81d2f9a3eec9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -80,6 +80,10 @@ static void *hci_le_ev_skb_pull(struct hci_dev *hdev, struct sk_buff *skb, return data; } +static void hci_store_wake_reason(struct hci_dev *hdev, + const bdaddr_t *bdaddr, u8 addr_type) + __must_hold(&hdev->lock); + static u8 hci_cc_inquiry_cancel(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -3111,6 +3115,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", status); hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, BDADDR_BREDR); /* Check for existing connection: * @@ -3274,6 +3279,10 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "bdaddr %pMR type 0x%x", &ev->bdaddr, ev->link_type); + hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, BDADDR_BREDR); + hci_dev_unlock(hdev); + /* Reject incoming connection from device with same BD ADDR against * CVE-2020-26555 */ @@ -5021,6 +5030,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", status); hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, BDADDR_BREDR); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { @@ -5713,6 +5723,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, int err; hci_dev_lock(hdev); + hci_store_wake_reason(hdev, bdaddr, bdaddr_type); /* All controllers implicitly stop advertising in the event of a * connection, so ensure that the state bit is cleared. @@ -6005,6 +6016,7 @@ static void hci_le_past_received_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, ev->bdaddr_type); hci_dev_clear_flag(hdev, HCI_PA_SYNC); @@ -6403,6 +6415,8 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, info->length + 1)) break; + hci_store_wake_reason(hdev, &info->bdaddr, info->bdaddr_type); + if (info->length <= max_adv_len(hdev)) { rssi = info->data[info->length]; process_adv_report(hdev, info->type, &info->bdaddr, @@ -6491,6 +6505,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, info->length)) break; + hci_store_wake_reason(hdev, &info->bdaddr, info->bdaddr_type); + evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK; legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type); @@ -6536,6 +6552,7 @@ static void hci_le_pa_sync_established_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, ev->bdaddr_type); hci_dev_clear_flag(hdev, HCI_PA_SYNC); @@ -6834,6 +6851,8 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, for (i = 0; i < ev->num; i++) { struct hci_ev_le_direct_adv_info *info = &ev->info[i]; + hci_store_wake_reason(hdev, &info->bdaddr, info->bdaddr_type); + process_adv_report(hdev, info->type, &info->bdaddr, info->bdaddr_type, &info->direct_addr, info->direct_addr_type, HCI_ADV_PHY_1M, 0, @@ -7517,73 +7536,29 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, return true; } -static void hci_store_wake_reason(struct hci_dev *hdev, u8 event, - struct sk_buff *skb) +static void hci_store_wake_reason(struct hci_dev *hdev, + const bdaddr_t *bdaddr, u8 addr_type) + __must_hold(&hdev->lock) { - struct hci_ev_le_advertising_info *adv; - struct hci_ev_le_direct_adv_info *direct_adv; - struct hci_ev_le_ext_adv_info *ext_adv; - const struct hci_ev_conn_complete *conn_complete = (void *)skb->data; - const struct hci_ev_conn_request *conn_request = (void *)skb->data; - - hci_dev_lock(hdev); + lockdep_assert_held(&hdev->lock); /* If we are currently suspended and this is the first BT event seen, * save the wake reason associated with the event. */ if (!hdev->suspended || hdev->wake_reason) - goto unlock; + return; + + if (!bdaddr) { + hdev->wake_reason = MGMT_WAKE_REASON_UNEXPECTED; + return; + } /* Default to remote wake. Values for wake_reason are documented in the * Bluez mgmt api docs. */ hdev->wake_reason = MGMT_WAKE_REASON_REMOTE_WAKE; - - /* Once configured for remote wakeup, we should only wake up for - * reconnections. It's useful to see which device is waking us up so - * keep track of the bdaddr of the connection event that woke us up. - */ - if (event == HCI_EV_CONN_REQUEST) { - bacpy(&hdev->wake_addr, &conn_request->bdaddr); - hdev->wake_addr_type = BDADDR_BREDR; - } else if (event == HCI_EV_CONN_COMPLETE) { - bacpy(&hdev->wake_addr, &conn_complete->bdaddr); - hdev->wake_addr_type = BDADDR_BREDR; - } else if (event == HCI_EV_LE_META) { - struct hci_ev_le_meta *le_ev = (void *)skb->data; - u8 subevent = le_ev->subevent; - u8 *ptr = &skb->data[sizeof(*le_ev)]; - u8 num_reports = *ptr; - - if ((subevent == HCI_EV_LE_ADVERTISING_REPORT || - subevent == HCI_EV_LE_DIRECT_ADV_REPORT || - subevent == HCI_EV_LE_EXT_ADV_REPORT) && - num_reports) { - adv = (void *)(ptr + 1); - direct_adv = (void *)(ptr + 1); - ext_adv = (void *)(ptr + 1); - - switch (subevent) { - case HCI_EV_LE_ADVERTISING_REPORT: - bacpy(&hdev->wake_addr, &adv->bdaddr); - hdev->wake_addr_type = adv->bdaddr_type; - break; - case HCI_EV_LE_DIRECT_ADV_REPORT: - bacpy(&hdev->wake_addr, &direct_adv->bdaddr); - hdev->wake_addr_type = direct_adv->bdaddr_type; - break; - case HCI_EV_LE_EXT_ADV_REPORT: - bacpy(&hdev->wake_addr, &ext_adv->bdaddr); - hdev->wake_addr_type = ext_adv->bdaddr_type; - break; - } - } - } else { - hdev->wake_reason = MGMT_WAKE_REASON_UNEXPECTED; - } - -unlock: - hci_dev_unlock(hdev); + bacpy(&hdev->wake_addr, bdaddr); + hdev->wake_addr_type = addr_type; } #define HCI_EV_VL(_op, _func, _min_len, _max_len) \ @@ -7830,14 +7805,15 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) skb_pull(skb, HCI_EVENT_HDR_SIZE); - /* Store wake reason if we're suspended */ - hci_store_wake_reason(hdev, event, skb); - bt_dev_dbg(hdev, "event 0x%2.2x", event); hci_event_func(hdev, event, skb, &opcode, &status, &req_complete, &req_complete_skb); + hci_dev_lock(hdev); + hci_store_wake_reason(hdev, NULL, 0); + hci_dev_unlock(hdev); + if (req_complete) { req_complete(hdev, status, opcode); } else if (req_complete_skb) { -- cgit v1.2.3 From 2969554bcfccb5c609f6b6cd4a014933f3a66dd0 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Wed, 25 Mar 2026 21:07:43 +0200 Subject: Bluetooth: hci_sync: hci_cmd_sync_queue_once() return -EEXIST if exists hci_cmd_sync_queue_once() needs to indicate whether a queue item was added, so caller can know if callbacks are called, so it can avoid leaking resources. Change the function to return -EEXIST if queue item already exists. Modify all callsites to handle that. Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 53 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 6283a4df78b0..97745710e3ce 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -780,7 +780,7 @@ int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy)) - return 0; + return -EEXIST; return hci_cmd_sync_queue(hdev, func, data, destroy); } @@ -3262,6 +3262,8 @@ static int update_passive_scan_sync(struct hci_dev *hdev, void *data) int hci_update_passive_scan(struct hci_dev *hdev) { + int err; + /* Only queue if it would have any effect */ if (!test_bit(HCI_UP, &hdev->flags) || test_bit(HCI_INIT, &hdev->flags) || @@ -3271,8 +3273,9 @@ int hci_update_passive_scan(struct hci_dev *hdev) hci_dev_test_flag(hdev, HCI_UNREGISTER)) return 0; - return hci_cmd_sync_queue_once(hdev, update_passive_scan_sync, NULL, - NULL); + err = hci_cmd_sync_queue_once(hdev, update_passive_scan_sync, NULL, + NULL); + return (err == -EEXIST) ? 0 : err; } int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val) @@ -6965,8 +6968,11 @@ static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue_once(hdev, hci_acl_create_conn_sync, conn, - NULL); + int err; + + err = hci_cmd_sync_queue_once(hdev, hci_acl_create_conn_sync, conn, + NULL); + return (err == -EEXIST) ? 0 : err; } static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) @@ -7002,8 +7008,11 @@ done: int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue_once(hdev, hci_le_create_conn_sync, conn, - create_le_conn_complete); + int err; + + err = hci_cmd_sync_queue_once(hdev, hci_le_create_conn_sync, conn, + create_le_conn_complete); + return (err == -EEXIST) ? 0 : err; } int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn) @@ -7210,8 +7219,11 @@ done: int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn, - create_pa_complete); + int err; + + err = hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn, + create_pa_complete); + return (err == -EEXIST) ? 0 : err; } static void create_big_complete(struct hci_dev *hdev, void *data, int err) @@ -7273,8 +7285,11 @@ static int hci_le_big_create_sync(struct hci_dev *hdev, void *data) int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue_once(hdev, hci_le_big_create_sync, conn, - create_big_complete); + int err; + + err = hci_cmd_sync_queue_once(hdev, hci_le_big_create_sync, conn, + create_big_complete); + return (err == -EEXIST) ? 0 : err; } struct past_data { @@ -7366,7 +7381,7 @@ int hci_past_sync(struct hci_conn *conn, struct hci_conn *le) if (err) kfree(data); - return err; + return (err == -EEXIST) ? 0 : err; } static void le_read_features_complete(struct hci_dev *hdev, void *data, int err) @@ -7453,7 +7468,7 @@ int hci_le_read_remote_features(struct hci_conn *conn) else err = -EOPNOTSUPP; - return err; + return (err == -EEXIST) ? 0 : err; } static void pkt_type_changed(struct hci_dev *hdev, void *data, int err) @@ -7479,6 +7494,7 @@ int hci_acl_change_pkt_type(struct hci_conn *conn, u16 pkt_type) { struct hci_dev *hdev = conn->hdev; struct hci_cp_change_conn_ptype *cp; + int err; cp = kmalloc_obj(*cp); if (!cp) @@ -7487,8 +7503,9 @@ int hci_acl_change_pkt_type(struct hci_conn *conn, u16 pkt_type) cp->handle = cpu_to_le16(conn->handle); cp->pkt_type = cpu_to_le16(pkt_type); - return hci_cmd_sync_queue_once(hdev, hci_change_conn_ptype_sync, cp, - pkt_type_changed); + err = hci_cmd_sync_queue_once(hdev, hci_change_conn_ptype_sync, cp, + pkt_type_changed); + return (err == -EEXIST) ? 0 : err; } static void le_phy_update_complete(struct hci_dev *hdev, void *data, int err) @@ -7514,6 +7531,7 @@ int hci_le_set_phy(struct hci_conn *conn, u8 tx_phys, u8 rx_phys) { struct hci_dev *hdev = conn->hdev; struct hci_cp_le_set_phy *cp; + int err; cp = kmalloc_obj(*cp); if (!cp) @@ -7524,6 +7542,7 @@ int hci_le_set_phy(struct hci_conn *conn, u8 tx_phys, u8 rx_phys) cp->tx_phys = tx_phys; cp->rx_phys = rx_phys; - return hci_cmd_sync_queue_once(hdev, hci_le_set_phy_sync, cp, - le_phy_update_complete); + err = hci_cmd_sync_queue_once(hdev, hci_le_set_phy_sync, cp, + le_phy_update_complete); + return (err == -EEXIST) ? 0 : err; } -- cgit v1.2.3 From aca377208e7f7322bf4e107cdec6e7d7e8aa7a88 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Wed, 25 Mar 2026 21:07:44 +0200 Subject: Bluetooth: hci_sync: fix leaks when hci_cmd_sync_queue_once fails When hci_cmd_sync_queue_once() returns with error, the destroy callback will not be called. Fix leaking references / memory on these failures. Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 97745710e3ce..8cbbba50e77e 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -7460,13 +7460,16 @@ int hci_le_read_remote_features(struct hci_conn *conn) * role is possible. Otherwise just transition into the * connected state without requesting the remote features. */ - if (conn->out || (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES)) + if (conn->out || (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES)) { err = hci_cmd_sync_queue_once(hdev, hci_le_read_remote_features_sync, hci_conn_hold(conn), le_read_features_complete); - else + if (err) + hci_conn_drop(conn); + } else { err = -EOPNOTSUPP; + } return (err == -EEXIST) ? 0 : err; } @@ -7505,6 +7508,9 @@ int hci_acl_change_pkt_type(struct hci_conn *conn, u16 pkt_type) err = hci_cmd_sync_queue_once(hdev, hci_change_conn_ptype_sync, cp, pkt_type_changed); + if (err) + kfree(cp); + return (err == -EEXIST) ? 0 : err; } @@ -7544,5 +7550,8 @@ int hci_le_set_phy(struct hci_conn *conn, u8 tx_phys, u8 rx_phys) err = hci_cmd_sync_queue_once(hdev, hci_le_set_phy_sync, cp, le_phy_update_complete); + if (err) + kfree(cp); + return (err == -EEXIST) ? 0 : err; } -- cgit v1.2.3 From 035c25007c9e698bef3826070ee34bb6d778020c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 25 Mar 2026 11:11:46 -0400 Subject: Bluetooth: hci_sync: Fix UAF in le_read_features_complete This fixes the following backtrace caused by hci_conn being freed before le_read_features_complete but after hci_le_read_remote_features_sync so hci_conn_del -> hci_cmd_sync_dequeue is not able to prevent it: ================================================================== BUG: KASAN: slab-use-after-free in instrument_atomic_read_write include/linux/instrumented.h:96 [inline] BUG: KASAN: slab-use-after-free in atomic_dec_and_test include/linux/atomic/atomic-instrumented.h:1383 [inline] BUG: KASAN: slab-use-after-free in hci_conn_drop include/net/bluetooth/hci_core.h:1688 [inline] BUG: KASAN: slab-use-after-free in le_read_features_complete+0x5b/0x340 net/bluetooth/hci_sync.c:7344 Write of size 4 at addr ffff8880796b0010 by task kworker/u9:0/52 CPU: 0 UID: 0 PID: 52 Comm: kworker/u9:0 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/25/2025 Workqueue: hci0 hci_cmd_sync_work Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xcd/0x630 mm/kasan/report.c:482 kasan_report+0xe0/0x110 mm/kasan/report.c:595 check_region_inline mm/kasan/generic.c:194 [inline] kasan_check_range+0x100/0x1b0 mm/kasan/generic.c:200 instrument_atomic_read_write include/linux/instrumented.h:96 [inline] atomic_dec_and_test include/linux/atomic/atomic-instrumented.h:1383 [inline] hci_conn_drop include/net/bluetooth/hci_core.h:1688 [inline] le_read_features_complete+0x5b/0x340 net/bluetooth/hci_sync.c:7344 hci_cmd_sync_work+0x1ff/0x430 net/bluetooth/hci_sync.c:334 process_one_work+0x9ba/0x1b20 kernel/workqueue.c:3257 process_scheduled_works kernel/workqueue.c:3340 [inline] worker_thread+0x6c8/0xf10 kernel/workqueue.c:3421 kthread+0x3c5/0x780 kernel/kthread.c:463 ret_from_fork+0x983/0xb10 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246 Allocated by task 5932: kasan_save_stack+0x33/0x60 mm/kasan/common.c:56 kasan_save_track+0x14/0x30 mm/kasan/common.c:77 poison_kmalloc_redzone mm/kasan/common.c:400 [inline] __kasan_kmalloc+0xaa/0xb0 mm/kasan/common.c:417 kmalloc_noprof include/linux/slab.h:957 [inline] kzalloc_noprof include/linux/slab.h:1094 [inline] __hci_conn_add+0xf8/0x1c70 net/bluetooth/hci_conn.c:963 hci_conn_add_unset+0x76/0x100 net/bluetooth/hci_conn.c:1084 le_conn_complete_evt+0x639/0x1f20 net/bluetooth/hci_event.c:5714 hci_le_enh_conn_complete_evt+0x23d/0x380 net/bluetooth/hci_event.c:5861 hci_le_meta_evt+0x357/0x5e0 net/bluetooth/hci_event.c:7408 hci_event_func net/bluetooth/hci_event.c:7716 [inline] hci_event_packet+0x685/0x11c0 net/bluetooth/hci_event.c:7773 hci_rx_work+0x2c9/0xeb0 net/bluetooth/hci_core.c:4076 process_one_work+0x9ba/0x1b20 kernel/workqueue.c:3257 process_scheduled_works kernel/workqueue.c:3340 [inline] worker_thread+0x6c8/0xf10 kernel/workqueue.c:3421 kthread+0x3c5/0x780 kernel/kthread.c:463 ret_from_fork+0x983/0xb10 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246 Freed by task 5932: kasan_save_stack+0x33/0x60 mm/kasan/common.c:56 kasan_save_track+0x14/0x30 mm/kasan/common.c:77 __kasan_save_free_info+0x3b/0x60 mm/kasan/generic.c:587 kasan_save_free_info mm/kasan/kasan.h:406 [inline] poison_slab_object mm/kasan/common.c:252 [inline] __kasan_slab_free+0x5f/0x80 mm/kasan/common.c:284 kasan_slab_free include/linux/kasan.h:234 [inline] slab_free_hook mm/slub.c:2540 [inline] slab_free mm/slub.c:6663 [inline] kfree+0x2f8/0x6e0 mm/slub.c:6871 device_release+0xa4/0x240 drivers/base/core.c:2565 kobject_cleanup lib/kobject.c:689 [inline] kobject_release lib/kobject.c:720 [inline] kref_put include/linux/kref.h:65 [inline] kobject_put+0x1e7/0x590 lib/kobject.c:737 put_device drivers/base/core.c:3797 [inline] device_unregister+0x2f/0xc0 drivers/base/core.c:3920 hci_conn_del_sysfs+0xb4/0x180 net/bluetooth/hci_sysfs.c:79 hci_conn_cleanup net/bluetooth/hci_conn.c:173 [inline] hci_conn_del+0x657/0x1180 net/bluetooth/hci_conn.c:1234 hci_disconn_complete_evt+0x410/0xa00 net/bluetooth/hci_event.c:3451 hci_event_func net/bluetooth/hci_event.c:7719 [inline] hci_event_packet+0xa10/0x11c0 net/bluetooth/hci_event.c:7773 hci_rx_work+0x2c9/0xeb0 net/bluetooth/hci_core.c:4076 process_one_work+0x9ba/0x1b20 kernel/workqueue.c:3257 process_scheduled_works kernel/workqueue.c:3340 [inline] worker_thread+0x6c8/0xf10 kernel/workqueue.c:3421 kthread+0x3c5/0x780 kernel/kthread.c:463 ret_from_fork+0x983/0xb10 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246 The buggy address belongs to the object at ffff8880796b0000 which belongs to the cache kmalloc-8k of size 8192 The buggy address is located 16 bytes inside of freed 8192-byte region [ffff8880796b0000, ffff8880796b2000) The buggy address belongs to the physical page: page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x796b0 head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 anon flags: 0xfff00000000040(head|node=0|zone=1|lastcpupid=0x7ff) page_type: f5(slab) raw: 00fff00000000040 ffff88813ff27280 0000000000000000 0000000000000001 raw: 0000000000000000 0000000000020002 00000000f5000000 0000000000000000 head: 00fff00000000040 ffff88813ff27280 0000000000000000 0000000000000001 head: 0000000000000000 0000000000020002 00000000f5000000 0000000000000000 head: 00fff00000000003 ffffea0001e5ac01 00000000ffffffff 00000000ffffffff head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000008 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd2040(__GFP_IO|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 5657, tgid 5657 (dhcpcd-run-hook), ts 79819636908, free_ts 79814310558 set_page_owner include/linux/page_owner.h:32 [inline] post_alloc_hook+0x1af/0x220 mm/page_alloc.c:1845 prep_new_page mm/page_alloc.c:1853 [inline] get_page_from_freelist+0xd0b/0x31a0 mm/page_alloc.c:3879 __alloc_frozen_pages_noprof+0x25f/0x2440 mm/page_alloc.c:5183 alloc_pages_mpol+0x1fb/0x550 mm/mempolicy.c:2416 alloc_slab_page mm/slub.c:3075 [inline] allocate_slab mm/slub.c:3248 [inline] new_slab+0x2c3/0x430 mm/slub.c:3302 ___slab_alloc+0xe18/0x1c90 mm/slub.c:4651 __slab_alloc.constprop.0+0x63/0x110 mm/slub.c:4774 __slab_alloc_node mm/slub.c:4850 [inline] slab_alloc_node mm/slub.c:5246 [inline] __kmalloc_cache_noprof+0x477/0x800 mm/slub.c:5766 kmalloc_noprof include/linux/slab.h:957 [inline] kzalloc_noprof include/linux/slab.h:1094 [inline] tomoyo_print_bprm security/tomoyo/audit.c:26 [inline] tomoyo_init_log+0xc8a/0x2140 security/tomoyo/audit.c:264 tomoyo_supervisor+0x302/0x13b0 security/tomoyo/common.c:2198 tomoyo_audit_env_log security/tomoyo/environ.c:36 [inline] tomoyo_env_perm+0x191/0x200 security/tomoyo/environ.c:63 tomoyo_environ security/tomoyo/domain.c:672 [inline] tomoyo_find_next_domain+0xec1/0x20b0 security/tomoyo/domain.c:888 tomoyo_bprm_check_security security/tomoyo/tomoyo.c:102 [inline] tomoyo_bprm_check_security+0x12d/0x1d0 security/tomoyo/tomoyo.c:92 security_bprm_check+0x1b9/0x1e0 security/security.c:794 search_binary_handler fs/exec.c:1659 [inline] exec_binprm fs/exec.c:1701 [inline] bprm_execve fs/exec.c:1753 [inline] bprm_execve+0x81e/0x1620 fs/exec.c:1729 do_execveat_common.isra.0+0x4a5/0x610 fs/exec.c:1859 page last free pid 5657 tgid 5657 stack trace: reset_page_owner include/linux/page_owner.h:25 [inline] free_pages_prepare mm/page_alloc.c:1394 [inline] __free_frozen_pages+0x7df/0x1160 mm/page_alloc.c:2901 discard_slab mm/slub.c:3346 [inline] __put_partials+0x130/0x170 mm/slub.c:3886 qlink_free mm/kasan/quarantine.c:163 [inline] qlist_free_all+0x4c/0xf0 mm/kasan/quarantine.c:179 kasan_quarantine_reduce+0x195/0x1e0 mm/kasan/quarantine.c:286 __kasan_slab_alloc+0x69/0x90 mm/kasan/common.c:352 kasan_slab_alloc include/linux/kasan.h:252 [inline] slab_post_alloc_hook mm/slub.c:4948 [inline] slab_alloc_node mm/slub.c:5258 [inline] __kmalloc_cache_noprof+0x274/0x800 mm/slub.c:5766 kmalloc_noprof include/linux/slab.h:957 [inline] tomoyo_print_header security/tomoyo/audit.c:156 [inline] tomoyo_init_log+0x197/0x2140 security/tomoyo/audit.c:255 tomoyo_supervisor+0x302/0x13b0 security/tomoyo/common.c:2198 tomoyo_audit_env_log security/tomoyo/environ.c:36 [inline] tomoyo_env_perm+0x191/0x200 security/tomoyo/environ.c:63 tomoyo_environ security/tomoyo/domain.c:672 [inline] tomoyo_find_next_domain+0xec1/0x20b0 security/tomoyo/domain.c:888 tomoyo_bprm_check_security security/tomoyo/tomoyo.c:102 [inline] tomoyo_bprm_check_security+0x12d/0x1d0 security/tomoyo/tomoyo.c:92 security_bprm_check+0x1b9/0x1e0 security/security.c:794 search_binary_handler fs/exec.c:1659 [inline] exec_binprm fs/exec.c:1701 [inline] bprm_execve fs/exec.c:1753 [inline] bprm_execve+0x81e/0x1620 fs/exec.c:1729 do_execveat_common.isra.0+0x4a5/0x610 fs/exec.c:1859 do_execve fs/exec.c:1933 [inline] __do_sys_execve fs/exec.c:2009 [inline] __se_sys_execve fs/exec.c:2004 [inline] __x64_sys_execve+0x8e/0xb0 fs/exec.c:2004 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xcd/0xf80 arch/x86/entry/syscall_64.c:94 Memory state around the buggy address: ffff8880796aff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8880796aff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8880796b0000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8880796b0080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880796b0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: a106e50be74b ("Bluetooth: HCI: Add support for LL Extended Feature Set") Reported-by: syzbot+87badbb9094e008e0685@syzkaller.appspotmail.com Tested-by: syzbot+87badbb9094e008e0685@syzkaller.appspotmail.com Closes: https://syzbot.org/bug?extid=87badbb9094e008e0685 Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Pauli Virtanen --- net/bluetooth/hci_sync.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 8cbbba50e77e..ffb0ceda6f7b 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -7390,10 +7390,8 @@ static void le_read_features_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - if (err == -ECANCELED) - return; - hci_conn_drop(conn); + hci_conn_put(conn); } static int hci_le_read_all_remote_features_sync(struct hci_dev *hdev, @@ -7463,10 +7461,12 @@ int hci_le_read_remote_features(struct hci_conn *conn) if (conn->out || (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES)) { err = hci_cmd_sync_queue_once(hdev, hci_le_read_remote_features_sync, - hci_conn_hold(conn), + hci_conn_hold(hci_conn_get(conn)), le_read_features_complete); - if (err) + if (err) { hci_conn_drop(conn); + hci_conn_put(conn); + } } else { err = -EOPNOTSUPP; } -- cgit v1.2.3 From 0ffac654e95c1bdfe2d4edf28fb18d6ba1f103e6 Mon Sep 17 00:00:00 2001 From: Jonathan Rissanen Date: Fri, 27 Mar 2026 11:47:21 +0100 Subject: Bluetooth: hci_h4: Fix race during initialization Commit 5df5dafc171b ("Bluetooth: hci_uart: Fix another race during initialization") fixed a race for hci commands sent during initialization. However, there is still a race that happens if an hci event from one of these commands is received before HCI_UART_REGISTERED has been set at the end of hci_uart_register_dev(). The event will be ignored which causes the command to fail with a timeout in the log: "Bluetooth: hci0: command 0x1003 tx timeout" This is because the hci event receive path (hci_uart_tty_receive -> h4_recv) requires HCI_UART_REGISTERED to be set in h4_recv(), while the hci command transmit path (hci_uart_send_frame -> h4_enqueue) only requires HCI_UART_PROTO_INIT to be set in hci_uart_send_frame(). The check for HCI_UART_REGISTERED was originally added in commit c2578202919a ("Bluetooth: Fix H4 crash from incoming UART packets") to fix a crash caused by hu->hdev being null dereferenced. That can no longer happen: once HCI_UART_PROTO_INIT is set in hci_uart_register_dev() all pointers (hu, hu->priv and hu->hdev) are valid, and hci_uart_tty_receive() already calls h4_recv() on HCI_UART_PROTO_INIT or HCI_UART_PROTO_READY. Remove the check for HCI_UART_REGISTERED in h4_recv() to fix the race condition. Fixes: 5df5dafc171b ("Bluetooth: hci_uart: Fix another race during initialization") Signed-off-by: Jonathan Rissanen Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_h4.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 07cc2a79a77b..a889a66a326f 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -109,9 +109,6 @@ static int h4_recv(struct hci_uart *hu, const void *data, int count) { struct h4_struct *h4 = hu->priv; - if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) - return -EUNATCH; - h4->rx_skb = h4_recv_buf(hu, h4->rx_skb, data, count, h4_recv_pkts, ARRAY_SIZE(h4_recv_pkts)); if (IS_ERR(h4->rx_skb)) { -- cgit v1.2.3 From b8dbe9648d69059cfe3a28917bfbf7e61efd7f15 Mon Sep 17 00:00:00 2001 From: Keenan Dong Date: Sat, 28 Mar 2026 16:46:47 +0800 Subject: Bluetooth: MGMT: validate LTK enc_size on load Load Long Term Keys stores the user-provided enc_size and later uses it to size fixed-size stack operations when replying to LE LTK requests. An enc_size larger than the 16-byte key buffer can therefore overflow the reply stack buffer. Reject oversized enc_size values while validating the management LTK record so invalid keys never reach the stored key state. Fixes: 346af67b8d11 ("Bluetooth: Add MGMT handlers for dealing with SMP LTK's") Reported-by: Keenan Dong Signed-off-by: Keenan Dong Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e5f9287fb826..adcd86c15b4e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7248,6 +7248,9 @@ static bool ltk_is_valid(struct mgmt_ltk_info *key) if (key->initiator != 0x00 && key->initiator != 0x01) return false; + if (key->enc_size > sizeof(key->val)) + return false; + switch (key->addr.type) { case BDADDR_LE_PUBLIC: return true; -- cgit v1.2.3 From a2639a7f0f5bf7d73f337f8f077c19415c62ed2c Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sun, 29 Mar 2026 16:43:01 +0300 Subject: Bluetooth: hci_conn: fix potential UAF in set_cig_params_sync hci_conn lookup and field access must be covered by hdev lock in set_cig_params_sync, otherwise it's possible it is freed concurrently. Take hdev lock to prevent hci_conn from being deleted or modified concurrently. Just RCU lock is not suitable here, as we also want to avoid "tearing" in the configuration. Fixes: a091289218202 ("Bluetooth: hci_conn: Fix hci_le_set_cig_params") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index e6393f17576b..11d3ad8d2551 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1843,9 +1843,13 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data) u8 aux_num_cis = 0; u8 cis_id; + hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_cig(hdev, cig_id); - if (!conn) + if (!conn) { + hci_dev_unlock(hdev); return 0; + } qos = &conn->iso_qos; pdu->cig_id = cig_id; @@ -1884,6 +1888,8 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data) } pdu->num_cis = aux_num_cis; + hci_dev_unlock(hdev); + if (!pdu->num_cis) return 0; -- cgit v1.2.3 From b255531b27da336571411248c2a72a350662bd09 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sun, 29 Mar 2026 16:43:02 +0300 Subject: Bluetooth: hci_event: fix potential UAF in hci_le_remote_conn_param_req_evt hci_conn lookup and field access must be covered by hdev lock in hci_le_remote_conn_param_req_evt, otherwise it's possible it is freed concurrently. Extend the hci_dev_lock critical section to cover all conn usage. Fixes: 95118dd4edfec ("Bluetooth: hci_event: Use of a function table to handle LE subevents") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 81d2f9a3eec9..3ebc5e6d45d9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6784,25 +6784,31 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, latency = le16_to_cpu(ev->latency); timeout = le16_to_cpu(ev->timeout); + hci_dev_lock(hdev); + hcon = hci_conn_hash_lookup_handle(hdev, handle); - if (!hcon || hcon->state != BT_CONNECTED) - return send_conn_param_neg_reply(hdev, handle, - HCI_ERROR_UNKNOWN_CONN_ID); + if (!hcon || hcon->state != BT_CONNECTED) { + send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_UNKNOWN_CONN_ID); + goto unlock; + } - if (max > hcon->le_conn_max_interval) - return send_conn_param_neg_reply(hdev, handle, - HCI_ERROR_INVALID_LL_PARAMS); + if (max > hcon->le_conn_max_interval) { + send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); + goto unlock; + } - if (hci_check_conn_params(min, max, latency, timeout)) - return send_conn_param_neg_reply(hdev, handle, - HCI_ERROR_INVALID_LL_PARAMS); + if (hci_check_conn_params(min, max, latency, timeout)) { + send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); + goto unlock; + } if (hcon->role == HCI_ROLE_MASTER) { struct hci_conn_params *params; u8 store_hint; - hci_dev_lock(hdev); - params = hci_conn_params_lookup(hdev, &hcon->dst, hcon->dst_type); if (params) { @@ -6815,8 +6821,6 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, store_hint = 0x00; } - hci_dev_unlock(hdev); - mgmt_new_conn_param(hdev, &hcon->dst, hcon->dst_type, store_hint, min, max, latency, timeout); } @@ -6830,6 +6834,9 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, cp.max_ce_len = 0; hci_send_cmd(hdev, HCI_OP_LE_CONN_PARAM_REQ_REPLY, sizeof(cp), &cp); + +unlock: + hci_dev_unlock(hdev); } static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, -- cgit v1.2.3 From bda93eec78cdbfe5cda00785cefebd443e56b88b Mon Sep 17 00:00:00 2001 From: Keenan Dong Date: Wed, 1 Apr 2026 22:25:26 +0800 Subject: Bluetooth: MGMT: validate mesh send advertising payload length mesh_send() currently bounds MGMT_OP_MESH_SEND by total command length, but it never verifies that the bytes supplied for the flexible adv_data[] array actually match the embedded adv_data_len field. MGMT_MESH_SEND_SIZE only covers the fixed header, so a truncated command can still pass the existing 20..50 byte range check and later drive the async mesh send path past the end of the queued command buffer. Keep rejecting zero-length and oversized advertising payloads, but validate adv_data_len explicitly and require the command length to exactly match the flexible array size before queueing the request. Fixes: b338d91703fa ("Bluetooth: Implement support for Mesh") Reported-by: Keenan Dong Signed-off-by: Keenan Dong Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index adcd86c15b4e..b05bb380e5f8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2478,6 +2478,7 @@ static int mesh_send(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) struct mgmt_mesh_tx *mesh_tx; struct mgmt_cp_mesh_send *send = data; struct mgmt_rp_mesh_read_features rp; + u16 expected_len; bool sending; int err = 0; @@ -2485,12 +2486,19 @@ static int mesh_send(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, MGMT_STATUS_NOT_SUPPORTED); - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) || - len <= MGMT_MESH_SEND_SIZE || - len > (MGMT_MESH_SEND_SIZE + 31)) + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_REJECTED); + + if (!send->adv_data_len || send->adv_data_len > 31) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, MGMT_STATUS_REJECTED); + expected_len = struct_size(send, adv_data, send->adv_data_len); + if (expected_len != len) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); memset(&rp, 0, sizeof(rp)); -- cgit v1.2.3 From d05111bfe37bfd8bd4d2dfe6675d6bdeef43f7c7 Mon Sep 17 00:00:00 2001 From: Oleh Konko Date: Tue, 31 Mar 2026 11:52:12 +0000 Subject: Bluetooth: SMP: force responder MITM requirements before building the pairing response smp_cmd_pairing_req() currently builds the pairing response from the initiator auth_req before enforcing the local BT_SECURITY_HIGH requirement. If the initiator omits SMP_AUTH_MITM, the response can also omit it even though the local side still requires MITM. tk_request() then sees an auth value without SMP_AUTH_MITM and may select JUST_CFM, making method selection inconsistent with the pairing policy the responder already enforces. When the local side requires HIGH security, first verify that MITM can be achieved from the IO capabilities and then force SMP_AUTH_MITM in the response in both rsp.auth_req and auth. This keeps the responder auth bits and later method selection aligned. Fixes: 2b64d153a0cc ("Bluetooth: Add MITM mechanism to LE-SMP") Cc: stable@vger.kernel.org Suggested-by: Luiz Augusto von Dentz Signed-off-by: Oleh Konko Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/smp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 485e3468bd26..deb8dd244b77 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1826,7 +1826,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (sec_level > conn->hcon->pending_sec_level) conn->hcon->pending_sec_level = sec_level; - /* If we need MITM check that it can be achieved */ + /* If we need MITM check that it can be achieved. */ if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) { u8 method; @@ -1834,6 +1834,10 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) req->io_capability); if (method == JUST_WORKS || method == JUST_CFM) return SMP_AUTH_REQUIREMENTS; + + /* Force MITM bit if it isn't set by the initiator. */ + auth |= SMP_AUTH_MITM; + rsp.auth_req |= SMP_AUTH_MITM; } key_size = min(req->max_key_size, rsp.max_key_size); -- cgit v1.2.3 From 20756fec2f0108cb88e815941f1ffff88dc286fe Mon Sep 17 00:00:00 2001 From: Oleh Konko Date: Tue, 31 Mar 2026 11:52:13 +0000 Subject: Bluetooth: SMP: derive legacy responder STK authentication from MITM state The legacy responder path in smp_random() currently labels the stored STK as authenticated whenever pending_sec_level is BT_SECURITY_HIGH. That reflects what the local service requested, not what the pairing flow actually achieved. For Just Works/Confirm legacy pairing, SMP_FLAG_MITM_AUTH stays clear and the resulting STK should remain unauthenticated even if the local side requested HIGH security. Use the established MITM state when storing the responder STK so the key metadata matches the pairing result. This also keeps the legacy path aligned with the Secure Connections code, which already treats JUST_WORKS/JUST_CFM as unauthenticated. Fixes: fff3490f4781 ("Bluetooth: Fix setting correct authentication information for SMP STK") Cc: stable@vger.kernel.org Signed-off-by: Oleh Konko Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/smp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index deb8dd244b77..98f1da4f5f55 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1018,10 +1018,7 @@ static u8 smp_random(struct smp_chan *smp) smp_s1(smp->tk, smp->prnd, smp->rrnd, stk); - if (hcon->pending_sec_level == BT_SECURITY_HIGH) - auth = 1; - else - auth = 0; + auth = test_bit(SMP_FLAG_MITM_AUTH, &smp->flags) ? 1 : 0; /* Even though there's no _RESPONDER suffix this is the * responder STK we're adding for later lookup (the initiator -- cgit v1.2.3 From bc39a094730ce062fa034a529c93147c096cb488 Mon Sep 17 00:00:00 2001 From: hkbinbin Date: Tue, 31 Mar 2026 05:39:16 +0000 Subject: Bluetooth: hci_sync: fix stack buffer overflow in hci_le_big_create_sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hci_le_big_create_sync() uses DEFINE_FLEX to allocate a struct hci_cp_le_big_create_sync on the stack with room for 0x11 (17) BIS entries. However, conn->num_bis can hold up to HCI_MAX_ISO_BIS (31) entries — validated against ISO_MAX_NUM_BIS (0x1f) in the caller hci_conn_big_create_sync(). When conn->num_bis is between 18 and 31, the memcpy that copies conn->bis into cp->bis writes up to 14 bytes past the stack buffer, corrupting adjacent stack memory. This is trivially reproducible: binding an ISO socket with bc_num_bis = ISO_MAX_NUM_BIS (31) and calling listen() will eventually trigger hci_le_big_create_sync() from the HCI command sync worker, causing a KASAN-detectable stack-out-of-bounds write: BUG: KASAN: stack-out-of-bounds in hci_le_big_create_sync+0x256/0x3b0 Write of size 31 at addr ffffc90000487b48 by task kworker/u9:0/71 Fix this by changing the DEFINE_FLEX count from the incorrect 0x11 to HCI_MAX_ISO_BIS, which matches the maximum number of BIS entries that conn->bis can actually carry. Fixes: 42ecf1947135 ("Bluetooth: ISO: Do not emit LE BIG Create Sync if previous is pending") Cc: stable@vger.kernel.org Signed-off-by: hkbinbin Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index ffb0ceda6f7b..919ec275dd23 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -7241,7 +7241,8 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err) static int hci_le_big_create_sync(struct hci_dev *hdev, void *data) { - DEFINE_FLEX(struct hci_cp_le_big_create_sync, cp, bis, num_bis, 0x11); + DEFINE_FLEX(struct hci_cp_le_big_create_sync, cp, bis, num_bis, + HCI_MAX_ISO_BIS); struct hci_conn *conn = data; struct bt_iso_qos *qos = &conn->iso_qos; int err; -- cgit v1.2.3 From ffb5a4843c5bde702ed17cbcdbda98b37f7a6dad Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 31 Mar 2026 12:17:18 +0800 Subject: ipv6: fix data race in fib6_metric_set() using cmpxchg fib6_metric_set() may be called concurrently from softirq context without holding the FIB table lock. A typical path is: ndisc_router_discovery() spin_unlock_bh(&table->tb6_lock) <- lock released fib6_metric_set(rt, RTAX_HOPLIMIT, ...) <- lockless call When two CPUs process Router Advertisement packets for the same router simultaneously, they can both arrive at fib6_metric_set() with the same fib6_info pointer whose fib6_metrics still points to dst_default_metrics. if (f6i->fib6_metrics == &dst_default_metrics) { /* both CPUs: true */ struct dst_metrics *p = kzalloc_obj(*p, GFP_ATOMIC); refcount_set(&p->refcnt, 1); f6i->fib6_metrics = p; /* CPU1 overwrites CPU0's p -> p0 leaked */ } The dst_metrics allocated by the losing CPU has refcnt=1 but no pointer to it anywhere in memory, producing a kmemleak report: unreferenced object 0xff1100025aca1400 (size 96): comm "softirq", pid 0, jiffies 4299271239 backtrace: kmalloc_trace+0x28a/0x380 fib6_metric_set+0xcd/0x180 ndisc_router_discovery+0x12dc/0x24b0 icmpv6_rcv+0xc16/0x1360 Fix this by: - Set val for p->metrics before published via cmpxchg() so the metrics value is ready before the pointer becomes visible to other CPUs. - Replace the plain pointer store with cmpxchg() and free the allocation safely when competition failed. - Add READ_ONCE()/WRITE_ONCE() for metrics[] setting in the non-default metrics path to prevent compiler-based data races. Fixes: d4ead6b34b67 ("net/ipv6: move metrics from dst to rt6_info") Reported-by: Fei Liu Reviewed-by: Jiayuan Chen Signed-off-by: Hangbin Liu Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260331-b4-fib6_metric_set-kmemleak-v3-1-88d27f4d8825@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_fib.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index dd26657b6a4a..45ef4d65dcbc 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -727,20 +727,28 @@ unlock: void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val) { + struct dst_metrics *m; + if (!f6i) return; - if (f6i->fib6_metrics == &dst_default_metrics) { + if (READ_ONCE(f6i->fib6_metrics) == &dst_default_metrics) { + struct dst_metrics *dflt = (struct dst_metrics *)&dst_default_metrics; struct dst_metrics *p = kzalloc_obj(*p, GFP_ATOMIC); if (!p) return; + p->metrics[metric - 1] = val; refcount_set(&p->refcnt, 1); - f6i->fib6_metrics = p; + if (cmpxchg(&f6i->fib6_metrics, dflt, p) != dflt) + kfree(p); + else + return; } - f6i->fib6_metrics->metrics[metric - 1] = val; + m = READ_ONCE(f6i->fib6_metrics); + WRITE_ONCE(m->metrics[metric - 1], val); } /* -- cgit v1.2.3 From a54ecccfae62c5c85259ae5ea5d9c20009519049 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Tue, 31 Mar 2026 00:32:38 +0800 Subject: rds: ib: reject FRMR registration before IB connection is established rds_ib_get_mr() extracts the rds_ib_connection from conn->c_transport_data and passes it to rds_ib_reg_frmr() for FRWR memory registration. On a fresh outgoing connection, ic is allocated in rds_ib_conn_alloc() with i_cm_id = NULL because the connection worker has not yet called rds_ib_conn_path_connect() to create the rdma_cm_id. When sendmsg() with RDS_CMSG_RDMA_MAP is called on such a connection, the sendmsg path parses the control message before any connection establishment, allowing rds_ib_post_reg_frmr() to dereference ic->i_cm_id->qp and crash the kernel. The existing guard in rds_ib_reg_frmr() only checks for !ic (added in commit 9e630bcb7701), which does not catch this case since ic is allocated early and is always non-NULL once the connection object exists. KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] RIP: 0010:rds_ib_post_reg_frmr+0x50e/0x920 Call Trace: rds_ib_post_reg_frmr (net/rds/ib_frmr.c:167) rds_ib_map_frmr (net/rds/ib_frmr.c:252) rds_ib_reg_frmr (net/rds/ib_frmr.c:430) rds_ib_get_mr (net/rds/ib_rdma.c:615) __rds_rdma_map (net/rds/rdma.c:295) rds_cmsg_rdma_map (net/rds/rdma.c:860) rds_sendmsg (net/rds/send.c:1363) ____sys_sendmsg do_syscall_64 Add a check in rds_ib_get_mr() that verifies ic, i_cm_id, and qp are all non-NULL before proceeding with FRMR registration, mirroring the guard already present in rds_ib_post_inv(). Return -ENODEV when the connection is not ready, which the existing error handling in rds_cmsg_send() converts to -EAGAIN for userspace retry and triggers rds_conn_connect_if_down() to start the connection worker. Fixes: 1659185fb4d0 ("RDS: IB: Support Fastreg MR (FRMR) memory registration mode") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Reviewed-by: Allison Henderson Link: https://patch.msgid.link/20260330163237.2752440-2-bestswngs@gmail.com Signed-off-by: Jakub Kicinski --- net/rds/ib_rdma.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 077f7041df15..2cfec252eeac 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -604,8 +604,13 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, return ibmr; } - if (conn) + if (conn) { ic = conn->c_transport_data; + if (!ic || !ic->i_cm_id || !ic->i_cm_id->qp) { + ret = -ENODEV; + goto out; + } + } if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) { ret = -ENODEV; -- cgit v1.2.3 From d64cb81dcbd54927515a7f65e5e24affdc73c14b Mon Sep 17 00:00:00 2001 From: Yucheng Lu Date: Tue, 31 Mar 2026 16:00:21 +0800 Subject: net/sched: sch_netem: fix out-of-bounds access in packet corruption In netem_enqueue(), the packet corruption logic uses get_random_u32_below(skb_headlen(skb)) to select an index for modifying skb->data. When an AF_PACKET TX_RING sends fully non-linear packets over an IPIP tunnel, skb_headlen(skb) evaluates to 0. Passing 0 to get_random_u32_below() takes the variable-ceil slow path which returns an unconstrained 32-bit random integer. Using this unconstrained value as an offset into skb->data results in an out-of-bounds memory access. Fix this by verifying skb_headlen(skb) is non-zero before attempting to corrupt the linear data area. Fully non-linear packets will silently bypass the corruption logic. Fixes: c865e5d99e25 ("[PKT_SCHED] netem: packet corruption option") Reported-by: Yifan Wu Reported-by: Juefei Pu Signed-off-by: Yuan Tan Signed-off-by: Xin Liu Signed-off-by: Yuhang Zheng Signed-off-by: Yucheng Lu Reviewed-by: Stephen Hemminger Link: https://patch.msgid.link/45435c0935df877853a81e6d06205ac738ec65fa.1774941614.git.kanolyc@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5de1c932944a..20df1c08b1e9 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -519,8 +519,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, goto finish_segs; } - skb->data[get_random_u32_below(skb_headlen(skb))] ^= - 1<data[get_random_u32_below(skb_headlen(skb))] ^= + 1 << get_random_u32_below(8); } if (unlikely(q->t_len >= sch->limit)) { -- cgit v1.2.3 From b4e5f04c58a29c499faa85d12952ca9a4faf1cb9 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Thu, 26 Mar 2026 19:53:44 +0530 Subject: virtio_net: clamp rss_max_key_size to NETDEV_RSS_KEY_LEN rss_max_key_size in the virtio spec is the maximum key size supported by the device, not a mandatory size the driver must use. Also the value 40 is a spec minimum, not a spec maximum. The current code rejects RSS and can fail probe when the device reports a larger rss_max_key_size than the driver buffer limit. Instead, clamp the effective key length to min(device rss_max_key_size, NETDEV_RSS_KEY_LEN) and keep RSS enabled. This keeps probe working on devices that advertise larger maximum key sizes while respecting the netdev RSS key buffer size limit. Fixes: 3f7d9c1964fc ("virtio_net: Add hash_key_length check") Cc: stable@vger.kernel.org Signed-off-by: Srujana Challa Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20260326142344.1171317-1-schalla@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ab2108ee206a..c0b9bc5574e2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -381,8 +381,6 @@ struct receive_queue { struct xdp_buff **xsk_buffs; }; -#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 - /* Control VQ buffers: protected by the rtnl lock */ struct control_buf { struct virtio_net_ctrl_hdr hdr; @@ -486,7 +484,7 @@ struct virtnet_info { /* Must be last as it ends in a flexible-array member. */ TRAILING_OVERLAP(struct virtio_net_rss_config_trailer, rss_trailer, hash_key_data, - u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE]; + u8 rss_hash_key_data[NETDEV_RSS_KEY_LEN]; ); }; static_assert(offsetof(struct virtnet_info, rss_trailer.hash_key_data) == @@ -6708,6 +6706,7 @@ static int virtnet_probe(struct virtio_device *vdev) struct virtnet_info *vi; u16 max_queue_pairs; int mtu = 0; + u16 key_sz; /* Find if host supports multiqueue/rss virtio_net device */ max_queue_pairs = 1; @@ -6842,14 +6841,13 @@ static int virtnet_probe(struct virtio_device *vdev) } if (vi->has_rss || vi->has_rss_hash_report) { - vi->rss_key_size = - virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); - if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { - dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", - vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); - err = -EINVAL; - goto free; - } + key_sz = virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); + + vi->rss_key_size = min_t(u16, key_sz, NETDEV_RSS_KEY_LEN); + if (key_sz > vi->rss_key_size) + dev_warn(&vdev->dev, + "rss_max_key_size=%u exceeds driver limit %u, clamping\n", + key_sz, vi->rss_key_size); vi->rss_hash_types_supported = virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); -- cgit v1.2.3 From ce8fe5287b87e24e225c342f3b0ec04f0b3680fe Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Mon, 30 Mar 2026 21:45:40 +0300 Subject: net: macb: fix clk handling on PCI glue driver removal platform_device_unregister() may still want to use the registered clks during runtime resume callback. Note that there is a commit d82d5303c4c5 ("net: macb: fix use after free on rmmod") that addressed the similar problem of clk vs platform device unregistration but just moved the bug to another place. Save the pointers to clks into local variables for reuse after platform device is unregistered. BUG: KASAN: use-after-free in clk_prepare+0x5a/0x60 Read of size 8 at addr ffff888104f85e00 by task modprobe/597 CPU: 2 PID: 597 Comm: modprobe Not tainted 6.1.164+ #114 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.1-0-g3208b098f51a-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x8d/0xba print_report+0x17f/0x496 kasan_report+0xd9/0x180 clk_prepare+0x5a/0x60 macb_runtime_resume+0x13d/0x410 [macb] pm_generic_runtime_resume+0x97/0xd0 __rpm_callback+0xc8/0x4d0 rpm_callback+0xf6/0x230 rpm_resume+0xeeb/0x1a70 __pm_runtime_resume+0xb4/0x170 bus_remove_device+0x2e3/0x4b0 device_del+0x5b3/0xdc0 platform_device_del+0x4e/0x280 platform_device_unregister+0x11/0x50 pci_device_remove+0xae/0x210 device_remove+0xcb/0x180 device_release_driver_internal+0x529/0x770 driver_detach+0xd4/0x1a0 bus_remove_driver+0x135/0x260 driver_unregister+0x72/0xb0 pci_unregister_driver+0x26/0x220 __do_sys_delete_module+0x32e/0x550 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Allocated by task 519: kasan_save_stack+0x2c/0x50 kasan_set_track+0x21/0x30 __kasan_kmalloc+0x8e/0x90 __clk_register+0x458/0x2890 clk_hw_register+0x1a/0x60 __clk_hw_register_fixed_rate+0x255/0x410 clk_register_fixed_rate+0x3c/0xa0 macb_probe+0x1d8/0x42e [macb_pci] local_pci_probe+0xd7/0x190 pci_device_probe+0x252/0x600 really_probe+0x255/0x7f0 __driver_probe_device+0x1ee/0x330 driver_probe_device+0x4c/0x1f0 __driver_attach+0x1df/0x4e0 bus_for_each_dev+0x15d/0x1f0 bus_add_driver+0x486/0x5e0 driver_register+0x23a/0x3d0 do_one_initcall+0xfd/0x4d0 do_init_module+0x18b/0x5a0 load_module+0x5663/0x7950 __do_sys_finit_module+0x101/0x180 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Freed by task 597: kasan_save_stack+0x2c/0x50 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x50 __kasan_slab_free+0x106/0x180 __kmem_cache_free+0xbc/0x320 clk_unregister+0x6de/0x8d0 macb_remove+0x73/0xc0 [macb_pci] pci_device_remove+0xae/0x210 device_remove+0xcb/0x180 device_release_driver_internal+0x529/0x770 driver_detach+0xd4/0x1a0 bus_remove_driver+0x135/0x260 driver_unregister+0x72/0xb0 pci_unregister_driver+0x26/0x220 __do_sys_delete_module+0x32e/0x550 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: d82d5303c4c5 ("net: macb: fix use after free on rmmod") Signed-off-by: Fedor Pchelkin Link: https://patch.msgid.link/20260330184542.626619-1-pchelkin@ispras.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index fc4f5aee6ab3..0ce5b736ea43 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -109,10 +109,12 @@ static void macb_remove(struct pci_dev *pdev) { struct platform_device *plat_dev = pci_get_drvdata(pdev); struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev); + struct clk *pclk = plat_data->pclk; + struct clk *hclk = plat_data->hclk; - clk_unregister(plat_data->pclk); - clk_unregister(plat_data->hclk); platform_device_unregister(plat_dev); + clk_unregister(pclk); + clk_unregister(hclk); } static const struct pci_device_id dev_id_table[] = { -- cgit v1.2.3 From f0f367a4f459cc8118aadc43c6bba53c60d93f8d Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Mon, 30 Mar 2026 21:45:41 +0300 Subject: net: macb: properly unregister fixed rate clocks The additional resources allocated with clk_register_fixed_rate() need to be released with clk_unregister_fixed_rate(), otherwise they are lost. Fixes: 83a77e9ec415 ("net: macb: Added PCI wrapper for Platform Driver.") Signed-off-by: Fedor Pchelkin Link: https://patch.msgid.link/20260330184542.626619-2-pchelkin@ispras.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index 0ce5b736ea43..b79dec17e6b0 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -96,10 +96,10 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_plat_dev_register: - clk_unregister(plat_data.hclk); + clk_unregister_fixed_rate(plat_data.hclk); err_hclk_register: - clk_unregister(plat_data.pclk); + clk_unregister_fixed_rate(plat_data.pclk); err_pclk_register: return err; @@ -113,8 +113,8 @@ static void macb_remove(struct pci_dev *pdev) struct clk *hclk = plat_data->hclk; platform_device_unregister(plat_dev); - clk_unregister(pclk); - clk_unregister(hclk); + clk_unregister_fixed_rate(pclk); + clk_unregister_fixed_rate(hclk); } static const struct pci_device_id dev_id_table[] = { -- cgit v1.2.3 From bf16bca6653679d8a514d6c1c5a2c67065033f14 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 30 Mar 2026 22:40:13 +0300 Subject: net/mlx5: lag: Check for LAG device before creating debugfs __mlx5_lag_dev_add_mdev() may return 0 (success) even when an error occurs that is handled gracefully. Consequently, the initialization flow proceeds to call mlx5_ldev_add_debugfs() even when there is no valid LAG context. mlx5_ldev_add_debugfs() blindly created the debugfs directory and attributes. This exposed interfaces (like the members file) that rely on a valid ldev pointer, leading to potential NULL pointer dereferences if accessed when ldev is NULL. Add a check to verify that mlx5_lag_dev(dev) returns a valid pointer before attempting to create the debugfs entries. Fixes: 7f46a0b7327a ("net/mlx5: Lag, add debugfs to query hardware lag state") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260330194015.53585-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c index 62b6faa4276a..b8d5f6a44d26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -160,8 +160,11 @@ DEFINE_SHOW_ATTRIBUTE(members); void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev) { + struct mlx5_lag *ldev = mlx5_lag_dev(dev); struct dentry *dbg; + if (!ldev) + return; dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev)); dev->priv.dbg.lag_debugfs = dbg; -- cgit v1.2.3 From 10dc35f6a443d488f219d1a1e3fb8f8dac422070 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 30 Mar 2026 22:40:14 +0300 Subject: net/mlx5: Avoid "No data available" when FW version queries fail Avoid printing the misleading "kernel answers: No data available" devlink output when querying firmware or pending firmware version fails (e.g. MLX5 fw state errors / flash failures). FW can fail on loading the pending flash image and get its version due to various reasons, examples: mlxfw: Firmware flash failed: key not applicable, err (7) mlx5_fw_image_pending: can't read pending fw version while fw state is 1 and the resulting: $ devlink dev info kernel answers: No data available Instead, just report 0 or 0xfff.. versions in case of failure to indicate a problem, and let other information be shown. after the fix: $ devlink dev info pci/0000:00:06.0: driver mlx5_core serial_number xxx... board.serial_number MT2225300179 versions: fixed: fw.psid MT_0000000436 running: fw.version 22.41.0188 fw 22.41.0188 stored: fw.version 255.255.65535 fw 255.255.65535 Fixes: 9c86b07e3069 ("net/mlx5: Added fw version query command") Signed-off-by: Saeed Mahameed Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260330194015.53585-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 53 ++++++++++++++-------- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 4 +- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 6698ac55a4bf..73cf0321bb86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -107,9 +107,7 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, if (err) return err; - err = mlx5_fw_version_query(dev, &running_fw, &stored_fw); - if (err) - return err; + mlx5_fw_version_query(dev, &running_fw, &stored_fw); snprintf(version_str, sizeof(version_str), "%d.%d.%04d", mlx5_fw_ver_major(running_fw), mlx5_fw_ver_minor(running_fw), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index eeb4437975f2..c1f220e5fe18 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -822,48 +822,63 @@ mlx5_fw_image_pending(struct mlx5_core_dev *dev, return 0; } -int mlx5_fw_version_query(struct mlx5_core_dev *dev, - u32 *running_ver, u32 *pending_ver) +void mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *pending_ver) { u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {}; bool pending_version_exists; int component_index; int err; + *running_ver = 0; + *pending_ver = 0; + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) || !MLX5_CAP_MCAM_REG(dev, mcqs)) { mlx5_core_warn(dev, "fw query isn't supported by the FW\n"); - return -EOPNOTSUPP; + return; } component_index = mlx5_get_boot_img_component_index(dev); - if (component_index < 0) - return component_index; + if (component_index < 0) { + mlx5_core_warn(dev, "fw query failed to find boot img component index, err %d\n", + component_index); + return; + } + *running_ver = U32_MAX; /* indicate failure */ err = mlx5_reg_mcqi_version_query(dev, component_index, MCQI_FW_RUNNING_VERSION, reg_mcqi_version); - if (err) - return err; - - *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); - + if (!err) + *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, + version); + else + mlx5_core_warn(dev, "failed to query running version, err %d\n", + err); + + *pending_ver = U32_MAX; /* indicate failure */ err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists); - if (err) - return err; + if (err) { + mlx5_core_warn(dev, "failed to query pending image, err %d\n", + err); + return; + } if (!pending_version_exists) { *pending_ver = 0; - return 0; + return; } err = mlx5_reg_mcqi_version_query(dev, component_index, MCQI_FW_STORED_VERSION, reg_mcqi_version); - if (err) - return err; - - *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); - - return 0; + if (!err) + *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, + version); + else + mlx5_core_warn(dev, "failed to query pending version, err %d\n", + err); + + return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b635b423d972..1507e881d962 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -393,8 +393,8 @@ int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw, struct netlink_ext_ack *extack); -int mlx5_fw_version_query(struct mlx5_core_dev *dev, - u32 *running_ver, u32 *stored_ver); +void mlx5_fw_version_query(struct mlx5_core_dev *dev, u32 *running_ver, + u32 *stored_ver); #ifdef CONFIG_MLX5_CORE_EN int mlx5e_init(void); -- cgit v1.2.3 From 403186400a1a6166efe7031edc549c15fee4723f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 30 Mar 2026 22:40:15 +0300 Subject: net/mlx5: Fix switchdev mode rollback in case of failure If for some internal reason switchdev mode fails, we rollback to legacy mode, before this patch, rollback will unregister the uplink netdev and leave it unregistered causing the below kernel bug. To fix this, we need to avoid netdev unregister by setting the proper rollback flag 'MLX5_PRIV_FLAGS_SWITCH_LEGACY' to indicate legacy mode. devlink (431) used greatest stack depth: 11048 bytes left mlx5_core 0000:00:03.0: E-Switch: Disable: mode(LEGACY), nvfs(0), \ necvfs(0), active vports(0) mlx5_core 0000:00:03.0: E-Switch: Supported tc chains and prios offload mlx5_core 0000:00:03.0: Loading uplink representor for vport 65535 mlx5_core 0000:00:03.0: mlx5_cmd_out_err:816:(pid 456): \ QUERY_HCA_CAP(0x100) op_mod(0x0) failed, \ status bad parameter(0x3), syndrome (0x3a3846), err(-22) mlx5_core 0000:00:03.0 enp0s3np0 (unregistered): Unloading uplink \ representor for vport 65535 ------------[ cut here ]------------ kernel BUG at net/core/dev.c:12070! Oops: invalid opcode: 0000 [#1] SMP NOPTI CPU: 2 UID: 0 PID: 456 Comm: devlink Not tainted 6.16.0-rc3+ \ #9 PREEMPT(voluntary) RIP: 0010:unregister_netdevice_many_notify+0x123/0xae0 ... Call Trace: [ 90.923094] unregister_netdevice_queue+0xad/0xf0 [ 90.923323] unregister_netdev+0x1c/0x40 [ 90.923522] mlx5e_vport_rep_unload+0x61/0xc6 [ 90.923736] esw_offloads_enable+0x8e6/0x920 [ 90.923947] mlx5_eswitch_enable_locked+0x349/0x430 [ 90.924182] ? is_mp_supported+0x57/0xb0 [ 90.924376] mlx5_devlink_eswitch_mode_set+0x167/0x350 [ 90.924628] devlink_nl_eswitch_set_doit+0x6f/0xf0 [ 90.924862] genl_family_rcv_msg_doit+0xe8/0x140 [ 90.925088] genl_rcv_msg+0x18b/0x290 [ 90.925269] ? __pfx_devlink_nl_pre_doit+0x10/0x10 [ 90.925506] ? __pfx_devlink_nl_eswitch_set_doit+0x10/0x10 [ 90.925766] ? __pfx_devlink_nl_post_doit+0x10/0x10 [ 90.926001] ? __pfx_genl_rcv_msg+0x10/0x10 [ 90.926206] netlink_rcv_skb+0x52/0x100 [ 90.926393] genl_rcv+0x28/0x40 [ 90.926557] netlink_unicast+0x27d/0x3d0 [ 90.926749] netlink_sendmsg+0x1f7/0x430 [ 90.926942] __sys_sendto+0x213/0x220 [ 90.927127] ? __sys_recvmsg+0x6a/0xd0 [ 90.927312] __x64_sys_sendto+0x24/0x30 [ 90.927504] do_syscall_64+0x50/0x1c0 [ 90.927687] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 90.927929] RIP: 0033:0x7f7d0363e047 Fixes: 2a4f56fbcc47 ("net/mlx5e: Keep netdev when leave switchdev for devlink set legacy only") Signed-off-by: Saeed Mahameed Reviewed-by: Jianbo Liu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260330194015.53585-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7a9ee36b8dca..01f6aecc4fcc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3761,6 +3761,8 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) return 0; err_vports: + /* rollback to legacy, indicates don't unregister the uplink netdev */ + esw->dev->priv.flags |= MLX5_PRIV_FLAGS_SWITCH_LEGACY; mlx5_esw_offloads_rep_unload(esw, MLX5_VPORT_UPLINK); err_uplink: esw_offloads_steering_cleanup(esw); -- cgit v1.2.3 From ceee35e5674aa84cf9e504c2a9dae4587511556c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 30 Mar 2026 23:51:36 -0700 Subject: bnxt_en: Refactor some basic ring setup and adjustment logic Refactor out the basic code that trims the default rings, sets up and adjusts XDP TX rings and CP rings. There is no change in behavior. This is to prepare for the next bug fix patch. Reviewed-by: Kalesh AP Reviewed-by: Pavan Chebbi Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Link: https://patch.msgid.link/20260331065138.948205-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 53 ++++++++++++++++------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 5 +-- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 5 +-- 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7ed805713fbb..173f962fc2ab 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12992,6 +12992,21 @@ static int bnxt_tx_nr_rings_per_tc(struct bnxt *bp) return bp->num_tc ? bp->tx_nr_rings / bp->num_tc : bp->tx_nr_rings; } +static void bnxt_set_xdp_tx_rings(struct bnxt *bp) +{ + bp->tx_nr_rings_xdp = bp->tx_nr_rings_per_tc; + bp->tx_nr_rings += bp->tx_nr_rings_xdp; +} + +static void bnxt_adj_tx_rings(struct bnxt *bp) +{ + /* Make adjustments if reserved TX rings are less than requested */ + bp->tx_nr_rings -= bp->tx_nr_rings_xdp; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + if (bp->tx_nr_rings_xdp) + bnxt_set_xdp_tx_rings(bp); +} + static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@ -13009,13 +13024,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) if (rc) return rc; - /* Make adjustments if reserved TX rings are less than requested */ - bp->tx_nr_rings -= bp->tx_nr_rings_xdp; - bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); - if (bp->tx_nr_rings_xdp) { - bp->tx_nr_rings_xdp = bp->tx_nr_rings_per_tc; - bp->tx_nr_rings += bp->tx_nr_rings_xdp; - } + bnxt_adj_tx_rings(bp); rc = bnxt_alloc_mem(bp, irq_re_init); if (rc) { netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); @@ -15436,11 +15445,19 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) return 0; } +void bnxt_set_cp_rings(struct bnxt *bp, bool sh) +{ + int tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); + + bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) : + tx_cp + bp->rx_nr_rings; +} + int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) { struct bnxt *bp = netdev_priv(dev); bool sh = false; - int rc, tx_cp; + int rc; if (tc > bp->max_tc) { netdev_err(dev, "Too many traffic classes requested: %d. Max supported is %d.\n", @@ -15473,9 +15490,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) bp->num_tc = 0; } bp->tx_nr_rings += bp->tx_nr_rings_xdp; - tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); - bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) : - tx_cp + bp->rx_nr_rings; + bnxt_set_cp_rings(bp, sh); if (netif_running(bp->dev)) return bnxt_open_nic(bp, true, false); @@ -16525,6 +16540,15 @@ static void bnxt_trim_dflt_sh_rings(struct bnxt *bp) bp->tx_nr_rings = bnxt_tx_nr_rings(bp); } +static void bnxt_adj_dflt_rings(struct bnxt *bp, bool sh) +{ + if (sh) + bnxt_trim_dflt_sh_rings(bp); + else + bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings; + bp->tx_nr_rings = bnxt_tx_nr_rings(bp); +} + static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) { int dflt_rings, max_rx_rings, max_tx_rings, rc; @@ -16550,11 +16574,8 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) return rc; bp->rx_nr_rings = min_t(int, dflt_rings, max_rx_rings); bp->tx_nr_rings_per_tc = min_t(int, dflt_rings, max_tx_rings); - if (sh) - bnxt_trim_dflt_sh_rings(bp); - else - bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings; - bp->tx_nr_rings = bnxt_tx_nr_rings(bp); + + bnxt_adj_dflt_rings(bp, sh); avail_msix = bnxt_get_max_func_irqs(bp) - bp->cp_nr_rings; if (avail_msix >= BNXT_MIN_ROCE_CP_RINGS) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index a97d651130df..4bc7f7aeaab3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2985,6 +2985,7 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int tx_xdp); int bnxt_fw_init_one(struct bnxt *bp); bool bnxt_hwrm_reset_permitted(struct bnxt *bp); +void bnxt_set_cp_rings(struct bnxt *bp, bool sh); int bnxt_setup_mq_tc(struct net_device *dev, u8 tc); struct bnxt_ntuple_filter *bnxt_lookup_ntp_filter_from_idx(struct bnxt *bp, struct bnxt_ntuple_filter *fltr, u32 idx); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 28d0ece2e7b1..0407aa1b3190 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -945,7 +945,6 @@ static int bnxt_set_channels(struct net_device *dev, bool sh = false; int tx_xdp = 0; int rc = 0; - int tx_cp; if (channel->other_count) return -EINVAL; @@ -1013,9 +1012,7 @@ static int bnxt_set_channels(struct net_device *dev, if (tcs > 1) bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs + tx_xdp; - tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); - bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) : - tx_cp + bp->rx_nr_rings; + bnxt_set_cp_rings(bp, sh); /* After changing number of rx channels, update NTUPLE feature. */ netdev_update_features(dev); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 85cbeb35681c..bebe37e139c9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -384,7 +384,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) { struct net_device *dev = bp->dev; - int tx_xdp = 0, tx_cp, rc, tc; + int tx_xdp = 0, rc, tc; struct bpf_prog *old; netdev_assert_locked(dev); @@ -431,8 +431,7 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) } bp->tx_nr_rings_xdp = tx_xdp; bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp; - tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); - bp->cp_nr_rings = max_t(int, tx_cp, bp->rx_nr_rings); + bnxt_set_cp_rings(bp, true); bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp); -- cgit v1.2.3 From e4bf81dcad0a6fff2bbe5331d2c7fb30d45a788c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 30 Mar 2026 23:51:37 -0700 Subject: bnxt_en: Don't assume XDP is never enabled in bnxt_init_dflt_ring_mode() The original code made the assumption that when we set up the initial default ring mode, we must be just loading the driver and XDP cannot be enabled yet. This is not true when the FW goes through a resource or capability change. Resource reservations will be cancelled and reinitialized with XDP already enabled. devlink reload with XDP enabled will also have the same issue. This scenario will cause the ring arithmetic to be all wrong in the bnxt_init_dflt_ring_mode() path causing failure: bnxt_en 0000:a1:00.0 ens2f0np0: bnxt_setup_int_mode err: ffffffea bnxt_en 0000:a1:00.0 ens2f0np0: bnxt_request_irq err: ffffffea bnxt_en 0000:a1:00.0 ens2f0np0: nic open fail (rc: ffffffea) Fix it by properly accounting for XDP in the bnxt_init_dflt_ring_mode() path by using the refactored helper functions in the previous patch. Reviewed-by: Andy Gospodarek Reviewed-by: Pavan Chebbi Reviewed-by: Kalesh AP Fixes: ec5d31e3c15d ("bnxt_en: Handle firmware reset status during IF_UP.") Fixes: 228ea8c187d8 ("bnxt_en: implement devlink dev reload driver_reinit") Signed-off-by: Michael Chan Link: https://patch.msgid.link/20260331065138.948205-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 173f962fc2ab..f11f3a704da5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -16547,6 +16547,10 @@ static void bnxt_adj_dflt_rings(struct bnxt *bp, bool sh) else bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings; bp->tx_nr_rings = bnxt_tx_nr_rings(bp); + if (sh && READ_ONCE(bp->xdp_prog)) { + bnxt_set_xdp_tx_rings(bp); + bnxt_set_cp_rings(bp, true); + } } static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) @@ -16588,16 +16592,17 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, "Unable to reserve tx rings\n"); - bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + + bnxt_adj_tx_rings(bp); if (sh) - bnxt_trim_dflt_sh_rings(bp); + bnxt_adj_dflt_rings(bp, true); /* Rings may have been trimmed, re-reserve the trimmed rings. */ if (bnxt_need_reserve_rings(bp)) { rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, "2nd rings reservation failed.\n"); - bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + bnxt_adj_tx_rings(bp); } if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { bp->rx_nr_rings++; @@ -16631,7 +16636,7 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp) if (rc) goto init_dflt_ring_err; - bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + bnxt_adj_tx_rings(bp); bnxt_set_dflt_rfs(bp); -- cgit v1.2.3 From 071dbfa304e85a6b04a593e950d18fa170997288 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 30 Mar 2026 23:51:38 -0700 Subject: bnxt_en: Restore default stat ctxs for ULP when resource is available During resource reservation, if the L2 driver does not have enough MSIX vectors to provide to the RoCE driver, it sets the stat ctxs for ULP also to 0 so that we don't have to reserve it unnecessarily. However, subsequently the user may reduce L2 rings thereby freeing up some resources that the L2 driver can now earmark for RoCE. In this case, the driver should restore the default ULP stat ctxs to make sure that all RoCE resources are ready for use. The RoCE driver may fail to initialize in this scenario without this fix. Fixes: d630624ebd70 ("bnxt_en: Utilize ulp client resources if RoCE is not registered") Reviewed-by: Kalesh AP Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20260331065138.948205-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f11f3a704da5..3f775196ef81 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8045,6 +8045,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp) ulp_msix = bnxt_get_avail_msix(bp, bp->ulp_num_msix_want); if (!ulp_msix) bnxt_set_ulp_stat_ctxs(bp, 0); + else + bnxt_set_dflt_ulp_stat_ctxs(bp); if (ulp_msix > bp->ulp_num_msix_want) ulp_msix = bp->ulp_num_msix_want; -- cgit v1.2.3 From d10a26aa4d072320530e6968ef945c8c575edf61 Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Tue, 31 Mar 2026 09:43:17 +0200 Subject: net/x25: Fix potential double free of skb When alloc_skb fails in x25_queue_rx_frame it calls kfree_skb(skb) at line 48 and returns 1 (error). This error propagates back through the call chain: x25_queue_rx_frame returns 1 | v x25_state3_machine receives the return value 1 and takes the else branch at line 278, setting queued=0 and returning 0 | v x25_process_rx_frame returns queued=0 | v x25_backlog_rcv at line 452 sees queued=0 and calls kfree_skb(skb) again This would free the same skb twice. Looking at x25_backlog_rcv: net/x25/x25_in.c:x25_backlog_rcv() { ... queued = x25_process_rx_frame(sk, skb); ... if (!queued) kfree_skb(skb); } Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Martin Schiller Link: https://patch.msgid.link/20260331-x25_fraglen-v4-1-3e69f18464b4@dev.tdt.de Signed-off-by: Paolo Abeni --- net/x25/x25_in.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index b981a4828d08..0dbc73efab1c 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -44,10 +44,9 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) if (x25->fraglen > 0) { /* End of fragment */ int len = x25->fraglen + skb->len; - if ((skbn = alloc_skb(len, GFP_ATOMIC)) == NULL){ - kfree_skb(skb); + skbn = alloc_skb(len, GFP_ATOMIC); + if (!skbn) return 1; - } skb_queue_tail(&x25->fragment_queue, skb); -- cgit v1.2.3 From a1822cb524e89b4cd2cf0b82e484a2335496a6d9 Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Tue, 31 Mar 2026 09:43:18 +0200 Subject: net/x25: Fix overflow when accumulating packets Add a check to ensure that `x25_sock.fraglen` does not overflow. The `fraglen` also needs to be resetted when purging `fragment_queue` in `x25_clear_queues()`. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Suggested-by: Yiming Qian Signed-off-by: Martin Schiller Link: https://patch.msgid.link/20260331-x25_fraglen-v4-2-3e69f18464b4@dev.tdt.de Signed-off-by: Paolo Abeni --- net/x25/x25_in.c | 4 ++++ net/x25/x25_subr.c | 1 + 2 files changed, 5 insertions(+) diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 0dbc73efab1c..e47ebd8acd21 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -34,6 +34,10 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) struct sk_buff *skbo, *skbn = skb; struct x25_sock *x25 = x25_sk(sk); + /* make sure we don't overflow */ + if (x25->fraglen + skb->len > USHRT_MAX) + return 1; + if (more) { x25->fraglen += skb->len; skb_queue_tail(&x25->fragment_queue, skb); diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 0285aaa1e93c..159708d9ad20 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -40,6 +40,7 @@ void x25_clear_queues(struct sock *sk) skb_queue_purge(&x25->interrupt_in_queue); skb_queue_purge(&x25->interrupt_out_queue); skb_queue_purge(&x25->fragment_queue); + x25->fraglen = 0; } -- cgit v1.2.3 From faeea8bbf6e958bf3c00cb08263109661975987c Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Mon, 30 Mar 2026 22:02:15 -0700 Subject: net/sched: cls_fw: fix NULL pointer dereference on shared blocks The old-method path in fw_classify() calls tcf_block_q() and dereferences q->handle. Shared blocks leave block->q NULL, causing a NULL deref when an empty cls_fw filter is attached to a shared block and a packet with a nonzero major skb mark is classified. Reject the configuration in fw_change() when the old method (no TCA_OPTIONS) is used on a shared block, since fw_classify()'s old-method path needs block->q which is NULL for shared blocks. The fixed null-ptr-deref calling stack: KASAN: null-ptr-deref in range [0x0000000000000038-0x000000000000003f] RIP: 0010:fw_classify (net/sched/cls_fw.c:81) Call Trace: tcf_classify (./include/net/tc_wrapper.h:197 net/sched/cls_api.c:1764 net/sched/cls_api.c:1860) tc_run (net/core/dev.c:4401) __dev_queue_xmit (net/core/dev.c:4535 net/core/dev.c:4790) Fixes: 1abf272022cf ("net: sched: tcindex, fw, flow: use tcf_block_q helper to get struct Qdisc") Reported-by: Weiming Shi Signed-off-by: Xiang Mei Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260331050217.504278-1-xmei5@asu.edu Signed-off-by: Paolo Abeni --- net/sched/cls_fw.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index be81c108179d..23884ef8b80c 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -247,8 +247,18 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, struct nlattr *tb[TCA_FW_MAX + 1]; int err; - if (!opt) - return handle ? -EINVAL : 0; /* Succeed if it is old method. */ + if (!opt) { + if (handle) + return -EINVAL; + + if (tcf_block_shared(tp->chain->block)) { + NL_SET_ERR_MSG(extack, + "Must specify mark when attaching fw filter to block"); + return -EINVAL; + } + + return 0; /* Succeed if it is old method. */ + } err = nla_parse_nested_deprecated(tb, TCA_FW_MAX, opt, fw_policy, NULL); -- cgit v1.2.3 From 1a280dd4bd1d616a01d6ffe0de284c907b555504 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Mon, 30 Mar 2026 22:02:16 -0700 Subject: net/sched: cls_flow: fix NULL pointer dereference on shared blocks flow_change() calls tcf_block_q() and dereferences q->handle to derive a default baseclass. Shared blocks leave block->q NULL, causing a NULL deref when a flow filter without a fully qualified baseclass is created on a shared block. Check tcf_block_shared() before accessing block->q and return -EINVAL for shared blocks. This avoids the null-deref shown below: ======================================================================= KASAN: null-ptr-deref in range [0x0000000000000038-0x000000000000003f] RIP: 0010:flow_change (net/sched/cls_flow.c:508) Call Trace: tc_new_tfilter (net/sched/cls_api.c:2432) rtnetlink_rcv_msg (net/core/rtnetlink.c:6980) [...] ======================================================================= Fixes: 1abf272022cf ("net: sched: tcindex, fw, flow: use tcf_block_q helper to get struct Qdisc") Reported-by: Weiming Shi Signed-off-by: Xiang Mei Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260331050217.504278-2-xmei5@asu.edu Signed-off-by: Paolo Abeni --- net/sched/cls_flow.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 339c664beff6..ab364e4e4686 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -503,8 +503,16 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, } if (TC_H_MAJ(baseclass) == 0) { - struct Qdisc *q = tcf_block_q(tp->chain->block); + struct tcf_block *block = tp->chain->block; + struct Qdisc *q; + if (tcf_block_shared(block)) { + NL_SET_ERR_MSG(extack, + "Must specify baseclass when attaching flow filter to block"); + goto err2; + } + + q = tcf_block_q(block); baseclass = TC_H_MAKE(q->handle, baseclass); } if (TC_H_MIN(baseclass) == 0) -- cgit v1.2.3 From 70f73562d278d9f88e7095e327f2a50082a82c65 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Mon, 30 Mar 2026 22:02:17 -0700 Subject: selftests/tc-testing: add tests for cls_fw and cls_flow on shared blocks Regression tests for the shared-block NULL derefs fixed in the previous two patches: - fw: attempt to attach an empty fw filter to a shared block and verify the configuration is rejected with EINVAL. - flow: create a flow filter on a shared block without a baseclass and verify the configuration is rejected with EINVAL. Signed-off-by: Xiang Mei Acked-by: Jamal Hadi Salim Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20260331050217.504278-3-xmei5@asu.edu Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/filter.json | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json index 8d10042b489b..dbce6436ed26 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json @@ -22,5 +22,49 @@ "teardown": [ "$TC qdisc del dev $DUMMY root handle 1: htb default 1" ] + }, + { + "id": "b7e3", + "name": "Empty fw filter on shared block - rejected at config time", + "category": [ + "filter", + "fw" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 egress_block 1 clsact" + ], + "cmdUnderTest": "$TC filter add block 1 protocol ip prio 1 fw", + "expExitCode": "2", + "verifyCmd": "$TC filter show block 1", + "matchPattern": "fw", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "c8f4", + "name": "Flow filter on shared block without baseclass - rejected at config time", + "category": [ + "filter", + "flow" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress_block 1 clsact" + ], + "cmdUnderTest": "$TC filter add block 1 protocol ip prio 1 handle 1 flow map key dst", + "expExitCode": "2", + "verifyCmd": "$TC filter show block 1", + "matchPattern": "flow", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] } ] -- cgit v1.2.3 From b18c833888742ca9de80c250f9d40d0e97caa9f6 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 1 Apr 2026 11:21:53 +0200 Subject: vsock: initialize child_ns_mode_locked in vsock_net_init() The `child_ns_mode_locked` field lives in `struct net`, which persists across vsock module reloads. When the module is unloaded and reloaded, `vsock_net_init()` resets `mode` and `child_ns_mode` back to their default values, but does not reset `child_ns_mode_locked`. The stale lock from the previous module load causes subsequent writes to `child_ns_mode` to silently fail: `vsock_net_set_child_mode()` sees the old lock, skips updating the actual value, and returns success when the requested mode matches the stale lock. The sysctl handler reports no error, but `child_ns_mode` remains unchanged. Steps to reproduce: $ modprobe vsock $ echo local > /proc/sys/net/vsock/child_ns_mode $ cat /proc/sys/net/vsock/child_ns_mode local $ modprobe -r vsock $ modprobe vsock $ echo local > /proc/sys/net/vsock/child_ns_mode $ cat /proc/sys/net/vsock/child_ns_mode global <--- expected "local" Fix this by initializing `child_ns_mode_locked` to 0 (unlocked) in `vsock_net_init()`, so the write-once mechanism works correctly after module reload. Fixes: 102eab95f025 ("vsock: lock down child_ns_mode as write-once") Reported-by: Jin Liu Signed-off-by: Stefano Garzarella Reviewed-by: Bobby Eshleman Link: https://patch.msgid.link/20260401092153.28462-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2f7d94d682cb..d912ed2f012a 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2928,6 +2928,7 @@ static void vsock_net_init(struct net *net) net->vsock.mode = vsock_net_child_mode(current->nsproxy->net_ns); net->vsock.child_ns_mode = net->vsock.mode; + net->vsock.child_ns_mode_locked = 0; } static __net_init int vsock_sysctl_init_net(struct net *net) -- cgit v1.2.3 From f5df2990c364d1ac596d24b3118dbc56503f7cd4 Mon Sep 17 00:00:00 2001 From: Luka Gejak Date: Wed, 1 Apr 2026 11:22:42 +0200 Subject: net: hsr: serialize seq_blocks merge across nodes During node merging, hsr_handle_sup_frame() walks node_curr->seq_blocks to update node_real without holding node_curr->seq_out_lock. This allows concurrent mutations from duplicate registration paths, risking inconsistent state or XArray/bitmap corruption. Fix this by locking both nodes' seq_out_lock during the merge. To prevent ABBA deadlocks, locks are acquired in order of memory address. Reviewed-by: Felix Maurer Fixes: 415e6367512b ("hsr: Implement more robust duplicate discard for PRP") Signed-off-by: Luka Gejak Link: https://patch.msgid.link/20260401092243.52121-2-luka.gejak@linux.dev Signed-off-by: Jakub Kicinski --- net/hsr/hsr_framereg.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 50996f4de7f9..d41863593674 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -123,6 +123,40 @@ static void hsr_free_node_rcu(struct rcu_head *rn) hsr_free_node(node); } +static void hsr_lock_seq_out_pair(struct hsr_node *node_a, + struct hsr_node *node_b) +{ + if (node_a == node_b) { + spin_lock_bh(&node_a->seq_out_lock); + return; + } + + if (node_a < node_b) { + spin_lock_bh(&node_a->seq_out_lock); + spin_lock_nested(&node_b->seq_out_lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock_bh(&node_b->seq_out_lock); + spin_lock_nested(&node_a->seq_out_lock, SINGLE_DEPTH_NESTING); + } +} + +static void hsr_unlock_seq_out_pair(struct hsr_node *node_a, + struct hsr_node *node_b) +{ + if (node_a == node_b) { + spin_unlock_bh(&node_a->seq_out_lock); + return; + } + + if (node_a < node_b) { + spin_unlock(&node_b->seq_out_lock); + spin_unlock_bh(&node_a->seq_out_lock); + } else { + spin_unlock(&node_a->seq_out_lock); + spin_unlock_bh(&node_b->seq_out_lock); + } +} + void hsr_del_nodes(struct list_head *node_db) { struct hsr_node *node; @@ -432,7 +466,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) } ether_addr_copy(node_real->macaddress_B, ethhdr->h_source); - spin_lock_bh(&node_real->seq_out_lock); + hsr_lock_seq_out_pair(node_real, node_curr); for (i = 0; i < HSR_PT_PORTS; i++) { if (!node_curr->time_in_stale[i] && time_after(node_curr->time_in[i], node_real->time_in[i])) { @@ -455,7 +489,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) src_blk->seq_nrs[i], HSR_SEQ_BLOCK_SIZE); } } - spin_unlock_bh(&node_real->seq_out_lock); + hsr_unlock_seq_out_pair(node_real, node_curr); node_real->addr_B_port = port_rcv->type; spin_lock_bh(&hsr->list_lock); -- cgit v1.2.3 From 2e3514e63bfb0e972b1f19668547a455d0129e88 Mon Sep 17 00:00:00 2001 From: Luka Gejak Date: Wed, 1 Apr 2026 11:22:43 +0200 Subject: net: hsr: fix VLAN add unwind on slave errors When vlan_vid_add() fails for a secondary slave, the error path calls vlan_vid_del() on the failing port instead of the peer slave that had already succeeded. This results in asymmetric VLAN state across the HSR pair. Fix this by switching to a centralized unwind path that removes the VID from any slave device that was already programmed. Fixes: 1a8a63a5305e ("net: hsr: Add VLAN CTAG filter support") Signed-off-by: Luka Gejak Link: https://patch.msgid.link/20260401092243.52121-3-luka.gejak@linux.dev Signed-off-by: Jakub Kicinski --- net/hsr/hsr_device.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index d1bfc49b5f01..fd2fea25eff0 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -532,8 +532,8 @@ static void hsr_change_rx_flags(struct net_device *dev, int change) static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { - bool is_slave_a_added = false; - bool is_slave_b_added = false; + struct net_device *slave_a_dev = NULL; + struct net_device *slave_b_dev = NULL; struct hsr_port *port; struct hsr_priv *hsr; int ret = 0; @@ -549,33 +549,35 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev, switch (port->type) { case HSR_PT_SLAVE_A: if (ret) { - /* clean up Slave-B */ netdev_err(dev, "add vid failed for Slave-A\n"); - if (is_slave_b_added) - vlan_vid_del(port->dev, proto, vid); - return ret; + goto unwind; } - - is_slave_a_added = true; + slave_a_dev = port->dev; break; - case HSR_PT_SLAVE_B: if (ret) { - /* clean up Slave-A */ netdev_err(dev, "add vid failed for Slave-B\n"); - if (is_slave_a_added) - vlan_vid_del(port->dev, proto, vid); - return ret; + goto unwind; } - - is_slave_b_added = true; + slave_b_dev = port->dev; break; default: + if (ret) + goto unwind; break; } } return 0; + +unwind: + if (slave_a_dev) + vlan_vid_del(slave_a_dev, proto, vid); + + if (slave_b_dev) + vlan_vid_del(slave_b_dev, proto, vid); + + return ret; } static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev, -- cgit v1.2.3 From 4e453375561fc60820e6b9d8ebeb6b3ee177d42e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Apr 2026 15:47:21 +0000 Subject: ipv6: avoid overflows in ip6_datagram_send_ctl() Yiming Qian reported : I believe I found a locally triggerable kernel bug in the IPv6 sendmsg ancillary-data path that can panic the kernel via `skb_under_panic()` (local DoS). The core issue is a mismatch between: - a 16-bit length accumulator (`struct ipv6_txoptions::opt_flen`, type `__u16`) and - a pointer to the *last* provided destination-options header (`opt->dst1opt`) when multiple `IPV6_DSTOPTS` control messages (cmsgs) are provided. - `include/net/ipv6.h`: - `struct ipv6_txoptions::opt_flen` is `__u16` (wrap possible). (lines 291-307, especially 298) - `net/ipv6/datagram.c:ip6_datagram_send_ctl()`: - Accepts repeated `IPV6_DSTOPTS` and accumulates into `opt_flen` without rejecting duplicates. (lines 909-933) - `net/ipv6/ip6_output.c:__ip6_append_data()`: - Uses `opt->opt_flen + opt->opt_nflen` to compute header sizes/headroom decisions. (lines 1448-1466, especially 1463-1465) - `net/ipv6/ip6_output.c:__ip6_make_skb()`: - Calls `ipv6_push_frag_opts()` if `opt->opt_flen` is non-zero. (lines 1930-1934) - `net/ipv6/exthdrs.c:ipv6_push_frag_opts()` / `ipv6_push_exthdr()`: - Push size comes from `ipv6_optlen(opt->dst1opt)` (based on the pointed-to header). (lines 1179-1185 and 1206-1211) 1. `opt_flen` is a 16-bit accumulator: - `include/net/ipv6.h:298` defines `__u16 opt_flen; /* after fragment hdr */`. 2. `ip6_datagram_send_ctl()` accepts *repeated* `IPV6_DSTOPTS` cmsgs and increments `opt_flen` each time: - In `net/ipv6/datagram.c:909-933`, for `IPV6_DSTOPTS`: - It computes `len = ((hdr->hdrlen + 1) << 3);` - It checks `CAP_NET_RAW` using `ns_capable(net->user_ns, CAP_NET_RAW)`. (line 922) - Then it does: - `opt->opt_flen += len;` (line 927) - `opt->dst1opt = hdr;` (line 928) There is no duplicate rejection here (unlike the legacy `IPV6_2292DSTOPTS` path which rejects duplicates at `net/ipv6/datagram.c:901-904`). If enough large `IPV6_DSTOPTS` cmsgs are provided, `opt_flen` wraps while `dst1opt` still points to a large (2048-byte) destination-options header. In the attached PoC (`poc.c`): - 32 cmsgs with `hdrlen=255` => `len = (255+1)*8 = 2048` - 1 cmsg with `hdrlen=0` => `len = 8` - Total increment: `32*2048 + 8 = 65544`, so `(__u16)opt_flen == 8` - The last cmsg is 2048 bytes, so `dst1opt` points to a 2048-byte header. 3. The transmit path sizes headers using the wrapped `opt_flen`: - In `net/ipv6/ip6_output.c:1463-1465`: - `headersize = sizeof(struct ipv6hdr) + (opt ? opt->opt_flen + opt->opt_nflen : 0) + ...;` With wrapped `opt_flen`, `headersize`/headroom decisions underestimate what will be pushed later. 4. When building the final skb, the actual push length comes from `dst1opt` and is not limited by wrapped `opt_flen`: - In `net/ipv6/ip6_output.c:1930-1934`: - `if (opt->opt_flen) proto = ipv6_push_frag_opts(skb, opt, proto);` - In `net/ipv6/exthdrs.c:1206-1211`, `ipv6_push_frag_opts()` pushes `dst1opt` via `ipv6_push_exthdr()`. - In `net/ipv6/exthdrs.c:1179-1184`, `ipv6_push_exthdr()` does: - `skb_push(skb, ipv6_optlen(opt));` - `memcpy(h, opt, ipv6_optlen(opt));` With insufficient headroom, `skb_push()` underflows and triggers `skb_under_panic()` -> `BUG()`: - `net/core/skbuff.c:2669-2675` (`skb_push()` calls `skb_under_panic()`) - `net/core/skbuff.c:207-214` (`skb_panic()` ends in `BUG()`) - The `IPV6_DSTOPTS` cmsg path requires `CAP_NET_RAW` in the target netns user namespace (`ns_capable(net->user_ns, CAP_NET_RAW)`). - Root (or any task with `CAP_NET_RAW`) can trigger this without user namespaces. - An unprivileged `uid=1000` user can trigger this if unprivileged user namespaces are enabled and it can create a userns+netns to obtain namespaced `CAP_NET_RAW` (the attached PoC does this). - Local denial of service: kernel BUG/panic (system crash). - Reproducible with a small userspace PoC. This patch does not reject duplicated options, as this might break some user applications. Instead, it makes sure to adjust opt_flen and opt_nflen to correctly reflect the size of the current option headers, preventing the overflows and the potential for panics. This applies to IPV6_DSTOPTS, IPV6_HOPOPTS, and IPV6_RTHDR. Specifically: When a new IPV6_DSTOPTS is processed, the length of the old opt->dst1opt is subtracted from opt->opt_flen before adding the new length. When a new IPV6_HOPOPTS is processed, the length of the old opt->dst0opt is subtracted from opt->opt_nflen. When a new Routing Header (IPV6_RTHDR or IPV6_2292RTHDR) is processed, the length of the old opt->srcrt is subtracted from opt->opt_nflen. In the special case within IPV6_2292RTHDR handling where dst1opt is moved to dst0opt, the length of the old opt->dst0opt is subtracted from opt->opt_nflen before the new one is added. Fixes: 333fad5364d6 ("[IPV6]: Support several new sockopt / ancillary data in Advanced API (RFC3542).") Reported-by: Yiming Qian Closes: https://lore.kernel.org/netdev/CAL_bE8JNzawgr5OX5m+3jnQDHry2XxhQT5=jThW1zDPtUikRYA@mail.gmail.com/ Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260401154721.3740056-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/datagram.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c564b68a0562..993e2d76fc1f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -763,6 +763,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; + struct ipv6_rt_hdr *orthdr; struct ipv6_rt_hdr *rthdr; struct ipv6_opt_hdr *hdr; struct ipv6_txoptions *opt = ipc6->opt; @@ -924,9 +925,13 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; } if (cmsg->cmsg_type == IPV6_DSTOPTS) { + if (opt->dst1opt) + opt->opt_flen -= ipv6_optlen(opt->dst1opt); opt->opt_flen += len; opt->dst1opt = hdr; } else { + if (opt->dst0opt) + opt->opt_nflen -= ipv6_optlen(opt->dst0opt); opt->opt_nflen += len; opt->dst0opt = hdr; } @@ -969,12 +974,17 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; } + orthdr = opt->srcrt; + if (orthdr) + opt->opt_nflen -= ((orthdr->hdrlen + 1) << 3); opt->opt_nflen += len; opt->srcrt = rthdr; if (cmsg->cmsg_type == IPV6_2292RTHDR && opt->dst1opt) { int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3); + if (opt->dst0opt) + opt->opt_nflen -= ipv6_optlen(opt->dst0opt); opt->opt_nflen += dsthdrlen; opt->dst0opt = opt->dst1opt; opt->dst1opt = NULL; -- cgit v1.2.3 From ec7067e661193403a7a00980bda8612db5954142 Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Wed, 1 Apr 2026 09:28:48 -0700 Subject: eth: fbnic: Increase FBNIC_QUEUE_SIZE_MIN to 64 On systems with 64K pages, RX queues will be wedged if users set the descriptor count to the current minimum (16). Fbnic fragments large pages into 4K chunks, and scales down the ring size accordingly. With 64K pages and 16 descriptors, the ring size mask is 0 and will never be filled. 32 descriptors is another special case that wedges the RX rings. Internally, the rings track pages for the head/tail pointers, not page fragments. So with 32 descriptors, there's only 1 usable page as one ring slot is kept empty to disambiguate between an empty/full ring. As a result, the head pointer never advances and the HW stalls after consuming 16 page fragments. Fixes: 0cb4c0a13723 ("eth: fbnic: Implement Rx queue alloc/start/stop/free") Signed-off-by: Dimitri Daskalakis Link: https://patch.msgid.link/20260401162848.2335350-1-dimitri.daskalakis1@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index 980965274079..e03c9d2c38dc 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -38,7 +38,7 @@ struct fbnic_net; #define FBNIC_MAX_XDPQS 128u /* These apply to TWQs, TCQ, RCQ */ -#define FBNIC_QUEUE_SIZE_MIN 16u +#define FBNIC_QUEUE_SIZE_MIN 64u #define FBNIC_QUEUE_SIZE_MAX SZ_64K #define FBNIC_TXQ_SIZE_DEFAULT 1024 -- cgit v1.2.3