Merge branch 'fix-bpf_link-grace-period-wait-for-tracepoints'

Kumar Kartikeya Dwivedi says: ==================== Fix bpf_link grace period wait for tracepoints A recent change to non-faultable tracepoints switched from preempt-disabled critical sections to SRCU-fast, which breaks assumptions in the bpf_link_free() path. Use call_srcu() to fix the breakage. Changelog: ---------- v3 -> v4 v3: https://lore.kernel.org/bpf/20260331005215.2813492-1-memxor@gmail.com * Introduce call_tracepoint_unregister_{atomic,syscall} instead. (Alexei, Steven) v2 -> v3 v2: https://lore.kernel.org/bpf/20260330143102.1265391-1-memxor@gmail.com * Introduce and switch to call_tracepoint_unregister_non_faultable(). (Steven) * Address Andrii's comment and add Acked-by. (Andrii) * Drop rcu_trace_implies_rcu_gp() conversion. (Alexei) v1 -> v2 v1: https://lore.kernel.org/bpf/20260330032124.3141001-1-memxor@gmail.com * Add Reviewed-by tags. (Paul, Puranjay) * Adjust commit descriptions and comments to clarify intent. (Puranjay) ==================== Link: https://patch.msgid.link/20260331211021.1632902-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
author: Alexei Starovoitov <ast@kernel.org> 2026-03-31 16:01:13 -0700
committer: Alexei Starovoitov <ast@kernel.org> 2026-03-31 16:01:13 -0700
commit: e2d072d6a3d1369d289667f51cf771eefa3c0b26 (patch)
tree: 4b92dd00f6a02cbea11ab3da57c37ee63586baa0
parent: a8502a79e832b861e99218cbd2d8f4312d62e225 (diff)
parent: c76fef7dcd9372e3476d4df5e0a72ed5919a814b (diff)
3 files changed, 47 insertions, 2 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 05b34a6355b0..35b1e25bd104 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1854,6 +1854,10 @@ struct bpf_link_ops {
 	 * target hook is sleepable, we'll go through tasks trace RCU GP and
 	 * then "classic" RCU GP; this need for chaining tasks trace and
 	 * classic RCU GPs is designated by setting bpf_link->sleepable flag
+	 *
+	 * For non-sleepable tracepoint links we go through SRCU gp instead,
+	 * since RCU is not used in that case. Sleepable tracepoints still
+	 * follow the scheme above.
 	 */
 	void (*dealloc_deferred)(struct bpf_link *link);
 	int (*detach)(struct bpf_link *link);
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 22ca1c8b54f3..1d7f29f5e901 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -122,6 +122,22 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp)
 {
 	return tp->ext && tp->ext->faultable;
 }
+/*
+ * Run RCU callback with the appropriate grace period wait for non-faultable
+ * tracepoints, e.g., those used in atomic context.
+ */
+static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func)
+{
+	call_srcu(&tracepoint_srcu, rcu, func);
+}
+/*
+ * Run RCU callback with the appropriate grace period wait for faultable
+ * tracepoints, e.g., those used in syscall context.
+ */
+static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func)
+{
+	call_rcu_tasks_trace(rcu, func);
+}
 #else
 static inline void tracepoint_synchronize_unregister(void)
 { }
@@ -129,6 +145,10 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp)
 {
 	return false;
 }
+static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func)
+{  }
+static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func)
+{  }
 #endif
 
 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 274039e36465..700938782bed 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3261,6 +3261,18 @@ static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
 	bpf_link_dealloc(link);
 }
 
+static bool bpf_link_is_tracepoint(struct bpf_link *link)
+{
+	/*
+	 * Only these combinations support a tracepoint bpf_link.
+	 * BPF_LINK_TYPE_TRACING raw_tp progs are hardcoded to use
+	 * bpf_raw_tp_link_lops and thus dealloc_deferred(), see
+	 * bpf_raw_tp_link_attach().
+	 */
+	return link->type == BPF_LINK_TYPE_RAW_TRACEPOINT ||
+	       (link->type == BPF_LINK_TYPE_TRACING && link->attach_type == BPF_TRACE_RAW_TP);
+}
+
 static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
 {
 	if (rcu_trace_implies_rcu_gp())
@@ -3279,16 +3291,25 @@ static void bpf_link_free(struct bpf_link *link)
 	if (link->prog)
 		ops->release(link);
 	if (ops->dealloc_deferred) {
-		/* Schedule BPF link deallocation, which will only then
+		/*
+		 * Schedule BPF link deallocation, which will only then
 		 * trigger putting BPF program refcount.
 		 * If underlying BPF program is sleepable or BPF link's target
 		 * attach hookpoint is sleepable or otherwise requires RCU GPs
 		 * to ensure link and its underlying BPF program is not
 		 * reachable anymore, we need to first wait for RCU tasks
-		 * trace sync, and then go through "classic" RCU grace period
+		 * trace sync, and then go through "classic" RCU grace period.
+		 *
+		 * For tracepoint BPF links, we need to go through SRCU grace
+		 * period wait instead when non-faultable tracepoint is used. We
+		 * don't need to chain SRCU grace period waits, however, for the
+		 * faultable case, since it exclusively uses RCU Tasks Trace.
 		 */
 		if (link->sleepable || (link->prog && link->prog->sleepable))
 			call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+		/* We need to do a SRCU grace period wait for non-faultable tracepoint BPF links. */
+		else if (bpf_link_is_tracepoint(link))
+			call_tracepoint_unregister_atomic(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
 		else
 			call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
 	} else if (ops->dealloc) {
author	Alexei Starovoitov <ast@kernel.org>	2026-03-31 16:01:13 -0700
committer	Alexei Starovoitov <ast@kernel.org>	2026-03-31 16:01:13 -0700
commit	e2d072d6a3d1369d289667f51cf771eefa3c0b26 (patch)
tree	4b92dd00f6a02cbea11ab3da57c37ee63586baa0
parent	a8502a79e832b861e99218cbd2d8f4312d62e225 (diff)
parent	c76fef7dcd9372e3476d4df5e0a72ed5919a814b (diff)