summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexei Starovoitov <ast@kernel.org> 2026-03-31 16:01:13 -0700
committer Alexei Starovoitov <ast@kernel.org> 2026-03-31 16:01:13 -0700
commit e2d072d6a3d1369d289667f51cf771eefa3c0b26 (patch)
tree 4b92dd00f6a02cbea11ab3da57c37ee63586baa0
parent a8502a79e832b861e99218cbd2d8f4312d62e225 (diff)
parent c76fef7dcd9372e3476d4df5e0a72ed5919a814b (diff)
Merge branch 'fix-bpf_link-grace-period-wait-for-tracepoints'
Kumar Kartikeya Dwivedi says:

====================
Fix bpf_link grace period wait for tracepoints

A recent change to non-faultable tracepoints switched from
preempt-disabled critical sections to SRCU-fast, which breaks
assumptions in the bpf_link_free() path. Use call_srcu() to fix the
breakage.

Changelog:
----------
v3 -> v4
v3: https://lore.kernel.org/bpf/20260331005215.2813492-1-memxor@gmail.com

 * Introduce call_tracepoint_unregister_{atomic,syscall} instead. (Alexei, Steven)

v2 -> v3
v2: https://lore.kernel.org/bpf/20260330143102.1265391-1-memxor@gmail.com

 * Introduce and switch to call_tracepoint_unregister_non_faultable(). (Steven)
 * Address Andrii's comment and add Acked-by. (Andrii)
 * Drop rcu_trace_implies_rcu_gp() conversion. (Alexei)

v1 -> v2
v1: https://lore.kernel.org/bpf/20260330032124.3141001-1-memxor@gmail.com

 * Add Reviewed-by tags. (Paul, Puranjay)
 * Adjust commit descriptions and comments to clarify intent. (Puranjay)
====================

Link: https://patch.msgid.link/20260331211021.1632902-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r-- include/linux/bpf.h 4
-rw-r--r-- include/linux/tracepoint.h 20
-rw-r--r-- kernel/bpf/syscall.c 25
3 files changed, 47 insertions, 2 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 05b34a6355b0..35b1e25bd104 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1854,6 +1854,10 @@ struct bpf_link_ops {
* target hook is sleepable, we'll go through tasks trace RCU GP and
* then "classic" RCU GP; this need for chaining tasks trace and
* classic RCU GPs is designated by setting bpf_link->sleepable flag
+ *
+ * For non-sleepable tracepoint links we go through SRCU gp instead,
+ * since RCU is not used in that case. Sleepable tracepoints still
+ * follow the scheme above.
*/
void (*dealloc_deferred)(struct bpf_link *link);
int (*detach)(struct bpf_link *link);
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 22ca1c8b54f3..1d7f29f5e901 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -122,6 +122,22 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp)
{
return tp->ext && tp->ext->faultable;
}
+/*
+ * Run RCU callback with the appropriate grace period wait for non-faultable
+ * tracepoints, e.g., those used in atomic context.
+ */
+static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func)
+{
+ call_srcu(&tracepoint_srcu, rcu, func);
+}
+/*
+ * Run RCU callback with the appropriate grace period wait for faultable
+ * tracepoints, e.g., those used in syscall context.
+ */
+static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func)
+{
+ call_rcu_tasks_trace(rcu, func);
+}
#else
static inline void tracepoint_synchronize_unregister(void)
{ }
@@ -129,6 +145,10 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp)
{
return false;
}
+static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func)
+{ }
+static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func)
+{ }
#endif
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 274039e36465..700938782bed 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3261,6 +3261,18 @@ static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
bpf_link_dealloc(link);
}
+static bool bpf_link_is_tracepoint(struct bpf_link *link)
+{
+ /*
+ * Only these combinations support a tracepoint bpf_link.
+ * BPF_LINK_TYPE_TRACING raw_tp progs are hardcoded to use
+ * bpf_raw_tp_link_lops and thus dealloc_deferred(), see
+ * bpf_raw_tp_link_attach().
+ */
+ return link->type == BPF_LINK_TYPE_RAW_TRACEPOINT ||
+ (link->type == BPF_LINK_TYPE_TRACING && link->attach_type == BPF_TRACE_RAW_TP);
+}
+
static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
{
if (rcu_trace_implies_rcu_gp())
@@ -3279,16 +3291,25 @@ static void bpf_link_free(struct bpf_link *link)
if (link->prog)
ops->release(link);
if (ops->dealloc_deferred) {
- /* Schedule BPF link deallocation, which will only then
+ /*
+ * Schedule BPF link deallocation, which will only then
* trigger putting BPF program refcount.
* If underlying BPF program is sleepable or BPF link's target
* attach hookpoint is sleepable or otherwise requires RCU GPs
* to ensure link and its underlying BPF program is not
* reachable anymore, we need to first wait for RCU tasks
- * trace sync, and then go through "classic" RCU grace period
+ * trace sync, and then go through "classic" RCU grace period.
+ *
+ * For tracepoint BPF links, we need to go through SRCU grace
+ * period wait instead when non-faultable tracepoint is used. We
+ * don't need to chain SRCU grace period waits, however, for the
+ * faultable case, since it exclusively uses RCU Tasks Trace.
*/
if (link->sleepable || (link->prog && link->prog->sleepable))
call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+ /* We need to do a SRCU grace period wait for non-faultable tracepoint BPF links. */
+ else if (bpf_link_is_tracepoint(link))
+ call_tracepoint_unregister_atomic(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
else
call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
} else if (ops->dealloc) {