diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2026-03-31 16:01:13 -0700 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2026-03-31 16:01:13 -0700 |
| commit | e2d072d6a3d1369d289667f51cf771eefa3c0b26 (patch) | |
| tree | 4b92dd00f6a02cbea11ab3da57c37ee63586baa0 | |
| parent | a8502a79e832b861e99218cbd2d8f4312d62e225 (diff) | |
| parent | c76fef7dcd9372e3476d4df5e0a72ed5919a814b (diff) | |
Merge branch 'fix-bpf_link-grace-period-wait-for-tracepoints'
Kumar Kartikeya Dwivedi says:
====================
Fix bpf_link grace period wait for tracepoints
A recent change to non-faultable tracepoints switched from
preempt-disabled critical sections to SRCU-fast, which breaks
assumptions in the bpf_link_free() path. Use call_srcu() to fix the
breakage.
Changelog:
----------
v3 -> v4
v3: https://lore.kernel.org/bpf/20260331005215.2813492-1-memxor@gmail.com
* Introduce call_tracepoint_unregister_{atomic,syscall} instead. (Alexei, Steven)
v2 -> v3
v2: https://lore.kernel.org/bpf/20260330143102.1265391-1-memxor@gmail.com
* Introduce and switch to call_tracepoint_unregister_non_faultable(). (Steven)
* Address Andrii's comment and add Acked-by. (Andrii)
* Drop rcu_trace_implies_rcu_gp() conversion. (Alexei)
v1 -> v2
v1: https://lore.kernel.org/bpf/20260330032124.3141001-1-memxor@gmail.com
* Add Reviewed-by tags. (Paul, Puranjay)
* Adjust commit descriptions and comments to clarify intent. (Puranjay)
====================
Link: https://patch.msgid.link/20260331211021.1632902-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
| -rw-r--r-- | include/linux/bpf.h | 4 | ||||
| -rw-r--r-- | include/linux/tracepoint.h | 20 | ||||
| -rw-r--r-- | kernel/bpf/syscall.c | 25 |
3 files changed, 47 insertions, 2 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 05b34a6355b0..35b1e25bd104 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1854,6 +1854,10 @@ struct bpf_link_ops { * target hook is sleepable, we'll go through tasks trace RCU GP and * then "classic" RCU GP; this need for chaining tasks trace and * classic RCU GPs is designated by setting bpf_link->sleepable flag + * + * For non-sleepable tracepoint links we go through SRCU gp instead, + * since RCU is not used in that case. Sleepable tracepoints still + * follow the scheme above. */ void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 22ca1c8b54f3..1d7f29f5e901 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -122,6 +122,22 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp) { return tp->ext && tp->ext->faultable; } +/* + * Run RCU callback with the appropriate grace period wait for non-faultable + * tracepoints, e.g., those used in atomic context. + */ +static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func) +{ + call_srcu(&tracepoint_srcu, rcu, func); +} +/* + * Run RCU callback with the appropriate grace period wait for faultable + * tracepoints, e.g., those used in syscall context. + */ +static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func) +{ + call_rcu_tasks_trace(rcu, func); +} #else static inline void tracepoint_synchronize_unregister(void) { } @@ -129,6 +145,10 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp) { return false; } +static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func) +{ } +static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func) +{ } #endif #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 274039e36465..700938782bed 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3261,6 +3261,18 @@ static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) bpf_link_dealloc(link); } +static bool bpf_link_is_tracepoint(struct bpf_link *link) +{ + /* + * Only these combinations support a tracepoint bpf_link. + * BPF_LINK_TYPE_TRACING raw_tp progs are hardcoded to use + * bpf_raw_tp_link_lops and thus dealloc_deferred(), see + * bpf_raw_tp_link_attach(). + */ + return link->type == BPF_LINK_TYPE_RAW_TRACEPOINT || + (link->type == BPF_LINK_TYPE_TRACING && link->attach_type == BPF_TRACE_RAW_TP); +} + static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) { if (rcu_trace_implies_rcu_gp()) @@ -3279,16 +3291,25 @@ static void bpf_link_free(struct bpf_link *link) if (link->prog) ops->release(link); if (ops->dealloc_deferred) { - /* Schedule BPF link deallocation, which will only then + /* + * Schedule BPF link deallocation, which will only then * trigger putting BPF program refcount. * If underlying BPF program is sleepable or BPF link's target * attach hookpoint is sleepable or otherwise requires RCU GPs * to ensure link and its underlying BPF program is not * reachable anymore, we need to first wait for RCU tasks - * trace sync, and then go through "classic" RCU grace period + * trace sync, and then go through "classic" RCU grace period. + * + * For tracepoint BPF links, we need to go through SRCU grace + * period wait instead when non-faultable tracepoint is used. We + * don't need to chain SRCU grace period waits, however, for the + * faultable case, since it exclusively uses RCU Tasks Trace. */ if (link->sleepable || (link->prog && link->prog->sleepable)) call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); + /* We need to do a SRCU grace period wait for non-faultable tracepoint BPF links. */ + else if (bpf_link_is_tracepoint(link)) + call_tracepoint_unregister_atomic(&link->rcu, bpf_link_defer_dealloc_rcu_gp); else call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); } else if (ops->dealloc) { |
