summary | refs | log | tree | commit | diff
path: root/block/elevator.c
diff options
context:
space:
mode:
author: Nilay Shroff <nilay@linux.ibm.com> 2025-07-30 13:16:09 +0530
committer: Jens Axboe <axboe@kernel.dk> 2025-07-30 06:20:51 -0600
commit: 04225d13aef11b2a539014def5e47d8c21fd74a5 (patch)
tree: 322bda04c657a2aaee282737e32b35afa0be17e1 /block/elevator.c
parent: f5a6604f7a4405450e4a1f54e5430f47290c500f (diff)
block: fix potential deadlock while running nr_hw_queue update
Move scheduler tags (sched_tags) allocation and deallocation outside both the ->elevator_lock and ->freeze_lock when updating nr_hw_queues. This change breaks the dependency chain from the percpu allocator lock to the elevator lock, helping to prevent potential deadlocks, as observed in the reported lockdep splat[1].

This commit introduces batch allocation and deallocation helpers for sched_tags, which are now used from within __blk_mq_update_nr_hw_queues routine while iterating through the tagset.

With this change, all sched_tags memory management is handled entirely outside the ->elevator_lock and the ->freeze_lock context, thereby eliminating the lock dependency that could otherwise manifest during nr_hw_queues updates.

[1] https://lore.kernel.org/all/0659ea8d-a463-47c8-9180-43c719e106eb@linux.ibm.com/

Reported-by: Stefan Haberland <sth@linux.ibm.com>
Closes: https://lore.kernel.org/all/0659ea8d-a463-47c8-9180-43c719e106eb@linux.ibm.com/
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Link: https://lore.kernel.org/r/20250730074614.2537382-4-nilay@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/elevator.c')
-rw-r--r-- block/elevator.c 15
1 files changed, 6 insertions, 9 deletions
diff --git a/block/elevator.c b/block/elevator.c
index e9dc837b7b70..fe96c6f4753c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -705,7 +705,8 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
* The I/O scheduler depends on the number of hardware queues, this forces a
* reattachment when nr_hw_queues changes.
*/
-void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e)
+void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
+ struct elevator_tags *t)
{
struct blk_mq_tag_set *set = q->tag_set;
struct elv_change_ctx ctx = {};
@@ -715,25 +716,21 @@ void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e)
if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
ctx.name = e->elevator_name;
- ctx.et = blk_mq_alloc_sched_tags(set, set->nr_hw_queues);
- if (!ctx.et) {
- WARN_ON_ONCE(1);
- goto unfreeze;
- }
+ ctx.et = t;
+
mutex_lock(&q->elevator_lock);
/* force to reattach elevator after nr_hw_queue is updated */
ret = elevator_switch(q, &ctx);
mutex_unlock(&q->elevator_lock);
}
-unfreeze:
blk_mq_unfreeze_queue_nomemrestore(q);
if (!ret)
WARN_ON_ONCE(elevator_change_done(q, &ctx));
/*
* Free sched tags if it's allocated but we couldn't switch elevator.
*/
- if (ctx.et && !ctx.new)
- blk_mq_free_sched_tags(ctx.et, set);
+ if (t && !ctx.new)
+ blk_mq_free_sched_tags(t, set);
}
/*