From 43b729bfe9cf30ad11499a66e3b7bd300c716d44 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Sep 2018 09:43:47 +0200 Subject: block: move integrity_req_gap_{back,front}_merge to blk.h No need to expose these to drivers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d6869e0e2b64..bc534c857344 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1843,26 +1843,6 @@ queue_max_integrity_segments(struct request_queue *q) return q->limits.max_integrity_segments; } -static inline bool integrity_req_gap_back_merge(struct request *req, - struct bio *next) -{ - struct bio_integrity_payload *bip = bio_integrity(req->bio); - struct bio_integrity_payload *bip_next = bio_integrity(next); - - return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], - bip_next->bip_vec[0].bv_offset); -} - -static inline bool integrity_req_gap_front_merge(struct request *req, - struct bio *bio) -{ - struct bio_integrity_payload *bip = bio_integrity(bio); - struct bio_integrity_payload *bip_next = bio_integrity(req->bio); - - return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], - bip_next->bip_vec[0].bv_offset); -} - /** * bio_integrity_intervals - Return number of integrity intervals for a bio * @bi: blk_integrity profile for device @@ -1947,17 +1927,6 @@ static inline bool blk_integrity_merge_bio(struct request_queue *rq, return true; } -static inline bool integrity_req_gap_back_merge(struct request *req, - struct bio *next) -{ - return false; -} -static inline bool integrity_req_gap_front_merge(struct request *req, - struct bio *bio) -{ - return false; -} - static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, unsigned int sectors) { -- cgit v1.2.3 From e9907009cbfc0c93d987d5a8fdf3d6c3c7b89717 Mon Sep 17 
00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Sep 2018 09:43:48 +0200 Subject: block: move req_gap_{back,front}_merge to blk-merge.c Keep it close to the actual users instead of exposing the function to all drivers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-merge.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 69 -------------------------------------------------- 2 files changed, 65 insertions(+), 69 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-merge.c b/block/blk-merge.c index aaec38cc37b8..ad8a226347a6 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -12,6 +12,71 @@ #include "blk.h" +/* + * Check if the two bvecs from two bios can be merged to one segment. If yes, + * no need to check gap between the two bios since the 1st bio and the 1st bvec + * in the 2nd bio can be handled in one segment. + */ +static inline bool bios_segs_mergeable(struct request_queue *q, + struct bio *prev, struct bio_vec *prev_last_bv, + struct bio_vec *next_first_bv) +{ + if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv)) + return false; + if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv)) + return false; + if (prev->bi_seg_back_size + next_first_bv->bv_len > + queue_max_segment_size(q)) + return false; + return true; +} + +static inline bool bio_will_gap(struct request_queue *q, + struct request *prev_rq, struct bio *prev, struct bio *next) +{ + struct bio_vec pb, nb; + + if (!bio_has_data(prev) || !queue_virt_boundary(q)) + return false; + + /* + * Don't merge if the 1st bio starts with non-zero offset, otherwise it + * is quite difficult to respect the sg gap limit. We work hard to + * merge a huge number of small single bios in case of mkfs. 
+ */ + if (prev_rq) + bio_get_first_bvec(prev_rq->bio, &pb); + else + bio_get_first_bvec(prev, &pb); + if (pb.bv_offset) + return true; + + /* + * We don't need to worry about the situation that the merged segment + * ends in unaligned virt boundary: + * + * - if 'pb' ends aligned, the merged segment ends aligned + * - if 'pb' ends unaligned, the next bio must include + * one single bvec of 'nb', otherwise the 'nb' can't + * merge with 'pb' + */ + bio_get_last_bvec(prev, &pb); + bio_get_first_bvec(next, &nb); + if (bios_segs_mergeable(q, prev, &pb, &nb)) + return false; + return __bvec_gap_to_prev(q, &pb, nb.bv_offset); +} + +static inline bool req_gap_back_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, req, req->biotail, bio); +} + +static inline bool req_gap_front_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, NULL, bio, req->bio); +} + static struct bio *blk_bio_discard_split(struct request_queue *q, struct bio *bio, struct bio_set *bs, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bc534c857344..b7e676bb01bc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1695,75 +1695,6 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, return __bvec_gap_to_prev(q, bprv, offset); } -/* - * Check if the two bvecs from two bios can be merged to one segment. - * If yes, no need to check gap between the two bios since the 1st bio - * and the 1st bvec in the 2nd bio can be handled in one segment. 
- */ -static inline bool bios_segs_mergeable(struct request_queue *q, - struct bio *prev, struct bio_vec *prev_last_bv, - struct bio_vec *next_first_bv) -{ - if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv)) - return false; - if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv)) - return false; - if (prev->bi_seg_back_size + next_first_bv->bv_len > - queue_max_segment_size(q)) - return false; - return true; -} - -static inline bool bio_will_gap(struct request_queue *q, - struct request *prev_rq, - struct bio *prev, - struct bio *next) -{ - if (bio_has_data(prev) && queue_virt_boundary(q)) { - struct bio_vec pb, nb; - - /* - * don't merge if the 1st bio starts with non-zero - * offset, otherwise it is quite difficult to respect - * sg gap limit. We work hard to merge a huge number of small - * single bios in case of mkfs. - */ - if (prev_rq) - bio_get_first_bvec(prev_rq->bio, &pb); - else - bio_get_first_bvec(prev, &pb); - if (pb.bv_offset) - return true; - - /* - * We don't need to worry about the situation that the - * merged segment ends in unaligned virt boundary: - * - * - if 'pb' ends aligned, the merged segment ends aligned - * - if 'pb' ends unaligned, the next bio must include - * one single bvec of 'nb', otherwise the 'nb' can't - * merge with 'pb' - */ - bio_get_last_bvec(prev, &pb); - bio_get_first_bvec(next, &nb); - - if (!bios_segs_mergeable(q, prev, &pb, &nb)) - return __bvec_gap_to_prev(q, &pb, nb.bv_offset); - } - - return false; -} - -static inline bool req_gap_back_merge(struct request *req, struct bio *bio) -{ - return bio_will_gap(req->q, req, req->biotail, bio); -} - -static inline bool req_gap_front_merge(struct request *req, struct bio *bio) -{ - return bio_will_gap(req->q, NULL, bio, req->bio); -} - int kblockd_schedule_work(struct work_struct *work); int kblockd_schedule_work_on(int cpu, struct work_struct *work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); -- cgit v1.2.3 From 
27ca1d4ed04ea29dc77b47190a3cc82697023e76 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Sep 2018 09:43:49 +0200 Subject: block: move req_gap_back_merge to blk.h No need to expose these helpers outside the block layer. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk.h | 19 +++++++++++++++++++ include/linux/blkdev.h | 19 ------------------- 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk.h b/block/blk.h index 441c2de1d4b9..63035c95689c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -149,6 +149,25 @@ static inline void blk_queue_enter_live(struct request_queue *q) percpu_ref_get(&q->q_usage_counter); } +static inline bool __bvec_gap_to_prev(struct request_queue *q, + struct bio_vec *bprv, unsigned int offset) +{ + return offset || + ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); +} + +/* + * Check if adding a bio_vec after bprv with offset would create a gap in + * the SG list. Most drivers don't care about this, but some do. + */ +static inline bool bvec_gap_to_prev(struct request_queue *q, + struct bio_vec *bprv, unsigned int offset) +{ + if (!queue_virt_boundary(q)) + return false; + return __bvec_gap_to_prev(q, bprv, offset); +} + #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_flush_integrity(void); bool __bio_integrity_endio(struct bio *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b7e676bb01bc..1d5e14139795 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1676,25 +1676,6 @@ static inline void put_dev_sector(Sector p) put_page(p.v); } -static inline bool __bvec_gap_to_prev(struct request_queue *q, - struct bio_vec *bprv, unsigned int offset) -{ - return offset || - ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); -} - -/* - * Check if adding a bio_vec after bprv with offset would create a gap in - * the SG list. Most drivers don't care about this, but some do. 
- */ -static inline bool bvec_gap_to_prev(struct request_queue *q, - struct bio_vec *bprv, unsigned int offset) -{ - if (!queue_virt_boundary(q)) - return false; - return __bvec_gap_to_prev(q, bprv, offset); -} - int kblockd_schedule_work(struct work_struct *work); int kblockd_schedule_work_on(int cpu, struct work_struct *work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); -- cgit v1.2.3 From bca6b067b0b269a7b8ba129e2a918309ca8b4a55 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 26 Sep 2018 14:01:03 -0700 Subject: block: Move power management code into a new source file Move the code for runtime power management from blk-core.c into the new source file blk-pm.c. Move the corresponding declarations from <linux/blkdev.h> into <linux/blk-pm.h>. For CONFIG_PM=n, leave out the declarations of the functions that are not used in that mode. This patch not only reduces the number of #ifdefs in the block layer core code but also reduces the size of header file <linux/blkdev.h> and hence should help to reduce the build time of the Linux kernel if CONFIG_PM is not defined. 
Signed-off-by: Bart Van Assche Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Cc: Jianchao Wang Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Alan Stern Signed-off-by: Jens Axboe --- block/Kconfig | 3 + block/Makefile | 1 + block/blk-core.c | 196 +------------------------------------------------ block/blk-pm.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++ block/blk-pm.h | 43 +++++++++++ block/elevator.c | 22 +----- drivers/scsi/scsi_pm.c | 1 + drivers/scsi/sd.c | 1 + drivers/scsi/sr.c | 1 + include/linux/blk-pm.h | 24 ++++++ include/linux/blkdev.h | 23 ------ 11 files changed, 264 insertions(+), 239 deletions(-) create mode 100644 block/blk-pm.c create mode 100644 block/blk-pm.h create mode 100644 include/linux/blk-pm.h (limited to 'include/linux/blkdev.h') diff --git a/block/Kconfig b/block/Kconfig index 1f2469a0123c..85263e7bded6 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -228,4 +228,7 @@ config BLK_MQ_RDMA depends on BLOCK && INFINIBAND default y +config BLK_PM + def_bool BLOCK && PM + source block/Kconfig.iosched diff --git a/block/Makefile b/block/Makefile index 572b33f32c07..27eac600474f 100644 --- a/block/Makefile +++ b/block/Makefile @@ -37,3 +37,4 @@ obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o +obj-$(CONFIG_BLK_PM) += blk-pm.o diff --git a/block/blk-core.c b/block/blk-core.c index 4dbc93f43b38..6d4dd176bd9d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -42,6 +42,7 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-sched.h" +#include "blk-pm.h" #include "blk-rq-qos.h" #ifdef CONFIG_DEBUG_FS @@ -1726,16 +1727,6 @@ void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part) } EXPORT_SYMBOL_GPL(part_round_stats); -#ifdef CONFIG_PM -static void blk_pm_put_request(struct request *rq) -{ - if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending) - 
pm_runtime_mark_last_busy(rq->q->dev); -} -#else -static inline void blk_pm_put_request(struct request *rq) {} -#endif - void __blk_put_request(struct request_queue *q, struct request *req) { req_flags_t rq_flags = req->rq_flags; @@ -3757,191 +3748,6 @@ void blk_finish_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_finish_plug); -#ifdef CONFIG_PM -/** - * blk_pm_runtime_init - Block layer runtime PM initialization routine - * @q: the queue of the device - * @dev: the device the queue belongs to - * - * Description: - * Initialize runtime-PM-related fields for @q and start auto suspend for - * @dev. Drivers that want to take advantage of request-based runtime PM - * should call this function after @dev has been initialized, and its - * request queue @q has been allocated, and runtime PM for it can not happen - * yet(either due to disabled/forbidden or its usage_count > 0). In most - * cases, driver should call this function before any I/O has taken place. - * - * This function takes care of setting up using auto suspend for the device, - * the autosuspend delay is set to -1 to make runtime suspend impossible - * until an updated value is either set by user or by driver. Drivers do - * not need to touch other autosuspend settings. - * - * The block layer runtime PM is request based, so only works for drivers - * that use request as their IO unit instead of those directly use bio's. 
- */ -void blk_pm_runtime_init(struct request_queue *q, struct device *dev) -{ - /* Don't enable runtime PM for blk-mq until it is ready */ - if (q->mq_ops) { - pm_runtime_disable(dev); - return; - } - - q->dev = dev; - q->rpm_status = RPM_ACTIVE; - pm_runtime_set_autosuspend_delay(q->dev, -1); - pm_runtime_use_autosuspend(q->dev); -} -EXPORT_SYMBOL(blk_pm_runtime_init); - -/** - * blk_pre_runtime_suspend - Pre runtime suspend check - * @q: the queue of the device - * - * Description: - * This function will check if runtime suspend is allowed for the device - * by examining if there are any requests pending in the queue. If there - * are requests pending, the device can not be runtime suspended; otherwise, - * the queue's status will be updated to SUSPENDING and the driver can - * proceed to suspend the device. - * - * For the not allowed case, we mark last busy for the device so that - * runtime PM core will try to autosuspend it some time later. - * - * This function should be called near the start of the device's - * runtime_suspend callback. - * - * Return: - * 0 - OK to runtime suspend the device - * -EBUSY - Device should not be runtime suspended - */ -int blk_pre_runtime_suspend(struct request_queue *q) -{ - int ret = 0; - - if (!q->dev) - return ret; - - spin_lock_irq(q->queue_lock); - if (q->nr_pending) { - ret = -EBUSY; - pm_runtime_mark_last_busy(q->dev); - } else { - q->rpm_status = RPM_SUSPENDING; - } - spin_unlock_irq(q->queue_lock); - return ret; -} -EXPORT_SYMBOL(blk_pre_runtime_suspend); - -/** - * blk_post_runtime_suspend - Post runtime suspend processing - * @q: the queue of the device - * @err: return value of the device's runtime_suspend function - * - * Description: - * Update the queue's runtime status according to the return value of the - * device's runtime suspend function and mark last busy for the device so - * that PM core will try to auto suspend the device at a later time. 
- * - * This function should be called near the end of the device's - * runtime_suspend callback. - */ -void blk_post_runtime_suspend(struct request_queue *q, int err) -{ - if (!q->dev) - return; - - spin_lock_irq(q->queue_lock); - if (!err) { - q->rpm_status = RPM_SUSPENDED; - } else { - q->rpm_status = RPM_ACTIVE; - pm_runtime_mark_last_busy(q->dev); - } - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_post_runtime_suspend); - -/** - * blk_pre_runtime_resume - Pre runtime resume processing - * @q: the queue of the device - * - * Description: - * Update the queue's runtime status to RESUMING in preparation for the - * runtime resume of the device. - * - * This function should be called near the start of the device's - * runtime_resume callback. - */ -void blk_pre_runtime_resume(struct request_queue *q) -{ - if (!q->dev) - return; - - spin_lock_irq(q->queue_lock); - q->rpm_status = RPM_RESUMING; - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_pre_runtime_resume); - -/** - * blk_post_runtime_resume - Post runtime resume processing - * @q: the queue of the device - * @err: return value of the device's runtime_resume function - * - * Description: - * Update the queue's runtime status according to the return value of the - * device's runtime_resume function. If it is successfully resumed, process - * the requests that are queued into the device's queue when it is resuming - * and then mark last busy and initiate autosuspend for it. - * - * This function should be called near the end of the device's - * runtime_resume callback. 
- */ -void blk_post_runtime_resume(struct request_queue *q, int err) -{ - if (!q->dev) - return; - - spin_lock_irq(q->queue_lock); - if (!err) { - q->rpm_status = RPM_ACTIVE; - __blk_run_queue(q); - pm_runtime_mark_last_busy(q->dev); - pm_request_autosuspend(q->dev); - } else { - q->rpm_status = RPM_SUSPENDED; - } - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_post_runtime_resume); - -/** - * blk_set_runtime_active - Force runtime status of the queue to be active - * @q: the queue of the device - * - * If the device is left runtime suspended during system suspend the resume - * hook typically resumes the device and corrects runtime status - * accordingly. However, that does not affect the queue runtime PM status - * which is still "suspended". This prevents processing requests from the - * queue. - * - * This function can be used in driver's resume hook to correct queue - * runtime PM status and re-enable peeking requests from the queue. It - * should be called before first request is added to the queue. - */ -void blk_set_runtime_active(struct request_queue *q) -{ - spin_lock_irq(q->queue_lock); - q->rpm_status = RPM_ACTIVE; - pm_runtime_mark_last_busy(q->dev); - pm_request_autosuspend(q->dev); - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_set_runtime_active); -#endif - int __init blk_dev_init(void) { BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS)); diff --git a/block/blk-pm.c b/block/blk-pm.c new file mode 100644 index 000000000000..9b636960d285 --- /dev/null +++ b/block/blk-pm.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/blk-pm.h> +#include <linux/blkdev.h> +#include <linux/pm_runtime.h> + +/** + * blk_pm_runtime_init - Block layer runtime PM initialization routine + * @q: the queue of the device + * @dev: the device the queue belongs to + * + * Description: + * Initialize runtime-PM-related fields for @q and start auto suspend for + * @dev. 
Drivers that want to take advantage of request-based runtime PM + * should call this function after @dev has been initialized, and its + * request queue @q has been allocated, and runtime PM for it can not happen + * yet(either due to disabled/forbidden or its usage_count > 0). In most + * cases, driver should call this function before any I/O has taken place. + * + * This function takes care of setting up using auto suspend for the device, + * the autosuspend delay is set to -1 to make runtime suspend impossible + * until an updated value is either set by user or by driver. Drivers do + * not need to touch other autosuspend settings. + * + * The block layer runtime PM is request based, so only works for drivers + * that use request as their IO unit instead of those directly use bio's. + */ +void blk_pm_runtime_init(struct request_queue *q, struct device *dev) +{ + /* Don't enable runtime PM for blk-mq until it is ready */ + if (q->mq_ops) { + pm_runtime_disable(dev); + return; + } + + q->dev = dev; + q->rpm_status = RPM_ACTIVE; + pm_runtime_set_autosuspend_delay(q->dev, -1); + pm_runtime_use_autosuspend(q->dev); +} +EXPORT_SYMBOL(blk_pm_runtime_init); + +/** + * blk_pre_runtime_suspend - Pre runtime suspend check + * @q: the queue of the device + * + * Description: + * This function will check if runtime suspend is allowed for the device + * by examining if there are any requests pending in the queue. If there + * are requests pending, the device can not be runtime suspended; otherwise, + * the queue's status will be updated to SUSPENDING and the driver can + * proceed to suspend the device. + * + * For the not allowed case, we mark last busy for the device so that + * runtime PM core will try to autosuspend it some time later. + * + * This function should be called near the start of the device's + * runtime_suspend callback. 
+ * + * Return: + * 0 - OK to runtime suspend the device + * -EBUSY - Device should not be runtime suspended + */ +int blk_pre_runtime_suspend(struct request_queue *q) +{ + int ret = 0; + + if (!q->dev) + return ret; + + spin_lock_irq(q->queue_lock); + if (q->nr_pending) { + ret = -EBUSY; + pm_runtime_mark_last_busy(q->dev); + } else { + q->rpm_status = RPM_SUSPENDING; + } + spin_unlock_irq(q->queue_lock); + return ret; +} +EXPORT_SYMBOL(blk_pre_runtime_suspend); + +/** + * blk_post_runtime_suspend - Post runtime suspend processing + * @q: the queue of the device + * @err: return value of the device's runtime_suspend function + * + * Description: + * Update the queue's runtime status according to the return value of the + * device's runtime suspend function and mark last busy for the device so + * that PM core will try to auto suspend the device at a later time. + * + * This function should be called near the end of the device's + * runtime_suspend callback. + */ +void blk_post_runtime_suspend(struct request_queue *q, int err) +{ + if (!q->dev) + return; + + spin_lock_irq(q->queue_lock); + if (!err) { + q->rpm_status = RPM_SUSPENDED; + } else { + q->rpm_status = RPM_ACTIVE; + pm_runtime_mark_last_busy(q->dev); + } + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_post_runtime_suspend); + +/** + * blk_pre_runtime_resume - Pre runtime resume processing + * @q: the queue of the device + * + * Description: + * Update the queue's runtime status to RESUMING in preparation for the + * runtime resume of the device. + * + * This function should be called near the start of the device's + * runtime_resume callback. 
+ */ +void blk_pre_runtime_resume(struct request_queue *q) +{ + if (!q->dev) + return; + + spin_lock_irq(q->queue_lock); + q->rpm_status = RPM_RESUMING; + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_pre_runtime_resume); + +/** + * blk_post_runtime_resume - Post runtime resume processing + * @q: the queue of the device + * @err: return value of the device's runtime_resume function + * + * Description: + * Update the queue's runtime status according to the return value of the + * device's runtime_resume function. If it is successfully resumed, process + * the requests that are queued into the device's queue when it is resuming + * and then mark last busy and initiate autosuspend for it. + * + * This function should be called near the end of the device's + * runtime_resume callback. + */ +void blk_post_runtime_resume(struct request_queue *q, int err) +{ + if (!q->dev) + return; + + spin_lock_irq(q->queue_lock); + if (!err) { + q->rpm_status = RPM_ACTIVE; + __blk_run_queue(q); + pm_runtime_mark_last_busy(q->dev); + pm_request_autosuspend(q->dev); + } else { + q->rpm_status = RPM_SUSPENDED; + } + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_post_runtime_resume); + +/** + * blk_set_runtime_active - Force runtime status of the queue to be active + * @q: the queue of the device + * + * If the device is left runtime suspended during system suspend the resume + * hook typically resumes the device and corrects runtime status + * accordingly. However, that does not affect the queue runtime PM status + * which is still "suspended". This prevents processing requests from the + * queue. + * + * This function can be used in driver's resume hook to correct queue + * runtime PM status and re-enable peeking requests from the queue. It + * should be called before first request is added to the queue. 
+ */ +void blk_set_runtime_active(struct request_queue *q) +{ + spin_lock_irq(q->queue_lock); + q->rpm_status = RPM_ACTIVE; + pm_runtime_mark_last_busy(q->dev); + pm_request_autosuspend(q->dev); + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_set_runtime_active); diff --git a/block/blk-pm.h b/block/blk-pm.h new file mode 100644 index 000000000000..1ffc8ef203ec --- /dev/null +++ b/block/blk-pm.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BLOCK_BLK_PM_H_ +#define _BLOCK_BLK_PM_H_ + +#include <linux/pm_runtime.h> + +#ifdef CONFIG_PM +static inline void blk_pm_requeue_request(struct request *rq) +{ + if (rq->q->dev && !(rq->rq_flags & RQF_PM)) + rq->q->nr_pending--; +} + +static inline void blk_pm_add_request(struct request_queue *q, + struct request *rq) +{ + if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 && + (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING)) + pm_request_resume(q->dev); +} + +static inline void blk_pm_put_request(struct request *rq) +{ + if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending) + pm_runtime_mark_last_busy(rq->q->dev); +} +#else +static inline void blk_pm_requeue_request(struct request *rq) +{ +} + +static inline void blk_pm_add_request(struct request_queue *q, + struct request *rq) +{ +} + +static inline void blk_pm_put_request(struct request *rq) +{ +} +#endif + +#endif /* _BLOCK_BLK_PM_H_ */ diff --git a/block/elevator.c b/block/elevator.c index 6a06b5d040e5..e18ac68626e3 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -41,6 +41,7 @@ #include "blk.h" #include "blk-mq-sched.h" +#include "blk-pm.h" #include "blk-wbt.h" static DEFINE_SPINLOCK(elv_list_lock); @@ -557,27 +558,6 @@ void elv_bio_merged(struct request_queue *q, struct request *rq, e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio); } -#ifdef CONFIG_PM -static void blk_pm_requeue_request(struct request *rq) -{ - if (rq->q->dev && !(rq->rq_flags & RQF_PM)) - rq->q->nr_pending--; -} - -static void 
blk_pm_add_request(struct request_queue *q, struct request *rq) -{ - if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 && - (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING)) - pm_request_resume(q->dev); -} -#else -static inline void blk_pm_requeue_request(struct request *rq) {} -static inline void blk_pm_add_request(struct request_queue *q, - struct request *rq) -{ -} -#endif - void elv_requeue_request(struct request_queue *q, struct request *rq) { /* diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c index b44c1bb687a2..a2b4179bfdf7 100644 --- a/drivers/scsi/scsi_pm.c +++ b/drivers/scsi/scsi_pm.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b79b366a94f7..64514e8359e4 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index d0389b20574d..4f07b3410595 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h new file mode 100644 index 000000000000..b80c65aba249 --- /dev/null +++ b/include/linux/blk-pm.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BLK_PM_H_ +#define _BLK_PM_H_ + +struct device; +struct request_queue; + +/* + * block layer runtime pm functions + */ +#ifdef CONFIG_PM +extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev); +extern int blk_pre_runtime_suspend(struct request_queue *q); +extern void blk_post_runtime_suspend(struct request_queue *q, int err); +extern void blk_pre_runtime_resume(struct request_queue *q); +extern void blk_post_runtime_resume(struct request_queue *q, int err); +extern void blk_set_runtime_active(struct request_queue *q); +#else +static inline void blk_pm_runtime_init(struct 
request_queue *q, + struct device *dev) {} +#endif + +#endif /* _BLK_PM_H_ */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1d5e14139795..cd863511dedb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1280,29 +1280,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); -/* - * block layer runtime pm functions - */ -#ifdef CONFIG_PM -extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev); -extern int blk_pre_runtime_suspend(struct request_queue *q); -extern void blk_post_runtime_suspend(struct request_queue *q, int err); -extern void blk_pre_runtime_resume(struct request_queue *q); -extern void blk_post_runtime_resume(struct request_queue *q, int err); -extern void blk_set_runtime_active(struct request_queue *q); -#else -static inline void blk_pm_runtime_init(struct request_queue *q, - struct device *dev) {} -static inline int blk_pre_runtime_suspend(struct request_queue *q) -{ - return -ENOSYS; -} -static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {} -static inline void blk_pre_runtime_resume(struct request_queue *q) {} -static inline void blk_post_runtime_resume(struct request_queue *q, int err) {} -static inline void blk_set_runtime_active(struct request_queue *q) {} -#endif - /* * blk_plug permits building a queue of related requests by holding the I/O * fragments for a short period. This allows merging of sequential requests -- cgit v1.2.3 From cd84a62e0078dce09f4ed349bec84f86c9d54b30 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 26 Sep 2018 14:01:04 -0700 Subject: block, scsi: Change the preempt-only flag into a counter The RQF_PREEMPT flag is used for three purposes: - In the SCSI core, for making sure that power management requests are executed even if a device is in the "quiesced" state. 
- For domain validation by SCSI drivers that use the parallel port. - In the IDE driver, for IDE preempt requests. Rename "preempt-only" into "pm-only" because the primary purpose of this mode is power management. Since the power management core may but does not have to resume a runtime suspended device before performing system-wide suspend and since a later patch will set "pm-only" mode as long as a block device is runtime suspended, make it possible to set "pm-only" mode from more than one context. Since with this change scsi_device_quiesce() is no longer idempotent, make that function return early if it is called for a quiesced queue. Signed-off-by: Bart Van Assche Acked-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Cc: Jianchao Wang Cc: Johannes Thumshirn Cc: Alan Stern Signed-off-by: Jens Axboe --- block/blk-core.c | 35 ++++++++++++++++++----------------- block/blk-mq-debugfs.c | 10 +++++++++- drivers/scsi/scsi_lib.c | 11 +++++++---- include/linux/blkdev.h | 14 +++++++++----- 4 files changed, 43 insertions(+), 27 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-core.c b/block/blk-core.c index 6d4dd176bd9d..1a691f5269bb 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -422,24 +422,25 @@ void blk_sync_queue(struct request_queue *q) EXPORT_SYMBOL(blk_sync_queue); /** - * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY + * blk_set_pm_only - increment pm_only counter * @q: request queue pointer - * - * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not - * set and 1 if the flag was already set. 
*/ -int blk_set_preempt_only(struct request_queue *q) +void blk_set_pm_only(struct request_queue *q) { - return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q); + atomic_inc(&q->pm_only); } -EXPORT_SYMBOL_GPL(blk_set_preempt_only); +EXPORT_SYMBOL_GPL(blk_set_pm_only); -void blk_clear_preempt_only(struct request_queue *q) +void blk_clear_pm_only(struct request_queue *q) { - blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q); - wake_up_all(&q->mq_freeze_wq); + int pm_only; + + pm_only = atomic_dec_return(&q->pm_only); + WARN_ON_ONCE(pm_only < 0); + if (pm_only == 0) + wake_up_all(&q->mq_freeze_wq); } -EXPORT_SYMBOL_GPL(blk_clear_preempt_only); +EXPORT_SYMBOL_GPL(blk_clear_pm_only); /** * __blk_run_queue_uncond - run a queue whether or not it has been stopped @@ -918,7 +919,7 @@ EXPORT_SYMBOL(blk_alloc_queue); */ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) { - const bool preempt = flags & BLK_MQ_REQ_PREEMPT; + const bool pm = flags & BLK_MQ_REQ_PREEMPT; while (true) { bool success = false; @@ -926,11 +927,11 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) rcu_read_lock(); if (percpu_ref_tryget_live(&q->q_usage_counter)) { /* - * The code that sets the PREEMPT_ONLY flag is - * responsible for ensuring that that flag is globally - * visible before the queue is unfrozen. + * The code that increments the pm_only counter is + * responsible for ensuring that that counter is + * globally visible before the queue is unfrozen. 
*/ - if (preempt || !blk_queue_preempt_only(q)) { + if (pm || !blk_queue_pm_only(q)) { success = true; } else { percpu_ref_put(&q->q_usage_counter); @@ -955,7 +956,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) wait_event(q->mq_freeze_wq, (atomic_read(&q->mq_freeze_depth) == 0 && - (preempt || !blk_queue_preempt_only(q))) || + (pm || !blk_queue_pm_only(q))) || blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index cb1e6cf7ac48..a5ea86835fcb 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -102,6 +102,14 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags, return 0; } +static int queue_pm_only_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + + seq_printf(m, "%d\n", atomic_read(&q->pm_only)); + return 0; +} + #define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(QUEUED), @@ -132,7 +140,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(REGISTERED), QUEUE_FLAG_NAME(SCSI_PASSTHROUGH), QUEUE_FLAG_NAME(QUIESCED), - QUEUE_FLAG_NAME(PREEMPT_ONLY), }; #undef QUEUE_FLAG_NAME @@ -209,6 +216,7 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf, static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { { "poll_stat", 0400, queue_poll_stat_show }, { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops }, + { "pm_only", 0600, queue_pm_only_show, NULL }, { "state", 0600, queue_state_show, queue_state_write }, { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store }, { "zone_wlock", 0400, queue_zone_wlock_show, NULL }, diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index eb97d2dd3651..62348412ed1b 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -3046,11 +3046,14 @@ scsi_device_quiesce(struct scsi_device *sdev) */ 
WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current); - blk_set_preempt_only(q); + if (sdev->quiesced_by == current) + return 0; + + blk_set_pm_only(q); blk_mq_freeze_queue(q); /* - * Ensure that the effect of blk_set_preempt_only() will be visible + * Ensure that the effect of blk_set_pm_only() will be visible * for percpu_ref_tryget() callers that occur after the queue * unfreeze even if the queue was already frozen before this function * was called. See also https://lwn.net/Articles/573497/. @@ -3063,7 +3066,7 @@ scsi_device_quiesce(struct scsi_device *sdev) if (err == 0) sdev->quiesced_by = current; else - blk_clear_preempt_only(q); + blk_clear_pm_only(q); mutex_unlock(&sdev->state_mutex); return err; @@ -3088,7 +3091,7 @@ void scsi_device_resume(struct scsi_device *sdev) mutex_lock(&sdev->state_mutex); WARN_ON_ONCE(!sdev->quiesced_by); sdev->quiesced_by = NULL; - blk_clear_preempt_only(sdev->request_queue); + blk_clear_pm_only(sdev->request_queue); if (sdev->sdev_state == SDEV_QUIESCE) scsi_device_set_state(sdev, SDEV_RUNNING); mutex_unlock(&sdev->state_mutex); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cd863511dedb..13bb54f26736 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -504,6 +504,12 @@ struct request_queue { * various queue flags, see QUEUE_* below */ unsigned long queue_flags; + /* + * Number of contexts that have called blk_set_pm_only(). If this + * counter is above zero then only RQF_PM and RQF_PREEMPT requests are + * processed. + */ + atomic_t pm_only; /* * ida allocated id for this queue. 
Used to index queues from @@ -698,7 +704,6 @@ struct request_queue { #define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */ #define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */ #define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */ -#define QUEUE_FLAG_PREEMPT_ONLY 29 /* only process REQ_PREEMPT requests */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ @@ -736,12 +741,11 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ REQ_FAILFAST_DRIVER)) #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) -#define blk_queue_preempt_only(q) \ - test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags) +#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) #define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags) -extern int blk_set_preempt_only(struct request_queue *q); -extern void blk_clear_preempt_only(struct request_queue *q); +extern void blk_set_pm_only(struct request_queue *q); +extern void blk_clear_pm_only(struct request_queue *q); static inline int queue_in_flight(struct request_queue *q) { -- cgit v1.2.3 From 4822e902f9bdffaea2817471365e000966f0d1a1 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 11 Oct 2018 10:07:06 +0300 Subject: block: describe difference between flags IO_STAT and STATS This adds reasonable comments, but the flags definitely need better names.
Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dee46c20701b..61207560e826 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -108,7 +108,7 @@ typedef __u32 __bitwise req_flags_t; #define RQF_QUIET ((__force req_flags_t)(1 << 11)) /* elevator private data attached */ #define RQF_ELVPRIV ((__force req_flags_t)(1 << 12)) -/* account I/O stat */ +/* account into disk and partition IO statistics */ #define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) /* request came from our alloc pool */ #define RQF_ALLOCED ((__force req_flags_t)(1 << 14)) @@ -116,7 +116,7 @@ typedef __u32 __bitwise req_flags_t; #define RQF_PM ((__force req_flags_t)(1 << 15)) /* on IO scheduler merge hash */ #define RQF_HASHED ((__force req_flags_t)(1 << 16)) -/* IO stats tracking on */ +/* track IO completion time */ #define RQF_STATS ((__force req_flags_t)(1 << 17)) /* Look at ->special_vec for the actual data payload instead of the bio chain. 
*/ @@ -685,7 +685,7 @@ struct request_queue { #define QUEUE_FLAG_FAIL_IO 7 /* fake timeout */ #define QUEUE_FLAG_NONROT 9 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ -#define QUEUE_FLAG_IO_STAT 10 /* do IO stats */ +#define QUEUE_FLAG_IO_STAT 10 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_DISCARD 11 /* supports DISCARD */ #define QUEUE_FLAG_NOXMERGES 12 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 13 /* Contributes to random pool */ @@ -699,7 +699,7 @@ struct request_queue { #define QUEUE_FLAG_FUA 21 /* device supports FUA writes */ #define QUEUE_FLAG_FLUSH_NQ 22 /* flush not queueuable */ #define QUEUE_FLAG_DAX 23 /* device supports DAX */ -#define QUEUE_FLAG_STATS 24 /* track rq completion times */ +#define QUEUE_FLAG_STATS 24 /* track IO start and completion times */ #define QUEUE_FLAG_POLL_STATS 25 /* collecting stats for hybrid polling */ #define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */ #define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */ -- cgit v1.2.3