From 4c529a4a7260776bb4abe264498857b4537aa70d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 7 Jun 2025 14:22:56 +0200 Subject: x86/smp: PM/hibernate: Split arch_resume_nosmt() Move the inner part of the arch_resume_nosmt() code into a separate function called arch_cpu_rescan_dead_smt_siblings(), so it can be used in other places where "dead" SMT siblings may need to be taken online and offline again in order to get into deep idle states. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Dave Hansen Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/3361688.44csPzL39Z@rjwysocki.net [ rjw: Prevent build issues with CONFIG_SMP unset ] Signed-off-by: Rafael J. Wysocki --- include/linux/cpu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e6089abc28e2..96a3a0d6a60e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -120,6 +120,7 @@ extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); void bringup_nonboot_cpus(unsigned int max_cpus); +int arch_cpu_rescan_dead_smt_siblings(void); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 @@ -134,6 +135,8 @@ static inline void cpu_maps_update_done(void) static inline int add_cpu(unsigned int cpu) { return 0;} +static inline int arch_cpu_rescan_dead_smt_siblings(void) { return 0; } + #endif /* CONFIG_SMP */ extern const struct bus_type cpu_subsys; -- cgit v1.2.3 From 488ef3560196ee10fc1c5547e1574a87068c3494 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 28 May 2025 13:18:17 +0100 Subject: KEYS: Invert FINAL_PUT bit Invert the FINAL_PUT bit so that test_bit_acquire and clear_bit_unlock can be used instead of smp_mb. Signed-off-by: Herbert Xu Signed-off-by: David Howells Reviewed-by: Jarkko Sakkinen Signed-off-by: Linus Torvalds --- include/linux/key.h | 2 +- security/keys/gc.c | 4 ++-- security/keys/key.c | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index ba05de8579ec..81b8f05c6898 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -236,7 +236,7 @@ struct key { #define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ #define KEY_FLAG_KEEP 8 /* set if key should not be removed */ #define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ -#define KEY_FLAG_FINAL_PUT 10 /* set if final put has happened on key */ +#define KEY_FLAG_USER_ALIVE 10 /* set if final put has not happened on key yet */ /* the key type and key description string * - the desc is used to match a key against search criteria diff --git a/security/keys/gc.c b/security/keys/gc.c index f27223ea4578..748e83818a76 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -218,8 +218,8 @@ continue_scanning: key = rb_entry(cursor, struct key, serial_node); cursor = rb_next(cursor); - if (test_bit(KEY_FLAG_FINAL_PUT, &key->flags)) { - smp_mb(); /* Clobber key->user after FINAL_PUT seen. */ + if (!test_bit_acquire(KEY_FLAG_USER_ALIVE, &key->flags)) { + /* Clobber key->user after final put seen. */ goto found_unreferenced_key; } diff --git a/security/keys/key.c b/security/keys/key.c index 7198cd2ac3a3..3bbdde778631 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -298,6 +298,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->restrict_link = restrict_link; key->last_used_at = ktime_get_real_seconds(); + key->flags |= 1 << KEY_FLAG_USER_ALIVE; if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) key->flags |= 1 << KEY_FLAG_IN_QUOTA; if (flags & KEY_ALLOC_BUILT_IN) @@ -658,8 +659,8 @@ void key_put(struct key *key) key->user->qnbytes -= key->quotalen; spin_unlock_irqrestore(&key->user->lock, flags); } - smp_mb(); /* key->user before FINAL_PUT set. */ - set_bit(KEY_FLAG_FINAL_PUT, &key->flags); + /* Mark key as safe for GC after key->user done. */ + clear_bit_unlock(KEY_FLAG_USER_ALIVE, &key->flags); schedule_work(&key_gc_work); } } -- cgit v1.2.3 From 331843c845d15413e9d89fd91cdcfa6912e291c3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Jun 2025 17:23:37 -0700 Subject: scatterlist: fix extraneous '@'-sign kernel-doc notation Using "@argname@" in kernel-doc produces "argname****" (with "argname" in bold) in the generated html output, so use the expected kernel-doc notation of just "@argname" instead. "Fixes:" lines are added in case Matthew's patch [1] is backported. Link: https://lkml.kernel.org/r/20250605002337.2842659-1-rdunlap@infradead.org Link: https://lore.kernel.org/linux-doc/3bc4e779-7a79-42c1-8867-024f643a22fc@infradead.org/T/#m5d2bd9d21fb34f297aa4e7db069f09bc27b89007 [1] Fixes: 0db9299f48eb ("SG: Move functions to lib/scatterlist.c and add sg chaining allocator helpers") Fixes: 8d1d4b538bb1 ("scatterlist: inline sg_next()") Fixes: 18dabf473e15 ("Change table chaining layout") Signed-off-by: Randy Dunlap Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/scatterlist.h | 4 ++-- lib/scatterlist.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 0cdbfc42f153..6f8a4965f9b9 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -99,7 +99,7 @@ static inline bool sg_is_last(struct scatterlist *sg) * @sg: The current sg entry * * Description: - * Usually the next entry will be @sg@ + 1, but if this sg element is part + * Usually the next entry will be @sg + 1, but if this sg element is part * of a chained scatterlist, it could jump to the start of a new * scatterlist array. * @@ -254,7 +254,7 @@ static inline void __sg_chain(struct scatterlist *chain_sg, * @sgl: Second scatterlist * * Description: - * Links @prv@ and @sgl@ together, to form a longer scatterlist. + * Links @prv and @sgl together, to form a longer scatterlist. * **/ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7582dfab7fe3..4af1c8b0775a 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -73,9 +73,9 @@ EXPORT_SYMBOL(sg_nents_for_len); * Should only be used casually, it (currently) scans the entire list * to get the last entry. * - * Note that the @sgl@ pointer passed in need not be the first one, - * the important bit is that @nents@ denotes the number of entries that - * exist from @sgl@. + * Note that the @sgl pointer passed in need not be the first one, + * the important bit is that @nents denotes the number of entries that + * exist from @sgl. * **/ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) @@ -345,7 +345,7 @@ EXPORT_SYMBOL(__sg_alloc_table); * @gfp_mask: GFP allocation mask * * Description: - * Allocate and initialize an sg table. If @nents@ is larger than + * Allocate and initialize an sg table. If @nents is larger than * SG_MAX_SINGLE_ALLOC a chained sg table will be setup. * **/ -- cgit v1.2.3 From bb666b7c27073b986b75699e51a7102910f58060 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 9 Jun 2025 17:57:49 +0100 Subject: mm: add mmap_prepare() compatibility layer for nested file systems Nested file systems, that is those which invoke call_mmap() within their own f_op->mmap() handlers, may encounter underlying file systems which provide the f_op->mmap_prepare() hook introduced by commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"). We have a chicken-and-egg scenario here - until all file systems are converted to using .mmap_prepare(), we cannot convert these nested handlers, as we can't call f_op->mmap from an .mmap_prepare() hook. So we have to do it the other way round - invoke the .mmap_prepare() hook from an .mmap() one. in order to do so, we need to convert VMA state into a struct vm_area_desc descriptor, invoking the underlying file system's f_op->mmap_prepare() callback passing a pointer to this, and then setting VMA state accordingly and safely. This patch achieves this via the compat_vma_mmap_prepare() function, which we invoke from call_mmap() if f_op->mmap_prepare() is specified in the passed in file pointer. We place the fundamental logic into mm/vma.h where VMA manipulation belongs. We also update the VMA userland tests to accommodate the changes. The compat_vma_mmap_prepare() function and its associated machinery is temporary, and will be removed once the conversion of file systems is complete. We carefully place this code so it can be used with CONFIG_MMU and also with cutting edge nommu silicon. [akpm@linux-foundation.org: export compat_vma_mmap_prepare tp fix build] [lorenzo.stoakes@oracle.com: remove unused declarations] Link: https://lkml.kernel.org/r/ac3ae324-4c65-432a-8c6d-2af988b18ac8@lucifer.local Link: https://lkml.kernel.org/r/20250609165749.344976-1-lorenzo.stoakes@oracle.com Fixes: c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"). Signed-off-by: Lorenzo Stoakes Reported-by: Jann Horn Closes: https://lore.kernel.org/linux-mm/CAG48ez04yOEVx1ekzOChARDDBZzAKwet8PEoPM4Ln3_rk91AzQ@mail.gmail.com/ Reviewed-by: Pedro Falcato Reviewed-by: Vlastimil Babka Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- include/linux/fs.h | 6 +++-- mm/util.c | 40 ++++++++++++++++++++++++++++++++++ mm/vma.c | 1 - mm/vma.h | 47 ++++++++++++++++++++++++++++++++++++++++ tools/testing/vma/vma_internal.h | 16 ++++++++++++++ 5 files changed, 107 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..4ec77da65f14 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2274,10 +2274,12 @@ static inline bool file_has_valid_mmap_hooks(struct file *file) return true; } +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); + static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { - if (WARN_ON_ONCE(file->f_op->mmap_prepare)) - return -EINVAL; + if (file->f_op->mmap_prepare) + return compat_vma_mmap_prepare(file, vma); return file->f_op->mmap(file, vma); } diff --git a/mm/util.c b/mm/util.c index 448117da071f..0b270c43d7d1 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1131,3 +1131,43 @@ void flush_dcache_folio(struct folio *folio) } EXPORT_SYMBOL(flush_dcache_folio); #endif + +/** + * compat_vma_mmap_prepare() - Apply the file's .mmap_prepare() hook to an + * existing VMA + * @file: The file which possesss an f_op->mmap_prepare() hook + * @vma: The VMA to apply the .mmap_prepare() hook to. + * + * Ordinarily, .mmap_prepare() is invoked directly upon mmap(). However, certain + * 'wrapper' file systems invoke a nested mmap hook of an underlying file. + * + * Until all filesystems are converted to use .mmap_prepare(), we must be + * conservative and continue to invoke these 'wrapper' filesystems using the + * deprecated .mmap() hook. + * + * However we have a problem if the underlying file system possesses an + * .mmap_prepare() hook, as we are in a different context when we invoke the + * .mmap() hook, already having a VMA to deal with. + * + * compat_vma_mmap_prepare() is a compatibility function that takes VMA state, + * establishes a struct vm_area_desc descriptor, passes to the underlying + * .mmap_prepare() hook and applies any changes performed by it. + * + * Once the conversion of filesystems is complete this function will no longer + * be required and will be removed. + * + * Returns: 0 on success or error. + */ +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma) +{ + struct vm_area_desc desc; + int err; + + err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc)); + if (err) + return err; + set_vma_from_desc(vma, &desc); + + return 0; +} +EXPORT_SYMBOL(compat_vma_mmap_prepare); diff --git a/mm/vma.c b/mm/vma.c index 0fb9b2c7b734..fef67a66a095 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -3113,7 +3113,6 @@ int __vm_munmap(unsigned long start, size_t len, bool unlock) return ret; } - /* Insert vm structure into process list sorted by address * and into the inode's i_mmap tree. If vm_file is non-NULL * then i_mmap_rwsem is taken here. diff --git a/mm/vma.h b/mm/vma.h index 0db066e7a45d..f47112a352db 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -222,6 +222,53 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi, return 0; } + +/* + * Temporary helper functions for file systems which wrap an invocation of + * f_op->mmap() but which might have an underlying file system which implements + * f_op->mmap_prepare(). + */ + +static inline struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc) +{ + desc->mm = vma->vm_mm; + desc->start = vma->vm_start; + desc->end = vma->vm_end; + + desc->pgoff = vma->vm_pgoff; + desc->file = vma->vm_file; + desc->vm_flags = vma->vm_flags; + desc->page_prot = vma->vm_page_prot; + + desc->vm_ops = NULL; + desc->private_data = NULL; + + return desc; +} + +static inline void set_vma_from_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc) +{ + /* + * Since we're invoking .mmap_prepare() despite having a partially + * established VMA, we must take care to handle setting fields + * correctly. + */ + + /* Mutable fields. Populated with initial state. */ + vma->vm_pgoff = desc->pgoff; + if (vma->vm_file != desc->file) + vma_set_file(vma, desc->file); + if (vma->vm_flags != desc->vm_flags) + vm_flags_set(vma, desc->vm_flags); + vma->vm_page_prot = desc->page_prot; + + /* User-defined fields. */ + vma->vm_ops = desc->vm_ops; + vma->vm_private_data = desc->private_data; +} + int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start, diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 4505b1c31be1..14718ca23a05 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -159,6 +159,14 @@ typedef __bitwise unsigned int vm_fault_t; #define ASSERT_EXCLUSIVE_WRITER(x) +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value + */ +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + struct kref { refcount_t refcount; }; @@ -1468,4 +1476,12 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) (void)vma; } +static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) +{ + /* Changing an anonymous vma with this is illegal */ + get_file(file); + swap(vma->vm_file, file); + fput(file); +} + #endif /* __MM_VMA_INTERNAL_H */ -- cgit v1.2.3 From f826ec7966a63d48e16e0868af4e038bf9a1a3ae Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:41:25 +0100 Subject: bio: Fix bio_first_folio() for SPARSEMEM without VMEMMAP It is possible for physically contiguous folios to have discontiguous struct pages if SPARSEMEM is enabled and SPARSEMEM_VMEMMAP is not. This is correctly handled by folio_page_idx(), so remove this open-coded implementation. Fixes: 640d1930bef4 (block: Add bio_for_each_folio_all()) Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20250612144126.2849931-1-willy@infradead.org Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 9c37c66ef9ca..46ffac5caab7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -291,7 +291,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, fi->folio = page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + - PAGE_SIZE * (bvec->bv_page - &fi->folio->page); + PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page); fi->_seg_count = bvec->bv_len; fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); fi->_next = folio_next(fi->folio); -- cgit v1.2.3 From 5e223e06ee7c6d8f630041a0645ac90e39a42cc6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:42:53 +0100 Subject: block: Fix bvec_set_folio() for very large folios Similarly to 26064d3e2b4d ("block: fix adding folio to bio"), if we attempt to add a folio that is larger than 4GB, we'll silently truncate the offset and len. Widen the parameters to size_t, assert that the length is less than 4GB and set the first page that contains the interesting data rather than the first page of the folio. Fixes: 26db5ee15851 (block: add a bvec_set_folio helper) Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20250612144255.2850278-1-willy@infradead.org Signed-off-by: Jens Axboe --- include/linux/bvec.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 204b22a99c4b..0a80e1f9aa20 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -57,9 +57,12 @@ static inline void bvec_set_page(struct bio_vec *bv, struct page *page, * @offset: offset into the folio */ static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio, - unsigned int len, unsigned int offset) + size_t len, size_t offset) { - bvec_set_page(bv, &folio->page, len, offset); + unsigned long nr = offset / PAGE_SIZE; + + WARN_ON_ONCE(len > UINT_MAX); + bvec_set_page(bv, folio_page(folio, nr), len, offset % PAGE_SIZE); } /** -- cgit v1.2.3 From 271ff96d6066347cd267ac3bcd6021bd4d38913d Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 16 Jun 2025 09:12:07 +0300 Subject: PM: runtime: Document return values of suspend-related API functions Document return values for device suspend and idle related API functions. Signed-off-by: Sakari Ailus Link: https://patch.msgid.link/20250616061212.2286741-2-sakari.ailus@linux.intel.com Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 147 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 138 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index e7cb70fcc0af..9dd2e4031a27 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -337,6 +337,20 @@ static inline void pm_runtime_release_supplier(struct device_link *link) {} * Invoke the "idle check" callback of @dev and, depending on its return value, * set up autosuspend of @dev or suspend it (depending on whether or not * autosuspend has been enabled for it). + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing + * or device not in %RPM_ACTIVE state. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM idle and suspend callbacks. */ static inline int pm_runtime_idle(struct device *dev) { @@ -346,6 +360,18 @@ static inline int pm_runtime_idle(struct device *dev) /** * pm_runtime_suspend - Suspend a device synchronously. * @dev: Target device. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. */ static inline int pm_runtime_suspend(struct device *dev) { @@ -358,6 +384,18 @@ static inline int pm_runtime_suspend(struct device *dev) * * Set up autosuspend of @dev or suspend it (depending on whether or not * autosuspend is enabled for it) without engaging its "idle check" callback. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. */ static inline int pm_runtime_autosuspend(struct device *dev) { @@ -379,6 +417,18 @@ static inline int pm_runtime_resume(struct device *dev) * * Queue up a work item to run an equivalent of pm_runtime_idle() for @dev * asynchronously. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing + * or device not in %RPM_ACTIVE state. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. */ static inline int pm_request_idle(struct device *dev) { @@ -400,6 +450,17 @@ static inline int pm_request_resume(struct device *dev) * * Queue up a work item to run an equivalent pm_runtime_autosuspend() for @dev * asynchronously. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. */ static inline int pm_request_autosuspend(struct device *dev) { @@ -464,6 +525,17 @@ static inline int pm_runtime_resume_and_get(struct device *dev) * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_idle(). + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. */ static inline int pm_runtime_put(struct device *dev) { @@ -478,6 +550,17 @@ DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T)) * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. */ static inline int __pm_runtime_put_autosuspend(struct device *dev) { @@ -490,6 +573,17 @@ static inline int __pm_runtime_put_autosuspend(struct device *dev) * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. */ static inline int pm_runtime_put_autosuspend(struct device *dev) { @@ -506,9 +600,20 @@ static inline int pm_runtime_put_autosuspend(struct device *dev) * return value, set up autosuspend of @dev or suspend it (depending on whether * or not autosuspend has been enabled for it). * - * The possible return values of this function are the same as for - * pm_runtime_idle() and the runtime PM usage counter of @dev remains - * decremented in all cases, even if it returns an error code. + * The runtime PM usage counter of @dev remains decremented in all cases, even + * if it returns an error code. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. */ static inline int pm_runtime_put_sync(struct device *dev) { @@ -522,9 +627,21 @@ static inline int pm_runtime_put_sync(struct device *dev) * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, carry out runtime-suspend of @dev synchronously. * - * The possible return values of this function are the same as for - * pm_runtime_suspend() and the runtime PM usage counter of @dev remains - * decremented in all cases, even if it returns an error code. + * The runtime PM usage counter of @dev remains decremented in all cases, even + * if it returns an error code. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. */ static inline int pm_runtime_put_sync_suspend(struct device *dev) { @@ -539,9 +656,21 @@ static inline int pm_runtime_put_sync_suspend(struct device *dev) * equal to 0, set up autosuspend of @dev or suspend it synchronously (depending * on whether or not autosuspend has been enabled for it). * - * The possible return values of this function are the same as for - * pm_runtime_autosuspend() and the runtime PM usage counter of @dev remains - * decremented in all cases, even if it returns an error code. + * The runtime PM usage counter of @dev remains decremented in all cases, even + * if it returns an error code. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. */ static inline int pm_runtime_put_sync_autosuspend(struct device *dev) { -- cgit v1.2.3 From b3db492e8335417dfd66c1fa2ea08e1d2f7b6736 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 16 Jun 2025 09:12:08 +0300 Subject: PM: runtime: Mark last busy stamp in pm_runtime_put_autosuspend() Set device's last busy timestamp to current time in pm_runtime_put_autosuspend(). Callers wishing not to do that will need to use __pm_runtime_put_autosuspend(). Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Link: https://patch.msgid.link/20250616061212.2286741-3-sakari.ailus@linux.intel.com Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.rst | 23 ++++++++++------------- include/linux/pm_runtime.h | 12 +++++++----- 2 files changed, 17 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst index 63344bea8393..e7bbdc66d64c 100644 --- a/Documentation/power/runtime_pm.rst +++ b/Documentation/power/runtime_pm.rst @@ -411,8 +411,9 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: pm_request_idle(dev) and return its result `int pm_runtime_put_autosuspend(struct device *dev);` - - does the same as __pm_runtime_put_autosuspend() for now, but in the - future, will also call pm_runtime_mark_last_busy() as well, DO NOT USE! + - set the power.last_busy field to the current time and decrement the + device's usage counter; if the result is 0 then run + pm_request_autosuspend(dev) and return its result `int __pm_runtime_put_autosuspend(struct device *dev);` - decrement the device's usage counter; if the result is 0 then run @@ -870,11 +871,9 @@ device is automatically suspended (the subsystem or driver still has to call the appropriate PM routines); rather it means that runtime suspends will automatically be delayed until the desired period of inactivity has elapsed. -Inactivity is determined based on the power.last_busy field. Drivers should -call pm_runtime_mark_last_busy() to update this field after carrying out I/O, -typically just before calling __pm_runtime_put_autosuspend(). The desired -length of the inactivity period is a matter of policy. Subsystems can set this -length initially by calling pm_runtime_set_autosuspend_delay(), but after device +Inactivity is determined based on the power.last_busy field. The desired length +of the inactivity period is a matter of policy. Subsystems can set this length +initially by calling pm_runtime_set_autosuspend_delay(), but after device registration the length should be controlled by user space, using the /sys/devices/.../power/autosuspend_delay_ms attribute. @@ -885,7 +884,7 @@ instead of the non-autosuspend counterparts:: Instead of: pm_runtime_suspend use: pm_runtime_autosuspend; Instead of: pm_schedule_suspend use: pm_request_autosuspend; - Instead of: pm_runtime_put use: __pm_runtime_put_autosuspend; + Instead of: pm_runtime_put use: pm_runtime_put_autosuspend; Instead of: pm_runtime_put_sync use: pm_runtime_put_sync_autosuspend. Drivers may also continue to use the non-autosuspend helper functions; they @@ -922,12 +921,10 @@ Here is a schematic pseudo-code example:: foo_io_completion(struct foo_priv *foo, void *req) { lock(&foo->private_lock); - if (--foo->num_pending_requests == 0) { - pm_runtime_mark_last_busy(&foo->dev); - __pm_runtime_put_autosuspend(&foo->dev); - } else { + if (--foo->num_pending_requests == 0) + pm_runtime_put_autosuspend(&foo->dev); + else foo_process_next_request(foo); - } unlock(&foo->private_lock); /* Send req result back to the user ... */ } diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 9dd2e4031a27..14ca7be96686 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -568,11 +568,13 @@ static inline int __pm_runtime_put_autosuspend(struct device *dev) } /** - * pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. + * pm_runtime_put_autosuspend - Update the last access time of a device, drop + * its usage counter and queue autosuspend if the usage counter becomes 0. * @dev: Target device. * - * Decrement the runtime PM usage counter of @dev and if it turns out to be - * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). + * Update the last access time of @dev, decrement runtime PM usage counter of + * @dev and if it turns out to be equal to 0, queue up a work item for @dev like + * in pm_request_autosuspend(). * * Return: * * 0: Success. @@ -587,8 +589,8 @@ static inline int __pm_runtime_put_autosuspend(struct device *dev) */ static inline int pm_runtime_put_autosuspend(struct device *dev) { - return __pm_runtime_suspend(dev, - RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); + pm_runtime_mark_last_busy(dev); + return __pm_runtime_put_autosuspend(dev); } /** -- cgit v1.2.3 From e24e0630b5ba13e83f65905becde9945518efa0b Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 16 Jun 2025 09:12:09 +0300 Subject: PM: runtime: Mark last busy stamp in pm_runtime_put_sync_autosuspend() Set device's last busy timestamp to current time in pm_runtime_put_sync_autosuspend(). Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Link: https://patch.msgid.link/20250616061212.2286741-4-sakari.ailus@linux.intel.com Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.rst | 3 ++- include/linux/pm_runtime.h | 11 +++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst index e7bbdc66d64c..9c21c913f9cf 100644 --- a/Documentation/power/runtime_pm.rst +++ b/Documentation/power/runtime_pm.rst @@ -428,7 +428,8 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: pm_runtime_suspend(dev) and return its result `int pm_runtime_put_sync_autosuspend(struct device *dev);` - - decrement the device's usage counter; if the result is 0 then run + - set the power.last_busy field to the current time and decrement the + device's usage counter; if the result is 0 then run pm_runtime_autosuspend(dev) and return its result `void pm_runtime_enable(struct device *dev);` diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 14ca7be96686..3a0d5f0ea471 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -651,12 +651,14 @@ static inline int pm_runtime_put_sync_suspend(struct device *dev) } /** - * pm_runtime_put_sync_autosuspend - Drop device usage counter and autosuspend if 0. + * pm_runtime_put_sync_autosuspend - Update the last access time of a device, + * drop device usage counter and autosuspend if 0. * @dev: Target device. * - * Decrement the runtime PM usage counter of @dev and if it turns out to be - * equal to 0, set up autosuspend of @dev or suspend it synchronously (depending - * on whether or not autosuspend has been enabled for it). + * Update the last access time of @dev, decrement the runtime PM usage counter + * of @dev and if it turns out to be equal to 0, set up autosuspend of @dev or + * suspend it synchronously (depending on whether or not autosuspend has been + * enabled for it). * * The runtime PM usage counter of @dev remains decremented in all cases, even * if it returns an error code. @@ -676,6 +678,7 @@ static inline int pm_runtime_put_sync_suspend(struct device *dev) */ static inline int pm_runtime_put_sync_autosuspend(struct device *dev) { + pm_runtime_mark_last_busy(dev); return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_AUTO); } -- cgit v1.2.3 From 08071e64cb642ae19ebd6ffeb13b4f3d130b5860 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 16 Jun 2025 09:12:10 +0300 Subject: PM: runtime: Mark last busy stamp in pm_runtime_autosuspend() Set device's last busy timestamp to current time in pm_runtime_autosuspend(). Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Link: https://patch.msgid.link/20250616061212.2286741-5-sakari.ailus@linux.intel.com Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.rst | 15 ++++++--------- include/linux/pm_runtime.h | 9 ++++++--- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst index 9c21c913f9cf..39a0b62f6648 100644 --- a/Documentation/power/runtime_pm.rst +++ b/Documentation/power/runtime_pm.rst @@ -154,11 +154,9 @@ suspending the device are satisfied) and to queue up a suspend request for the device in that case. If there is no idle callback, or if the callback returns 0, then the PM core will attempt to carry out a runtime suspend of the device, also respecting devices configured for autosuspend. In essence this means a -call to pm_runtime_autosuspend() (do note that drivers needs to update the -device last busy mark, pm_runtime_mark_last_busy(), to control the delay under -this circumstance). To prevent this (for example, if the callback routine has -started a delayed suspend), the routine must return a non-zero value. Negative -error return codes are ignored by the PM core. +call to pm_runtime_autosuspend(). To prevent this (for example, if the callback +routine has started a delayed suspend), the routine must return a non-zero +value. Negative error return codes are ignored by the PM core. The helper functions provided by the PM core, described in Section 4, guarantee that the following constraints are met with respect to runtime PM callbacks for @@ -330,10 +328,9 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: 'power.disable_depth' is different from 0 `int pm_runtime_autosuspend(struct device *dev);` - - same as pm_runtime_suspend() except that the autosuspend delay is taken - `into account;` if pm_runtime_autosuspend_expiration() says the delay has - not yet expired then an autosuspend is scheduled for the appropriate time - and 0 is returned + - same as pm_runtime_suspend() except that a call to + pm_runtime_mark_last_busy() is made and an autosuspend is scheduled for + the appropriate time and 0 is returned `int pm_runtime_resume(struct device *dev);` - execute the subsystem-level resume callback for the device; returns 0 on diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 3a0d5f0ea471..566a07b60f63 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -379,11 +379,13 @@ static inline int pm_runtime_suspend(struct device *dev) } /** - * pm_runtime_autosuspend - Set up autosuspend of a device or suspend it. + * pm_runtime_autosuspend - Update the last access time and set up autosuspend + * of a device. * @dev: Target device. * - * Set up autosuspend of @dev or suspend it (depending on whether or not - * autosuspend is enabled for it) without engaging its "idle check" callback. + * First update the last access time, then set up autosuspend of @dev or suspend + * it (depending on whether or not autosuspend is enabled for it) without + * engaging its "idle check" callback. * * Return: * * 0: Success. @@ -399,6 +401,7 @@ static inline int pm_runtime_suspend(struct device *dev) */ static inline int pm_runtime_autosuspend(struct device *dev) { + pm_runtime_mark_last_busy(dev); return __pm_runtime_suspend(dev, RPM_AUTO); } -- cgit v1.2.3 From 18c1fe53d186867243f4cf17f4eef60737a16c4c Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 16 Jun 2025 09:12:11 +0300 Subject: PM: runtime: Mark last busy stamp in pm_request_autosuspend() Set device's last busy timestamp to current time in pm_request_autosuspend(). Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Link: https://patch.msgid.link/20250616061212.2286741-6-sakari.ailus@linux.intel.com Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.rst | 6 +++--- include/linux/pm_runtime.h | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst index 39a0b62f6648..91bc93422262 100644 --- a/Documentation/power/runtime_pm.rst +++ b/Documentation/power/runtime_pm.rst @@ -354,9 +354,9 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: success or error code if the request has not been queued up `int pm_request_autosuspend(struct device *dev);` - - schedule the execution of the subsystem-level suspend callback for the - device when the autosuspend delay has expired; if the delay has already - expired then the work item is queued up immediately + - Call pm_runtime_mark_last_busy() and schedule the execution of the + subsystem-level suspend callback for the device when the autosuspend delay + expires `int pm_schedule_suspend(struct device *dev, unsigned int delay);` - schedule the execution of the subsystem-level suspend callback for the diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 566a07b60f63..778d5988f35e 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -448,11 +448,12 @@ static inline int pm_request_resume(struct device *dev) } /** - * pm_request_autosuspend - Queue up autosuspend of a device. + * pm_request_autosuspend - Update the last access time and queue up autosuspend + * of a device. * @dev: Target device. * - * Queue up a work item to run an equivalent pm_runtime_autosuspend() for @dev - * asynchronously. + * Update the last access time of a device and queue up a work item to run an + * equivalent pm_runtime_autosuspend() for @dev asynchronously. * * Return: * * 0: Success. @@ -467,6 +468,7 @@ static inline int pm_request_resume(struct device *dev) */ static inline int pm_request_autosuspend(struct device *dev) { + pm_runtime_mark_last_busy(dev); return __pm_runtime_suspend(dev, RPM_ASYNC | RPM_AUTO); } -- cgit v1.2.3