From 9f1edf1aedac1b287355f63f768ba4275de72dca Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 6 Oct 2025 10:51:06 -0700 Subject: mm: readahead: make thp readahead conditional to mmap_miss logic Commit 4687fdbb805a ("mm/filemap: Support VM_HUGEPAGE for file mappings") introduced a special handling for VM_HUGEPAGE mappings: even if the readahead is disabled, 1 or 2 HPAGE_PMD_ORDER pages are allocated. This change causes a significant regression for containers with a tight memory.max limit, if VM_HUGEPAGE is widely used. Prior to this commit, mmap_miss logic would eventually lead to the readahead disablement, effectively reducing the memory pressure in the cgroup. With this change the kernel is trying to allocate 1-2 huge pages for each fault, no matter if these pages are used or not before being evicted, increasing the memory pressure multi-fold. To fix the regression, let's make the new VM_HUGEPAGE conditional to the mmap_miss check, but keep independent from the ra->ra_pages. This way the main intention of commit 4687fdbb805a ("mm/filemap: Support VM_HUGEPAGE for file mappings") stays intact, but the regression is resolved. The logic behind this changes is simple: even if a user explicitly requests using huge pages to back the file mapping (using VM_HUGEPAGE flag), under a very strong memory pressure it's better to fall back to ordinary pages. Link: https://lkml.kernel.org/r/20251006175106.377411-1-roman.gushchin@linux.dev Fixes: 4687fdbb805a ("mm/filemap: Support VM_HUGEPAGE for file mappings") Signed-off-by: Roman Gushchin Reviewed-by: Dev Jain Reviewed-by: Jan Kara Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/filemap.c | 68 +++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 38 insertions(+), 30 deletions(-) (limited to 'mm/filemap.c') diff --git a/mm/filemap.c b/mm/filemap.c index 2f1e7e283a51..526ad8c92250 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3253,11 +3253,47 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff); struct file *fpin = NULL; vm_flags_t vm_flags = vmf->vma->vm_flags; + bool force_thp_readahead = false; unsigned short mmap_miss; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* Use the readahead code, even if readahead is disabled */ - if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) { + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + (vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) + force_thp_readahead = true; + + if (!force_thp_readahead) { + /* + * If we don't want any read-ahead, don't bother. + * VM_EXEC case below is already intended for random access. + */ + if ((vm_flags & (VM_RAND_READ | VM_EXEC)) == VM_RAND_READ) + return fpin; + + if (!ra->ra_pages) + return fpin; + + if (vm_flags & VM_SEQ_READ) { + fpin = maybe_unlock_mmap_for_io(vmf, fpin); + page_cache_sync_ra(&ractl, ra->ra_pages); + return fpin; + } + } + + if (!(vm_flags & VM_SEQ_READ)) { + /* Avoid banging the cache line if not needed */ + mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss < MMAP_LOTSAMISS * 10) + WRITE_ONCE(ra->mmap_miss, ++mmap_miss); + + /* + * Do we miss much more than hit in this file? If so, + * stop bothering with read-ahead. It will only hurt. + */ + if (mmap_miss > MMAP_LOTSAMISS) + return fpin; + } + + if (force_thp_readahead) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1); ra->size = HPAGE_PMD_NR; @@ -3272,34 +3308,6 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) page_cache_ra_order(&ractl, ra); return fpin; } -#endif - - /* - * If we don't want any read-ahead, don't bother. VM_EXEC case below is - * already intended for random access. - */ - if ((vm_flags & (VM_RAND_READ | VM_EXEC)) == VM_RAND_READ) - return fpin; - if (!ra->ra_pages) - return fpin; - - if (vm_flags & VM_SEQ_READ) { - fpin = maybe_unlock_mmap_for_io(vmf, fpin); - page_cache_sync_ra(&ractl, ra->ra_pages); - return fpin; - } - - /* Avoid banging the cache line if not needed */ - mmap_miss = READ_ONCE(ra->mmap_miss); - if (mmap_miss < MMAP_LOTSAMISS * 10) - WRITE_ONCE(ra->mmap_miss, ++mmap_miss); - - /* - * Do we miss much more than hit in this file? If so, - * stop bothering with read-ahead. It will only hurt. - */ - if (mmap_miss > MMAP_LOTSAMISS) - return fpin; if (vm_flags & VM_EXEC) { /* -- cgit v1.2.3 From ad8b2e096181bd23a32d8672de107136d0c478e9 Mon Sep 17 00:00:00 2001 From: Harry Yoo Date: Fri, 24 Oct 2025 20:30:47 +0900 Subject: treewide: include linux/pgalloc.h instead of asm/pgalloc.h For now, including instead of is technically fine unless the .c file calls p*d_populate_kernel() helper functions. But it is a better practice to always include . Include instead of outside arch/. Link: https://lkml.kernel.org/r/20251024113047.119058-3-harry.yoo@oracle.com Signed-off-by: Harry Yoo Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Reviewed-by: Mike Rapoport (Microsoft) Cc: Liam Howlett Cc: Michal Hocko Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: SeongJae Park Signed-off-by: Andrew Morton --- drivers/firmware/efi/arm-runtime.c | 4 ++-- drivers/firmware/efi/riscv-runtime.c | 4 ++-- drivers/s390/char/sclp_sd.c | 3 +-- fs/dax.c | 2 +- kernel/fork.c | 4 ++-- mm/debug_vm_pgtable.c | 2 +- mm/filemap.c | 3 ++- mm/huge_memory.c | 2 +- mm/hugetlb.c | 2 +- mm/hugetlb_vmemmap.c | 3 ++- mm/khugepaged.c | 2 +- mm/memory.c | 4 ++-- mm/mmu_gather.c | 2 +- mm/mremap.c | 2 +- mm/pgtable-generic.c | 3 ++- mm/pt_reclaim.c | 3 ++- 16 files changed, 24 insertions(+), 21 deletions(-) (limited to 'mm/filemap.c') diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 83092d93f36a..53a5336cde5a 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -12,18 +12,18 @@ #include #include #include +#include +#include #include #include #include #include #include #include -#include #include #include #include -#include #if defined(CONFIG_PTDUMP_DEBUGFS) || defined(CONFIG_ARM_PTDUMP_DEBUGFS) #include diff --git a/drivers/firmware/efi/riscv-runtime.c b/drivers/firmware/efi/riscv-runtime.c index fa71cd898120..7ceb02bc57f7 100644 --- a/drivers/firmware/efi/riscv-runtime.c +++ b/drivers/firmware/efi/riscv-runtime.c @@ -14,18 +14,18 @@ #include #include #include +#include +#include #include #include #include #include #include #include -#include #include #include #include -#include static bool __init efi_virtmap_init(void) { diff --git a/drivers/s390/char/sclp_sd.c b/drivers/s390/char/sclp_sd.c index 129b89fe40a3..7a791cb35aea 100644 --- a/drivers/s390/char/sclp_sd.c +++ b/drivers/s390/char/sclp_sd.c @@ -17,8 +17,7 @@ #include #include #include - -#include +#include #include "sclp.h" diff --git a/fs/dax.c b/fs/dax.c index 516f995a988c..0e766aec4303 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #define CREATE_TRACE_POINTS #include diff --git a/kernel/fork.c b/kernel/fork.c index 3da0f08615a9..dd0bb5fe4305 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -106,9 +106,9 @@ #include #include #include - -#include +#include #include + #include #include #include diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 133543ca2821..055e0e025b42 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -30,9 +30,9 @@ #include #include #include +#include #include -#include #include /* diff --git a/mm/filemap.c b/mm/filemap.c index 526ad8c92250..ff75bd89b68c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -48,7 +48,8 @@ #include #include #include -#include +#include + #include #include "internal.h" diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0a521cf9b10a..d716c6965e27 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -37,11 +37,11 @@ #include #include #include +#include #include #include #include -#include #include "internal.h" #include "swap.h" diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 86e672fcb305..1ea459723cce 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -39,9 +39,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 96ee2bd16ee1..9d01f883fd71 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -15,7 +15,8 @@ #include #include #include -#include +#include + #include #include "hugetlb_vmemmap.h" diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 643abf4be236..f6a92958157d 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -21,9 +21,9 @@ #include #include #include +#include #include -#include #include "internal.h" #include "mm_slot.h" diff --git a/mm/memory.c b/mm/memory.c index 8e02b8d75535..8d8c36adafa8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -76,13 +76,13 @@ #include #include #include +#include +#include #include #include #include -#include -#include #include #include diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 374aa6f021c6..247e3f9db6c7 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -9,8 +9,8 @@ #include #include #include +#include -#include #include #ifndef CONFIG_MMU_GATHER_NO_GATHER diff --git a/mm/mremap.c b/mm/mremap.c index 419a0ea0a870..8ad06cf50783 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -25,10 +25,10 @@ #include #include #include +#include #include #include -#include #include "internal.h" diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 8c22be79b734..e46f0cf2159c 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -14,7 +14,8 @@ #include #include #include -#include +#include + #include /* diff --git a/mm/pt_reclaim.c b/mm/pt_reclaim.c index 7e9455a18aae..0d9cfbf4fe5d 100644 --- a/mm/pt_reclaim.c +++ b/mm/pt_reclaim.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include + #include -#include #include "internal.h" -- cgit v1.2.3 From 0ac881efe16468503e8c1e7d8a7210b75f027ce3 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 10 Nov 2025 22:21:28 +0000 Subject: mm: replace pmd_to_swp_entry() with softleaf_from_pmd() Introduce softleaf_from_pmd() to do the equivalent operation for PMDs that softleaf_from_pte() fulfils, and cascade changes through code base accordingly, introducing helpers as necessary. We are then able to eliminate pmd_to_swp_entry(), is_pmd_migration_entry(), is_pmd_device_private_entry() and is_pmd_non_present_folio_entry(). This further establishes the use of leaf operations throughout the code base and further establishes the foundations for eliminating is_swap_pmd(). No functional change intended. [lorenzo.stoakes@oracle.com: check writable, not readable/writable, per Vlastimil] Link: https://lkml.kernel.org/r/cd97b6ec-00f9-45a4-9ae0-8f009c212a94@lucifer.local Link: https://lkml.kernel.org/r/3fb431699639ded8fdc63d2210aa77a38c8891f1.1762812360.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: SeongJae Park \ Reviewed-by: Vlastimil Babka Cc: Alexander Gordeev Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Axel Rasmussen Cc: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Byungchul Park Cc: Chengming Zhou Cc: Chris Li Cc: Christian Borntraeger Cc: Christian Brauner Cc: Claudio Imbrenda Cc: David Hildenbrand Cc: Dev Jain Cc: Gerald Schaefer Cc: Gregory Price Cc: Heiko Carstens Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Janosch Frank Cc: Jason Gunthorpe Cc: Joshua Hahn Cc: Kairui Song Cc: Kemeng Shi Cc: Lance Yang Cc: Leon Romanovsky Cc: Liam Howlett Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michal Hocko Cc: Mike Rapoport Cc: Muchun Song Cc: Naoya Horiguchi Cc: Nhat Pham Cc: Nico Pache Cc: Oscar Salvador Cc: Pasha Tatashin Cc: Peter Xu Cc: Rakie Kim Cc: Rik van Riel Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Wei Xu Cc: xu xin Cc: Yuanchu Xie Cc: Zi Yan Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 27 +++--- include/linux/leafops.h | 218 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/migrate.h | 2 +- include/linux/swapops.h | 100 ---------------------- mm/damon/ops-common.c | 6 +- mm/filemap.c | 6 +- mm/hmm.c | 16 ++-- mm/huge_memory.c | 98 +++++++++++----------- mm/khugepaged.c | 4 +- mm/madvise.c | 2 +- mm/memory.c | 4 +- mm/mempolicy.c | 4 +- mm/migrate.c | 20 ++--- mm/migrate_device.c | 14 ++-- mm/page_table_check.c | 16 ++-- mm/page_vma_mapped.c | 15 ++-- mm/pagewalk.c | 8 +- mm/rmap.c | 4 +- 18 files changed, 339 insertions(+), 225 deletions(-) (limited to 'mm/filemap.c') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 1bedf7fa5e79..898df952b6bc 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1065,10 +1065,10 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, page = vm_normal_page_pmd(vma, addr, *pmd); present = true; } else if (unlikely(thp_migration_supported())) { - swp_entry_t entry = pmd_to_swp_entry(*pmd); + const softleaf_t entry = softleaf_from_pmd(*pmd); - if (is_pfn_swap_entry(entry)) - page = pfn_swap_entry_to_page(entry); + if (softleaf_has_pfn(entry)) + page = softleaf_to_page(entry); } if (IS_ERR_OR_NULL(page)) return; @@ -1655,7 +1655,7 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, pmd = pmd_clear_soft_dirty(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); - } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { + } else if (pmd_is_migration_entry(pmd)) { pmd = pmd_swp_clear_soft_dirty(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } @@ -2016,12 +2016,12 @@ static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr, if (pm->show_pfn) frame = pmd_pfn(pmd) + idx; } else if (thp_migration_supported()) { - swp_entry_t entry = pmd_to_swp_entry(pmd); + const softleaf_t entry = softleaf_from_pmd(pmd); unsigned long offset; if (pm->show_pfn) { - if (is_pfn_swap_entry(entry)) - offset = swp_offset_pfn(entry) + idx; + if (softleaf_has_pfn(entry)) + offset = softleaf_to_pfn(entry) + idx; else offset = swp_offset(entry) + idx; frame = swp_type(entry) | @@ -2032,7 +2032,7 @@ static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr, flags |= PM_SOFT_DIRTY; if (pmd_swp_uffd_wp(pmd)) flags |= PM_UFFD_WP; - VM_WARN_ON_ONCE(!is_pmd_migration_entry(pmd)); + VM_WARN_ON_ONCE(!pmd_is_migration_entry(pmd)); page = pfn_swap_entry_to_page(entry); } @@ -2426,8 +2426,6 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, if (pmd_soft_dirty(pmd)) categories |= PAGE_IS_SOFT_DIRTY; } else { - swp_entry_t swp; - categories |= PAGE_IS_SWAPPED; if (!pmd_swp_uffd_wp(pmd)) categories |= PAGE_IS_WRITTEN; @@ -2435,9 +2433,10 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, categories |= PAGE_IS_SOFT_DIRTY; if (p->masks_of_interest & PAGE_IS_FILE) { - swp = pmd_to_swp_entry(pmd); - if (is_pfn_swap_entry(swp) && - !folio_test_anon(pfn_swap_entry_folio(swp))) + const softleaf_t entry = softleaf_from_pmd(pmd); + + if (softleaf_has_pfn(entry) && + !folio_test_anon(softleaf_to_folio(entry))) categories |= PAGE_IS_FILE; } } @@ -2454,7 +2453,7 @@ static void make_uffd_wp_pmd(struct vm_area_struct *vma, old = pmdp_invalidate_ad(vma, addr, pmdp); pmd = pmd_mkuffd_wp(old); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); - } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { + } else if (pmd_is_migration_entry(pmd)) { pmd = pmd_swp_mkuffd_wp(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } diff --git a/include/linux/leafops.h b/include/linux/leafops.h index cff9d94fd5d1..f5ea9b0385ff 100644 --- a/include/linux/leafops.h +++ b/include/linux/leafops.h @@ -61,6 +61,57 @@ static inline softleaf_t softleaf_from_pte(pte_t pte) return pte_to_swp_entry(pte); } +/** + * softleaf_to_pte() - Obtain a PTE entry from a leaf entry. + * @entry: Leaf entry. + * + * This generates an architecture-specific PTE entry that can be utilised to + * encode the metadata the leaf entry encodes. + * + * Returns: Architecture-specific PTE entry encoding leaf entry. + */ +static inline pte_t softleaf_to_pte(softleaf_t entry) +{ + /* Temporary until swp_entry_t eliminated. */ + return swp_entry_to_pte(entry); +} + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +/** + * softleaf_from_pmd() - Obtain a leaf entry from a PMD entry. + * @pmd: PMD entry. + * + * If @pmd is present (therefore not a leaf entry) the function returns an empty + * leaf entry. Otherwise, it returns a leaf entry. + * + * Returns: Leaf entry. + */ +static inline softleaf_t softleaf_from_pmd(pmd_t pmd) +{ + softleaf_t arch_entry; + + if (pmd_present(pmd) || pmd_none(pmd)) + return softleaf_mk_none(); + + if (pmd_swp_soft_dirty(pmd)) + pmd = pmd_swp_clear_soft_dirty(pmd); + if (pmd_swp_uffd_wp(pmd)) + pmd = pmd_swp_clear_uffd_wp(pmd); + arch_entry = __pmd_to_swp_entry(pmd); + + /* Temporary until swp_entry_t eliminated. */ + return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); +} + +#else + +static inline softleaf_t softleaf_from_pmd(pmd_t pmd) +{ + return softleaf_mk_none(); +} + +#endif + /** * softleaf_is_none() - Is the leaf entry empty? * @entry: Leaf entry. @@ -134,6 +185,43 @@ static inline bool softleaf_is_swap(softleaf_t entry) return softleaf_type(entry) == SOFTLEAF_SWAP; } +/** + * softleaf_is_migration_write() - Is this leaf entry a writable migration entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a writable migration entry, otherwise + * false. + */ +static inline bool softleaf_is_migration_write(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_MIGRATION_WRITE; +} + +/** + * softleaf_is_migration_read() - Is this leaf entry a readable migration entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a readable migration entry, otherwise + * false. + */ +static inline bool softleaf_is_migration_read(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ; +} + +/** + * softleaf_is_migration_read_exclusive() - Is this leaf entry an exclusive + * readable migration entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is an exclusive readable migration entry, + * otherwise false. + */ +static inline bool softleaf_is_migration_read_exclusive(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ_EXCLUSIVE; +} + /** * softleaf_is_migration() - Is this leaf entry a migration entry? * @entry: Leaf entry. @@ -152,6 +240,19 @@ static inline bool softleaf_is_migration(softleaf_t entry) } } +/** + * softleaf_is_device_private_write() - Is this leaf entry a device private + * writable entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a device private writable entry, otherwise + * false. + */ +static inline bool softleaf_is_device_private_write(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_DEVICE_PRIVATE_WRITE; +} + /** * softleaf_is_device_private() - Is this leaf entry a device private entry? * @entry: Leaf entry. @@ -170,10 +271,10 @@ static inline bool softleaf_is_device_private(softleaf_t entry) } /** - * softleaf_is_device_exclusive() - Is this leaf entry a device exclusive entry? + * softleaf_is_device_exclusive() - Is this leaf entry a device-exclusive entry? * @entry: Leaf entry. * - * Returns: true if the leaf entry is a device exclusive entry, otherwise false. + * Returns: true if the leaf entry is a device-exclusive entry, otherwise false. */ static inline bool softleaf_is_device_exclusive(softleaf_t entry) { @@ -332,6 +433,61 @@ static inline bool softleaf_is_uffd_wp_marker(softleaf_t entry) return softleaf_to_marker(entry) & PTE_MARKER_UFFD_WP; } +#ifdef CONFIG_MIGRATION + +/** + * softleaf_is_migration_young() - Does this migration entry contain an accessed + * bit? + * @entry: Leaf entry. + * + * If the architecture can support storing A/D bits in migration entries, this + * determines whether the accessed (or 'young') bit was set on the migrated page + * table entry. + * + * Returns: true if the entry contains an accessed bit, otherwise false. + */ +static inline bool softleaf_is_migration_young(softleaf_t entry) +{ + VM_WARN_ON_ONCE(!softleaf_is_migration(entry)); + + if (migration_entry_supports_ad()) + return swp_offset(entry) & SWP_MIG_YOUNG; + /* Keep the old behavior of aging page after migration */ + return false; +} + +/** + * softleaf_is_migration_dirty() - Does this migration entry contain a dirty bit? + * @entry: Leaf entry. + * + * If the architecture can support storing A/D bits in migration entries, this + * determines whether the dirty bit was set on the migrated page table entry. + * + * Returns: true if the entry contains a dirty bit, otherwise false. + */ +static inline bool softleaf_is_migration_dirty(softleaf_t entry) +{ + VM_WARN_ON_ONCE(!softleaf_is_migration(entry)); + + if (migration_entry_supports_ad()) + return swp_offset(entry) & SWP_MIG_DIRTY; + /* Keep the old behavior of clean page after migration */ + return false; +} + +#else /* CONFIG_MIGRATION */ + +static inline bool softleaf_is_migration_young(softleaf_t entry) +{ + return false; +} + +static inline bool softleaf_is_migration_dirty(softleaf_t entry) +{ + return false; +} +#endif /* CONFIG_MIGRATION */ + /** * pte_is_marker() - Does the PTE entry encode a marker leaf entry? * @pte: PTE entry. @@ -383,5 +539,63 @@ static inline bool pte_is_uffd_marker(pte_t pte) return false; } +#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_ENABLE_THP_MIGRATION) + +/** + * pmd_is_device_private_entry() - Check if PMD contains a device private swap + * entry. + * @pmd: The PMD to check. + * + * Returns true if the PMD contains a swap entry that represents a device private + * page mapping. This is used for zone device private pages that have been + * swapped out but still need special handling during various memory management + * operations. + * + * Return: true if PMD contains device private entry, false otherwise + */ +static inline bool pmd_is_device_private_entry(pmd_t pmd) +{ + return softleaf_is_device_private(softleaf_from_pmd(pmd)); +} + +#else /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ + +static inline bool pmd_is_device_private_entry(pmd_t pmd) +{ + return false; +} + +#endif /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ + +/** + * pmd_is_migration_entry() - Does this PMD entry encode a migration entry? + * @pmd: PMD entry. + * + * Returns: true if the PMD encodes a migration entry, otherwise false. + */ +static inline bool pmd_is_migration_entry(pmd_t pmd) +{ + return softleaf_is_migration(softleaf_from_pmd(pmd)); +} + +/** + * pmd_is_valid_softleaf() - Is this PMD entry a valid leaf entry? + * @pmd: PMD entry. + * + * PMD leaf entries are valid only if they are device private or migration + * entries. This function asserts that a PMD leaf entry is valid in this + * respect. + * + * Returns: true if the PMD entry is a valid leaf entry, otherwise false. + */ +static inline bool pmd_is_valid_softleaf(pmd_t pmd) +{ + const softleaf_t entry = softleaf_from_pmd(pmd); + + /* Only device private, migration entries valid for PMD. */ + return softleaf_is_device_private(entry) || + softleaf_is_migration(entry); +} + #endif /* CONFIG_MMU */ #endif /* _LINUX_LEAFOPS_H */ diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 41b4cc05a450..26ca00c325d9 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -65,7 +65,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); -void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) +void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 3e8dd6ea94dd..f1277647262d 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -283,14 +283,6 @@ static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) return entry; } -static inline bool is_migration_entry_young(swp_entry_t entry) -{ - if (migration_entry_supports_ad()) - return swp_offset(entry) & SWP_MIG_YOUNG; - /* Keep the old behavior of aging page after migration */ - return false; -} - static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) { if (migration_entry_supports_ad()) @@ -299,14 +291,6 @@ static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) return entry; } -static inline bool is_migration_entry_dirty(swp_entry_t entry) -{ - if (migration_entry_supports_ad()) - return swp_offset(entry) & SWP_MIG_DIRTY; - /* Keep the old behavior of clean page after migration */ - return false; -} - extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); extern void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *pte); @@ -349,20 +333,11 @@ static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) return entry; } -static inline bool is_migration_entry_young(swp_entry_t entry) -{ - return false; -} - static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) { return entry; } -static inline bool is_migration_entry_dirty(swp_entry_t entry) -{ - return false; -} #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_MEMORY_FAILURE @@ -487,18 +462,6 @@ extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd); -static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) -{ - swp_entry_t arch_entry; - - if (pmd_swp_soft_dirty(pmd)) - pmd = pmd_swp_clear_soft_dirty(pmd); - if (pmd_swp_uffd_wp(pmd)) - pmd = pmd_swp_clear_uffd_wp(pmd); - arch_entry = __pmd_to_swp_entry(pmd); - return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); -} - static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) { swp_entry_t arch_entry; @@ -507,23 +470,7 @@ static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) return __swp_entry_to_pmd(arch_entry); } -static inline int is_pmd_migration_entry(pmd_t pmd) -{ - swp_entry_t entry; - - if (pmd_present(pmd)) - return 0; - - entry = pmd_to_swp_entry(pmd); - return is_migration_entry(entry); -} #else /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -static inline int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, - struct page *page) -{ - BUILD_BUG(); -} - static inline void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) { @@ -532,64 +479,17 @@ static inline void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { } -static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) -{ - return swp_entry(0, 0); -} - static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) { return __pmd(0); } -static inline int is_pmd_migration_entry(pmd_t pmd) -{ - return 0; -} #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_ENABLE_THP_MIGRATION) - -/** - * is_pmd_device_private_entry() - Check if PMD contains a device private swap entry - * @pmd: The PMD to check - * - * Returns true if the PMD contains a swap entry that represents a device private - * page mapping. This is used for zone device private pages that have been - * swapped out but still need special handling during various memory management - * operations. - * - * Return: 1 if PMD contains device private entry, 0 otherwise - */ -static inline int is_pmd_device_private_entry(pmd_t pmd) -{ - swp_entry_t entry; - - if (pmd_present(pmd)) - return 0; - - entry = pmd_to_swp_entry(pmd); - return is_device_private_entry(entry); -} - -#else /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ - -static inline int is_pmd_device_private_entry(pmd_t pmd) -{ - return 0; -} - -#endif /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ - static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES; } -static inline int is_pmd_non_present_folio_entry(pmd_t pmd) -{ - return is_pmd_migration_entry(pmd) || is_pmd_device_private_entry(pmd); -} - #endif /* CONFIG_MMU */ #endif /* _LINUX_SWAPOPS_H */ diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c index 971df8a16ba4..a218d9922234 100644 --- a/mm/damon/ops-common.c +++ b/mm/damon/ops-common.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include "../internal.h" #include "ops-common.h" @@ -51,7 +51,7 @@ void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr if (likely(pte_present(pteval))) pfn = pte_pfn(pteval); else - pfn = swp_offset_pfn(pte_to_swp_entry(pteval)); + pfn = softleaf_to_pfn(softleaf_from_pte(pteval)); folio = damon_get_folio(pfn); if (!folio) @@ -83,7 +83,7 @@ void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr if (likely(pmd_present(pmdval))) pfn = pmd_pfn(pmdval); else - pfn = swp_offset_pfn(pmd_to_swp_entry(pmdval)); + pfn = softleaf_to_pfn(softleaf_from_pmd(pmdval)); folio = damon_get_folio(pfn); if (!folio) diff --git a/mm/filemap.c b/mm/filemap.c index f0c36df1def7..02355aa46324 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -1402,7 +1402,7 @@ repeat: * This follows the same logic as folio_wait_bit_common() so see the comments * there. */ -void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) +void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl) { struct wait_page_queue wait_page; @@ -1411,7 +1411,7 @@ void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) unsigned long pflags; bool in_thrashing; wait_queue_head_t *q; - struct folio *folio = pfn_swap_entry_folio(entry); + struct folio *folio = softleaf_to_folio(entry); q = folio_waitqueue(folio); if (!folio_test_uptodate(folio) && folio_test_workingset(folio)) { diff --git a/mm/hmm.c b/mm/hmm.c index e350d0cc9d41..e9735a9b6102 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -339,19 +339,19 @@ static int hmm_vma_handle_absent_pmd(struct mm_walk *walk, unsigned long start, struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; unsigned long npages = (end - start) >> PAGE_SHIFT; + const softleaf_t entry = softleaf_from_pmd(pmd); unsigned long addr = start; - swp_entry_t entry = pmd_to_swp_entry(pmd); unsigned int required_fault; - if (is_device_private_entry(entry) && - pfn_swap_entry_folio(entry)->pgmap->owner == + if (softleaf_is_device_private(entry) && + softleaf_to_folio(entry)->pgmap->owner == range->dev_private_owner) { unsigned long cpu_flags = HMM_PFN_VALID | hmm_pfn_flags_order(PMD_SHIFT - PAGE_SHIFT); - unsigned long pfn = swp_offset_pfn(entry); + unsigned long pfn = softleaf_to_pfn(entry); unsigned long i; - if (is_writable_device_private_entry(entry)) + if (softleaf_is_device_private_write(entry)) cpu_flags |= HMM_PFN_WRITE; /* @@ -370,7 +370,7 @@ static int hmm_vma_handle_absent_pmd(struct mm_walk *walk, unsigned long start, required_fault = hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0); if (required_fault) { - if (is_device_private_entry(entry)) + if (softleaf_is_device_private(entry)) return hmm_vma_fault(addr, end, required_fault, walk); else return -EFAULT; @@ -412,7 +412,7 @@ again: if (pmd_none(pmd)) return hmm_vma_walk_hole(start, end, -1, walk); - if (thp_migration_supported() && is_pmd_migration_entry(pmd)) { + if (thp_migration_supported() && pmd_is_migration_entry(pmd)) { if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0)) { hmm_vma_walk->last = addr; pmd_migration_entry_wait(walk->mm, pmdp); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0fdb3be39e31..9aa933723355 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1299,7 +1299,7 @@ vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; vm_fault_t ret = 0; spinlock_t *ptl; - swp_entry_t swp_entry; + softleaf_t entry; struct page *page; struct folio *folio; @@ -1314,8 +1314,8 @@ vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf) return 0; } - swp_entry = pmd_to_swp_entry(vmf->orig_pmd); - page = pfn_swap_entry_to_page(swp_entry); + entry = softleaf_from_pmd(vmf->orig_pmd); + page = softleaf_to_page(entry); folio = page_folio(page); vmf->page = page; vmf->pte = NULL; @@ -1705,13 +1705,13 @@ static void copy_huge_non_present_pmd( struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pmd_t pmd, pgtable_t pgtable) { - swp_entry_t entry = pmd_to_swp_entry(pmd); + softleaf_t entry = softleaf_from_pmd(pmd); struct folio *src_folio; - VM_WARN_ON(!is_pmd_non_present_folio_entry(pmd)); + VM_WARN_ON_ONCE(!pmd_is_valid_softleaf(pmd)); - if (is_writable_migration_entry(entry) || - is_readable_exclusive_migration_entry(entry)) { + if (softleaf_is_migration_write(entry) || + softleaf_is_migration_read_exclusive(entry)) { entry = make_readable_migration_entry(swp_offset(entry)); pmd = swp_entry_to_pmd(entry); if (pmd_swp_soft_dirty(*src_pmd)) @@ -1719,12 +1719,12 @@ static void copy_huge_non_present_pmd( if (pmd_swp_uffd_wp(*src_pmd)) pmd = pmd_swp_mkuffd_wp(pmd); set_pmd_at(src_mm, addr, src_pmd, pmd); - } else if (is_device_private_entry(entry)) { + } else if (softleaf_is_device_private(entry)) { /* * For device private entries, since there are no * read exclusive entries, writable = !readable */ - if (is_writable_device_private_entry(entry)) { + if (softleaf_is_device_private_write(entry)) { entry = make_readable_device_private_entry(swp_offset(entry)); pmd = swp_entry_to_pmd(entry); @@ -1735,7 +1735,7 @@ static void copy_huge_non_present_pmd( set_pmd_at(src_mm, addr, src_pmd, pmd); } - src_folio = pfn_swap_entry_folio(entry); + src_folio = softleaf_to_folio(entry); VM_WARN_ON(!folio_test_large(src_folio)); folio_get(src_folio); @@ -2195,7 +2195,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, if (unlikely(!pmd_present(orig_pmd))) { VM_BUG_ON(thp_migration_supported() && - !is_pmd_migration_entry(orig_pmd)); + !pmd_is_migration_entry(orig_pmd)); goto out; } @@ -2293,11 +2293,10 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, folio_remove_rmap_pmd(folio, page, vma); WARN_ON_ONCE(folio_mapcount(folio) < 0); VM_BUG_ON_PAGE(!PageHead(page), page); - } else if (is_pmd_non_present_folio_entry(orig_pmd)) { - swp_entry_t entry; + } else if (pmd_is_valid_softleaf(orig_pmd)) { + const softleaf_t entry = softleaf_from_pmd(orig_pmd); - entry = pmd_to_swp_entry(orig_pmd); - folio = pfn_swap_entry_folio(entry); + folio = softleaf_to_folio(entry); flush_needed = 0; if (!thp_migration_supported()) @@ -2353,7 +2352,7 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, static pmd_t move_soft_dirty_pmd(pmd_t pmd) { #ifdef CONFIG_MEM_SOFT_DIRTY - if (unlikely(is_pmd_migration_entry(pmd))) + if (unlikely(pmd_is_migration_entry(pmd))) pmd = pmd_swp_mksoft_dirty(pmd); else if (pmd_present(pmd)) pmd = pmd_mksoft_dirty(pmd); @@ -2428,12 +2427,12 @@ static void change_non_present_huge_pmd(struct mm_struct *mm, unsigned long addr, pmd_t *pmd, bool uffd_wp, bool uffd_wp_resolve) { - swp_entry_t entry = pmd_to_swp_entry(*pmd); - struct folio *folio = pfn_swap_entry_folio(entry); + softleaf_t entry = softleaf_from_pmd(*pmd); + const struct folio *folio = softleaf_to_folio(entry); pmd_t newpmd; - VM_WARN_ON(!is_pmd_non_present_folio_entry(*pmd)); - if (is_writable_migration_entry(entry)) { + VM_WARN_ON(!pmd_is_valid_softleaf(*pmd)); + if (softleaf_is_migration_write(entry)) { /* * A protection check is difficult so * just be safe and disable write @@ -2445,7 +2444,7 @@ static void change_non_present_huge_pmd(struct mm_struct *mm, newpmd = swp_entry_to_pmd(entry); if (pmd_swp_soft_dirty(*pmd)) newpmd = pmd_swp_mksoft_dirty(newpmd); - } else if (is_writable_device_private_entry(entry)) { + } else if (softleaf_is_device_private_write(entry)) { entry = make_readable_device_private_entry(swp_offset(entry)); newpmd = swp_entry_to_pmd(entry); } else { @@ -2643,7 +2642,7 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm if (!pmd_trans_huge(src_pmdval)) { spin_unlock(src_ptl); - if (is_pmd_migration_entry(src_pmdval)) { + if (pmd_is_migration_entry(src_pmdval)) { pmd_migration_entry_wait(mm, &src_pmdval); return -EAGAIN; } @@ -2908,13 +2907,12 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr; pte_t *pte; int i; - swp_entry_t entry; VM_BUG_ON(haddr & ~HPAGE_PMD_MASK); VM_BUG_ON_VMA(vma->vm_start > haddr, vma); VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma); - VM_WARN_ON(!is_pmd_non_present_folio_entry(*pmd) && !pmd_trans_huge(*pmd)); + VM_WARN_ON_ONCE(!pmd_is_valid_softleaf(*pmd) && !pmd_trans_huge(*pmd)); count_vm_event(THP_SPLIT_PMD); @@ -2928,11 +2926,10 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, zap_deposited_table(mm, pmd); if (!vma_is_dax(vma) && vma_is_special_huge(vma)) return; - if (unlikely(is_pmd_migration_entry(old_pmd))) { - swp_entry_t entry; + if (unlikely(pmd_is_migration_entry(old_pmd))) { + const softleaf_t old_entry = softleaf_from_pmd(old_pmd); - entry = pmd_to_swp_entry(old_pmd); - folio = pfn_swap_entry_folio(entry); + folio = softleaf_to_folio(old_entry); } else if (is_huge_zero_pmd(old_pmd)) { return; } else { @@ -2962,31 +2959,34 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, return __split_huge_zero_page_pmd(vma, haddr, pmd); } + if (pmd_is_migration_entry(*pmd)) { + softleaf_t entry; - if (is_pmd_migration_entry(*pmd)) { old_pmd = *pmd; - entry = pmd_to_swp_entry(old_pmd); - page = pfn_swap_entry_to_page(entry); + entry = softleaf_from_pmd(old_pmd); + page = softleaf_to_page(entry); folio = page_folio(page); soft_dirty = pmd_swp_soft_dirty(old_pmd); uffd_wp = pmd_swp_uffd_wp(old_pmd); - write = is_writable_migration_entry(entry); + write = softleaf_is_migration_write(entry); if (PageAnon(page)) - anon_exclusive = is_readable_exclusive_migration_entry(entry); - young = is_migration_entry_young(entry); - dirty = is_migration_entry_dirty(entry); - } else if (is_pmd_device_private_entry(*pmd)) { + anon_exclusive = softleaf_is_migration_read_exclusive(entry); + young = softleaf_is_migration_young(entry); + dirty = softleaf_is_migration_dirty(entry); + } else if (pmd_is_device_private_entry(*pmd)) { + softleaf_t entry; + old_pmd = *pmd; - entry = pmd_to_swp_entry(old_pmd); - page = pfn_swap_entry_to_page(entry); + entry = softleaf_from_pmd(old_pmd); + page = softleaf_to_page(entry); folio = page_folio(page); soft_dirty = pmd_swp_soft_dirty(old_pmd); uffd_wp = pmd_swp_uffd_wp(old_pmd); - write = is_writable_device_private_entry(entry); + write = softleaf_is_device_private_write(entry); anon_exclusive = PageAnonExclusive(page); /* @@ -3090,7 +3090,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, * Note that NUMA hinting access restrictions are not transferred to * avoid any possibility of altering permissions across VMAs. */ - if (freeze || is_pmd_migration_entry(old_pmd)) { + if (freeze || pmd_is_migration_entry(old_pmd)) { pte_t entry; swp_entry_t swp_entry; @@ -3116,7 +3116,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, VM_WARN_ON(!pte_none(ptep_get(pte + i))); set_pte_at(mm, addr, pte + i, entry); } - } else if (is_pmd_device_private_entry(old_pmd)) { + } else if (pmd_is_device_private_entry(old_pmd)) { pte_t entry; swp_entry_t swp_entry; @@ -3166,7 +3166,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, } pte_unmap(pte); - if (!is_pmd_migration_entry(*pmd)) + if (!pmd_is_migration_entry(*pmd)) folio_remove_rmap_pmd(folio, page, vma); if (freeze) put_page(page); @@ -3179,7 +3179,7 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, bool freeze) { VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE)); - if (pmd_trans_huge(*pmd) || is_pmd_non_present_folio_entry(*pmd)) + if (pmd_trans_huge(*pmd) || pmd_is_valid_softleaf(*pmd)) __split_huge_pmd_locked(vma, pmd, address, freeze); } @@ -4749,25 +4749,25 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) unsigned long address = pvmw->address; unsigned long haddr = address & HPAGE_PMD_MASK; pmd_t pmde; - swp_entry_t entry; + softleaf_t entry; if (!(pvmw->pmd && !pvmw->pte)) return; - entry = pmd_to_swp_entry(*pvmw->pmd); + entry = softleaf_from_pmd(*pvmw->pmd); folio_get(folio); pmde = folio_mk_pmd(folio, READ_ONCE(vma->vm_page_prot)); if (pmd_swp_soft_dirty(*pvmw->pmd)) pmde = pmd_mksoft_dirty(pmde); - if (is_writable_migration_entry(entry)) + if (softleaf_is_migration_write(entry)) pmde = pmd_mkwrite(pmde, vma); if (pmd_swp_uffd_wp(*pvmw->pmd)) pmde = pmd_mkuffd_wp(pmde); - if (!is_migration_entry_young(entry)) + if (!softleaf_is_migration_young(entry)) pmde = pmd_mkold(pmde); /* NOTE: this may contain setting soft-dirty on some archs */ - if (folio_test_dirty(folio) && is_migration_entry_dirty(entry)) + if (folio_test_dirty(folio) && softleaf_is_migration_dirty(entry)) pmde = pmd_mkdirty(pmde); if (folio_is_device_private(folio)) { @@ -4790,7 +4790,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) if (folio_test_anon(folio)) { rmap_t rmap_flags = RMAP_NONE; - if (!is_readable_migration_entry(entry)) + if (!softleaf_is_migration_read(entry)) rmap_flags |= RMAP_EXCLUSIVE; folio_add_anon_rmap_pmd(folio, new, vma, haddr, rmap_flags); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index d7e71c2e2571..7e8cb181d5bd 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -941,7 +941,7 @@ static inline int check_pmd_state(pmd_t *pmd) * collapse it. Migration success or failure will eventually end * up with a present PMD mapping a folio again. */ - if (is_pmd_migration_entry(pmde)) + if (pmd_is_migration_entry(pmde)) return SCAN_PMD_MAPPED; if (!pmd_present(pmde)) return SCAN_PMD_NULL; diff --git a/mm/madvise.c b/mm/madvise.c index 2d7dd7901bae..5979a4a39738 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -390,7 +390,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, if (unlikely(!pmd_present(orig_pmd))) { VM_BUG_ON(thp_migration_supported() && - !is_pmd_migration_entry(orig_pmd)); + !pmd_is_migration_entry(orig_pmd)); goto huge_unlock; } diff --git a/mm/memory.c b/mm/memory.c index 76c17feff88b..9d0d527e95a8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6352,10 +6352,10 @@ retry_pud: goto fallback; if (unlikely(!pmd_present(vmf.orig_pmd))) { - if (is_pmd_device_private_entry(vmf.orig_pmd)) + if (pmd_is_device_private_entry(vmf.orig_pmd)) return do_huge_pmd_device_private(&vmf); - if (is_pmd_migration_entry(vmf.orig_pmd)) + if (pmd_is_migration_entry(vmf.orig_pmd)) pmd_migration_entry_wait(mm, vmf.pmd); return 0; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 7ae3f5e2dee6..01c3b98f87a6 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -110,7 +110,7 @@ #include #include #include -#include +#include #include #include @@ -647,7 +647,7 @@ static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk) struct folio *folio; struct queue_pages *qp = walk->private; - if (unlikely(is_pmd_migration_entry(*pmd))) { + if (unlikely(pmd_is_migration_entry(*pmd))) { qp->nr_failed++; return; } diff --git a/mm/migrate.c b/mm/migrate.c index 847c1ec17628..ca4ec170a89b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -353,7 +353,7 @@ static bool remove_migration_pte(struct folio *folio, rmap_t rmap_flags = RMAP_NONE; pte_t old_pte; pte_t pte; - swp_entry_t entry; + softleaf_t entry; struct page *new; unsigned long idx = 0; @@ -379,22 +379,22 @@ static bool remove_migration_pte(struct folio *folio, folio_get(folio); pte = mk_pte(new, READ_ONCE(vma->vm_page_prot)); - entry = pte_to_swp_entry(old_pte); - if (!is_migration_entry_young(entry)) + entry = softleaf_from_pte(old_pte); + if (!softleaf_is_migration_young(entry)) pte = pte_mkold(pte); - if (folio_test_dirty(folio) && is_migration_entry_dirty(entry)) + if (folio_test_dirty(folio) && softleaf_is_migration_dirty(entry)) pte = pte_mkdirty(pte); if (pte_swp_soft_dirty(old_pte)) pte = pte_mksoft_dirty(pte); else pte = pte_clear_soft_dirty(pte); - if (is_writable_migration_entry(entry)) + if (softleaf_is_migration_write(entry)) pte = pte_mkwrite(pte, vma); else if (pte_swp_uffd_wp(old_pte)) pte = pte_mkuffd_wp(pte); - if (folio_test_anon(folio) && !is_readable_migration_entry(entry)) + if (folio_test_anon(folio) && !softleaf_is_migration_read(entry)) rmap_flags |= RMAP_EXCLUSIVE; if (unlikely(is_device_private_page(new))) { @@ -404,7 +404,7 @@ static bool remove_migration_pte(struct folio *folio, else entry = make_readable_device_private_entry( page_to_pfn(new)); - pte = swp_entry_to_pte(entry); + pte = softleaf_to_pte(entry); if (pte_swp_soft_dirty(old_pte)) pte = pte_swp_mksoft_dirty(pte); if (pte_swp_uffd_wp(old_pte)) @@ -543,9 +543,9 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) spinlock_t *ptl; ptl = pmd_lock(mm, pmd); - if (!is_pmd_migration_entry(*pmd)) + if (!pmd_is_migration_entry(*pmd)) goto unlock; - migration_entry_wait_on_locked(pmd_to_swp_entry(*pmd), ptl); + migration_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl); return; unlock: spin_unlock(ptl); diff --git a/mm/migrate_device.c b/mm/migrate_device.c index ab373fd38961..592b4561507c 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include "internal.h" @@ -141,7 +141,6 @@ static int migrate_vma_collect_huge_pmd(pmd_t *pmdp, unsigned long start, struct folio *folio; struct migrate_vma *migrate = walk->private; spinlock_t *ptl; - swp_entry_t entry; int ret; unsigned long write = 0; @@ -165,23 +164,24 @@ static int migrate_vma_collect_huge_pmd(pmd_t *pmdp, unsigned long start, if (pmd_write(*pmdp)) write = MIGRATE_PFN_WRITE; } else if (!pmd_present(*pmdp)) { - entry = pmd_to_swp_entry(*pmdp); - folio = pfn_swap_entry_folio(entry); + const softleaf_t entry = softleaf_from_pmd(*pmdp); - if (!is_device_private_entry(entry) || + folio = softleaf_to_folio(entry); + + if (!softleaf_is_device_private(entry) || !(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_PRIVATE) || (folio->pgmap->owner != migrate->pgmap_owner)) { spin_unlock(ptl); return migrate_vma_collect_skip(start, end, walk); } - if (is_migration_entry(entry)) { + if (softleaf_is_migration(entry)) { migration_entry_wait_on_locked(entry, ptl); spin_unlock(ptl); return -EAGAIN; } - if (is_writable_device_private_entry(entry)) + if (softleaf_is_device_private_write(entry)) write = MIGRATE_PFN_WRITE; } else { spin_unlock(ptl); diff --git a/mm/page_table_check.c b/mm/page_table_check.c index f5f25e120f69..741884645ab0 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #undef pr_fmt #define pr_fmt(fmt) "page_table_check: " fmt @@ -179,10 +179,10 @@ void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) EXPORT_SYMBOL(__page_table_check_pud_clear); /* Whether the swap entry cached writable information */ -static inline bool swap_cached_writable(swp_entry_t entry) +static inline bool softleaf_cached_writable(softleaf_t entry) { - return is_writable_device_private_entry(entry) || - is_writable_migration_entry(entry); + return softleaf_is_device_private_write(entry) || + softleaf_is_migration_write(entry); } static void page_table_check_pte_flags(pte_t pte) @@ -190,9 +190,9 @@ static void page_table_check_pte_flags(pte_t pte) if (pte_present(pte)) { WARN_ON_ONCE(pte_uffd_wp(pte) && pte_write(pte)); } else if (pte_swp_uffd_wp(pte)) { - const swp_entry_t entry = pte_to_swp_entry(pte); + const softleaf_t entry = softleaf_from_pte(pte); - WARN_ON_ONCE(swap_cached_writable(entry)); + WARN_ON_ONCE(softleaf_cached_writable(entry)); } } @@ -219,9 +219,9 @@ static inline void page_table_check_pmd_flags(pmd_t pmd) if (pmd_uffd_wp(pmd)) WARN_ON_ONCE(pmd_write(pmd)); } else if (pmd_swp_uffd_wp(pmd)) { - swp_entry_t entry = pmd_to_swp_entry(pmd); + const softleaf_t entry = softleaf_from_pmd(pmd); - WARN_ON_ONCE(swap_cached_writable(entry)); + WARN_ON_ONCE(softleaf_cached_writable(entry)); } } diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index a4e23818f37f..8137d2366722 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -242,18 +242,19 @@ restart: */ pmde = pmdp_get_lockless(pvmw->pmd); - if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { + if (pmd_trans_huge(pmde) || pmd_is_migration_entry(pmde)) { pvmw->ptl = pmd_lock(mm, pvmw->pmd); pmde = *pvmw->pmd; if (!pmd_present(pmde)) { - swp_entry_t entry; + softleaf_t entry; if (!thp_migration_supported() || !(pvmw->flags & PVMW_MIGRATION)) return not_found(pvmw); - entry = pmd_to_swp_entry(pmde); - if (!is_migration_entry(entry) || - !check_pmd(swp_offset_pfn(entry), pvmw)) + entry = softleaf_from_pmd(pmde); + + if (!softleaf_is_migration(entry) || + !check_pmd(softleaf_to_pfn(entry), pvmw)) return not_found(pvmw); return true; } @@ -273,9 +274,9 @@ restart: * cannot return prematurely, while zap_huge_pmd() has * cleared *pmd but not decremented compound_mapcount(). */ - swp_entry_t entry = pmd_to_swp_entry(pmde); + const softleaf_t entry = softleaf_from_pmd(pmde); - if (is_device_private_entry(entry)) { + if (softleaf_is_device_private(entry)) { pvmw->ptl = pmd_lock(mm, pvmw->pmd); return true; } diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 8a29b7237bc6..378c774795fc 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include @@ -973,10 +973,10 @@ pmd_table: goto found; } } else if ((flags & FW_MIGRATION) && - is_pmd_migration_entry(pmd)) { - swp_entry_t entry = pmd_to_swp_entry(pmd); + pmd_is_migration_entry(pmd)) { + const softleaf_t entry = softleaf_from_pmd(pmd); - page = pfn_swap_entry_to_page(entry); + page = softleaf_to_page(entry); expose_page = false; goto found; } diff --git a/mm/rmap.c b/mm/rmap.c index 1954c538a991..775710115a41 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -57,7 +57,7 @@ #include #include #include -#include +#include #include #include #include @@ -2341,7 +2341,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, if (likely(pmd_present(pmdval))) pfn = pmd_pfn(pmdval); else - pfn = swp_offset_pfn(pmd_to_swp_entry(pmdval)); + pfn = softleaf_to_pfn(softleaf_from_pmd(pmdval)); subpage = folio_page(folio, pfn - folio_pfn(folio)); -- cgit v1.2.3 From 9ff30bb9ab40b34908eefd661f12f99aa00d04c3 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 10 Nov 2025 22:21:31 +0000 Subject: mm: remove non_swap_entry() and use softleaf helpers instead There is simply no need for the hugely confusing concept of 'non-swap' swap entries now we have the concept of softleaf entries and relevant softleaf_xxx() helpers. Adjust all callers to use these instead and remove non_swap_entry() altogether. No functional change intended. Link: https://lkml.kernel.org/r/2562093f37f4a9cffea0447058014485eb50aaaf.1762812360.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: Alexander Gordeev Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Axel Rasmussen Cc: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Byungchul Park Cc: Chengming Zhou Cc: Chris Li Cc: Christian Borntraeger Cc: Christian Brauner Cc: Claudio Imbrenda Cc: David Hildenbrand Cc: Dev Jain Cc: Gerald Schaefer Cc: Gregory Price Cc: Heiko Carstens Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Janosch Frank Cc: Jason Gunthorpe Cc: Joshua Hahn Cc: Kairui Song Cc: Kemeng Shi Cc: Lance Yang Cc: Leon Romanovsky Cc: Liam Howlett Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michal Hocko Cc: Mike Rapoport Cc: Muchun Song Cc: Naoya Horiguchi Cc: Nhat Pham Cc: Nico Pache Cc: Oscar Salvador Cc: Pasha Tatashin Cc: Peter Xu Cc: Rakie Kim Cc: Rik van Riel Cc: Ryan Roberts Cc: SeongJae Park Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Vlastimil Babka Cc: Wei Xu Cc: xu xin Cc: Yuanchu Xie Cc: Zi Yan Signed-off-by: Andrew Morton --- arch/s390/mm/gmap_helpers.c | 20 ++++++++++---------- arch/s390/mm/pgtable.c | 12 ++++++------ fs/proc/task_mmu.c | 12 ++++++------ include/linux/swapops.h | 5 ----- mm/filemap.c | 2 +- mm/hmm.c | 16 ++++++++-------- mm/madvise.c | 2 +- mm/memory.c | 36 ++++++++++++++++++------------------ mm/mincore.c | 2 +- mm/userfaultfd.c | 24 ++++++++++++------------ 10 files changed, 63 insertions(+), 68 deletions(-) (limited to 'mm/filemap.c') diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c index d4c3c36855e2..549f14ad08af 100644 --- a/arch/s390/mm/gmap_helpers.c +++ b/arch/s390/mm/gmap_helpers.c @@ -11,27 +11,27 @@ #include #include #include -#include +#include #include #include #include #include /** - * ptep_zap_swap_entry() - discard a swap entry. + * ptep_zap_softleaf_entry() - discard a software leaf entry. * @mm: the mm - * @entry: the swap entry that needs to be zapped + * @entry: the software leaf entry that needs to be zapped * - * Discards the given swap entry. If the swap entry was an actual swap - * entry (and not a migration entry, for example), the actual swapped + * Discards the given software leaf entry. If the leaf entry was an actual + * swap entry (and not a migration entry, for example), the actual swapped * page is also discarded from swap. */ -static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) +static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry) { - if (!non_swap_entry(entry)) + if (softleaf_is_swap(entry)) dec_mm_counter(mm, MM_SWAPENTS); - else if (is_migration_entry(entry)) - dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry))); + else if (softleaf_is_migration(entry)) + dec_mm_counter(mm, mm_counter(softleaf_to_folio(entry))); free_swap_and_cache(entry); } @@ -66,7 +66,7 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) preempt_disable(); pgste = pgste_get_lock(ptep); - ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep)); + ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep)); pte_clear(mm, vmaddr, ptep); pgste_set_unlock(ptep, pgste); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0fde20bbc50b..d670bfb47d9b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -683,12 +683,12 @@ void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) pgste_set_unlock(ptep, pgste); } -static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) +static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry) { - if (!non_swap_entry(entry)) + if (softleaf_is_swap(entry)) dec_mm_counter(mm, MM_SWAPENTS); - else if (is_migration_entry(entry)) { - struct folio *folio = pfn_swap_entry_folio(entry); + else if (softleaf_is_migration(entry)) { + struct folio *folio = softleaf_to_folio(entry); dec_mm_counter(mm, mm_counter(folio)); } @@ -710,7 +710,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, if (!reset && pte_swap(pte) && ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || (pgstev & _PGSTE_GPS_ZERO))) { - ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); + ptep_zap_softleaf_entry(mm, softleaf_from_pte(pte)); pte_clear(mm, addr, ptep); } if (reset) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 898df952b6bc..1f49c81b3591 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1020,13 +1020,13 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } else if (pte_none(ptent)) { smaps_pte_hole_lookup(addr, walk); } else { - swp_entry_t swpent = pte_to_swp_entry(ptent); + const softleaf_t entry = softleaf_from_pte(ptent); - if (!non_swap_entry(swpent)) { + if (softleaf_is_swap(entry)) { int mapcount; mss->swap += PAGE_SIZE; - mapcount = swp_swapcount(swpent); + mapcount = swp_swapcount(entry); if (mapcount >= 2) { u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT; @@ -1035,10 +1035,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } else { mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; } - } else if (is_pfn_swap_entry(swpent)) { - if (is_device_private_entry(swpent)) + } else if (softleaf_has_pfn(entry)) { + if (softleaf_is_device_private(entry)) present = true; - page = pfn_swap_entry_to_page(swpent); + page = softleaf_to_page(entry); } } diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 41cfc6d59054..c8e6f927da48 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -492,10 +492,5 @@ static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -static inline int non_swap_entry(swp_entry_t entry) -{ - return swp_type(entry) >= MAX_SWAPFILES; -} - #endif /* CONFIG_MMU */ #endif /* _LINUX_SWAPOPS_H */ diff --git a/mm/filemap.c b/mm/filemap.c index 02355aa46324..07634b7d9934 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -4567,7 +4567,7 @@ static void filemap_cachestat(struct address_space *mapping, swp_entry_t swp = radix_to_swp_entry(folio); /* swapin error results in poisoned entry */ - if (non_swap_entry(swp)) + if (!softleaf_is_swap(swp)) goto resched; /* diff --git a/mm/hmm.c b/mm/hmm.c index e9735a9b6102..0158f2d1e027 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -258,17 +258,17 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, } if (!pte_present(pte)) { - swp_entry_t entry = pte_to_swp_entry(pte); + const softleaf_t entry = softleaf_from_pte(pte); /* * Don't fault in device private pages owned by the caller, * just report the PFN. */ - if (is_device_private_entry(entry) && - page_pgmap(pfn_swap_entry_to_page(entry))->owner == + if (softleaf_is_device_private(entry) && + page_pgmap(softleaf_to_page(entry))->owner == range->dev_private_owner) { cpu_flags = HMM_PFN_VALID; - if (is_writable_device_private_entry(entry)) + if (softleaf_is_device_private_write(entry)) cpu_flags |= HMM_PFN_WRITE; new_pfn_flags = swp_offset_pfn(entry) | cpu_flags; goto out; @@ -279,16 +279,16 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, if (!required_fault) goto out; - if (!non_swap_entry(entry)) + if (softleaf_is_swap(entry)) goto fault; - if (is_device_private_entry(entry)) + if (softleaf_is_device_private(entry)) goto fault; - if (is_device_exclusive_entry(entry)) + if (softleaf_is_device_exclusive(entry)) goto fault; - if (is_migration_entry(entry)) { + if (softleaf_is_migration(entry)) { pte_unmap(ptep); hmm_vma_walk->last = addr; migration_entry_wait(walk->mm, pmdp, addr); diff --git a/mm/madvise.c b/mm/madvise.c index 5979a4a39738..d8bc51e1bea7 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -249,7 +249,7 @@ static void shmem_swapin_range(struct vm_area_struct *vma, continue; entry = radix_to_swp_entry(folio); /* There might be swapin error entries in shmem mapping. */ - if (non_swap_entry(entry)) + if (!softleaf_is_swap(entry)) continue; addr = vma->vm_start + diff --git a/mm/memory.c b/mm/memory.c index 95dac6a1cbc4..a3f001a47ecf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -932,7 +932,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, struct folio *folio; struct page *page; - if (likely(!non_swap_entry(entry))) { + if (likely(softleaf_is_swap(entry))) { if (swap_duplicate(entry) < 0) return -EIO; @@ -950,12 +950,12 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, set_pte_at(src_mm, addr, src_pte, pte); } rss[MM_SWAPENTS]++; - } else if (is_migration_entry(entry)) { - folio = pfn_swap_entry_folio(entry); + } else if (softleaf_is_migration(entry)) { + folio = softleaf_to_folio(entry); rss[mm_counter(folio)]++; - if (!is_readable_migration_entry(entry) && + if (!softleaf_is_migration_read(entry) && is_cow_mapping(vm_flags)) { /* * COW mappings require pages in both parent and child @@ -964,15 +964,15 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, */ entry = make_readable_migration_entry( swp_offset(entry)); - pte = swp_entry_to_pte(entry); + pte = softleaf_to_pte(entry); if (pte_swp_soft_dirty(orig_pte)) pte = pte_swp_mksoft_dirty(pte); if (pte_swp_uffd_wp(orig_pte)) pte = pte_swp_mkuffd_wp(pte); set_pte_at(src_mm, addr, src_pte, pte); } - } else if (is_device_private_entry(entry)) { - page = pfn_swap_entry_to_page(entry); + } else if (softleaf_is_device_private(entry)) { + page = softleaf_to_page(entry); folio = page_folio(page); /* @@ -996,7 +996,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, * when a device driver is involved (you cannot easily * save and restore device driver state). */ - if (is_writable_device_private_entry(entry) && + if (softleaf_is_device_private_write(entry) && is_cow_mapping(vm_flags)) { entry = make_readable_device_private_entry( swp_offset(entry)); @@ -1005,7 +1005,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte = pte_swp_mkuffd_wp(pte); set_pte_at(src_mm, addr, src_pte, pte); } - } else if (is_device_exclusive_entry(entry)) { + } else if (softleaf_is_device_exclusive(entry)) { /* * Make device exclusive entries present by restoring the * original entry then copying as for a present pte. Device @@ -4625,7 +4625,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) rmap_t rmap_flags = RMAP_NONE; bool need_clear_cache = false; bool exclusive = false; - swp_entry_t entry; + softleaf_t entry; pte_t pte; vm_fault_t ret = 0; void *shadow = NULL; @@ -4637,15 +4637,15 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!pte_unmap_same(vmf)) goto out; - entry = pte_to_swp_entry(vmf->orig_pte); - if (unlikely(non_swap_entry(entry))) { - if (is_migration_entry(entry)) { + entry = softleaf_from_pte(vmf->orig_pte); + if (unlikely(!softleaf_is_swap(entry))) { + if (softleaf_is_migration(entry)) { migration_entry_wait(vma->vm_mm, vmf->pmd, vmf->address); - } else if (is_device_exclusive_entry(entry)) { - vmf->page = pfn_swap_entry_to_page(entry); + } else if (softleaf_is_device_exclusive(entry)) { + vmf->page = softleaf_to_page(entry); ret = remove_device_exclusive_entry(vmf); - } else if (is_device_private_entry(entry)) { + } else if (softleaf_is_device_private(entry)) { if (vmf->flags & FAULT_FLAG_VMA_LOCK) { /* * migrate_to_ram is not yet ready to operate @@ -4656,7 +4656,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) goto out; } - vmf->page = pfn_swap_entry_to_page(entry); + vmf->page = softleaf_to_page(entry); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (unlikely(!vmf->pte || @@ -4680,7 +4680,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) } else { pte_unmap_unlock(vmf->pte, vmf->ptl); } - } else if (is_hwpoison_entry(entry)) { + } else if (softleaf_is_hwpoison(entry)) { ret = VM_FAULT_HWPOISON; } else if (softleaf_is_marker(entry)) { ret = handle_pte_marker(vmf); diff --git a/mm/mincore.c b/mm/mincore.c index b3682488a65d..9a908d8bb706 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -74,7 +74,7 @@ static unsigned char mincore_swap(swp_entry_t entry, bool shmem) * absent. Page table may contain migration or hwpoison * entries which are always uptodate. */ - if (non_swap_entry(entry)) + if (!softleaf_is_swap(entry)) return !shmem; /* diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 055ec1050776..bd1f74a7a5ac 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1256,7 +1256,6 @@ static long move_pages_ptes(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd unsigned long dst_addr, unsigned long src_addr, unsigned long len, __u64 mode) { - swp_entry_t entry; struct swap_info_struct *si = NULL; pte_t orig_src_pte, orig_dst_pte; pte_t src_folio_pte; @@ -1430,19 +1429,20 @@ retry: orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval, dst_ptl, src_ptl, &src_folio, len); - } else { + } else { /* !pte_present() */ struct folio *folio = NULL; + const softleaf_t entry = softleaf_from_pte(orig_src_pte); - entry = pte_to_swp_entry(orig_src_pte); - if (non_swap_entry(entry)) { - if (is_migration_entry(entry)) { - pte_unmap(src_pte); - pte_unmap(dst_pte); - src_pte = dst_pte = NULL; - migration_entry_wait(mm, src_pmd, src_addr); - ret = -EAGAIN; - } else - ret = -EFAULT; + if (softleaf_is_migration(entry)) { + pte_unmap(src_pte); + pte_unmap(dst_pte); + src_pte = dst_pte = NULL; + migration_entry_wait(mm, src_pmd, src_addr); + + ret = -EAGAIN; + goto out; + } else if (!softleaf_is_swap(entry)) { + ret = -EFAULT; goto out; } -- cgit v1.2.3 From c1bd09994c4d5b897571671bed16581335e93242 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 10 Nov 2025 15:20:08 -0800 Subject: memcg: remove __lruvec_stat_mod_folio __lruvec_stat_mod_folio() is already safe against irqs, so there is no need to have a separate interface (i.e. lruvec_stat_mod_folio) which wraps calls to it with irq disabling and reenabling. Let's rename __lruvec_stat_mod_folio() to lruvec_stat_mod_folio(). Link: https://lkml.kernel.org/r/20251110232008.1352063-5-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Reviewed-by: Harry Yoo Acked-by: Roman Gushchin Acked-by: Vlastimil Babka Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Qi Zheng Signed-off-by: Andrew Morton --- include/linux/vmstat.h | 30 +----------------------------- mm/filemap.c | 20 ++++++++++---------- mm/huge_memory.c | 4 ++-- mm/khugepaged.c | 8 ++++---- mm/memcontrol.c | 4 ++-- mm/page-writeback.c | 2 +- mm/rmap.c | 4 ++-- mm/shmem.c | 6 +++--- 8 files changed, 25 insertions(+), 53 deletions(-) (limited to 'mm/filemap.c') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 4eb7753e6e5c..3398a345bda8 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -523,19 +523,9 @@ static inline const char *vm_event_name(enum vm_event_item item) void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); -void __lruvec_stat_mod_folio(struct folio *folio, +void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val); -static inline void lruvec_stat_mod_folio(struct folio *folio, - enum node_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __lruvec_stat_mod_folio(folio, idx, val); - local_irq_restore(flags); -} - static inline void mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { @@ -550,12 +540,6 @@ static inline void mod_lruvec_state(struct lruvec *lruvec, mod_node_page_state(lruvec_pgdat(lruvec), idx, val); } -static inline void __lruvec_stat_mod_folio(struct folio *folio, - enum node_stat_item idx, int val) -{ - mod_node_page_state(folio_pgdat(folio), idx, val); -} - static inline void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { @@ -570,18 +554,6 @@ static inline void mod_lruvec_page_state(struct page *page, #endif /* CONFIG_MEMCG */ -static inline void __lruvec_stat_add_folio(struct folio *folio, - enum node_stat_item idx) -{ - __lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); -} - -static inline void __lruvec_stat_sub_folio(struct folio *folio, - enum node_stat_item idx) -{ - __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); -} - static inline void lruvec_stat_add_folio(struct folio *folio, enum node_stat_item idx) { diff --git a/mm/filemap.c b/mm/filemap.c index 07634b7d9934..7d15a9c216ef 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -182,13 +182,13 @@ static void filemap_unaccount_folio(struct address_space *mapping, nr = folio_nr_pages(folio); - __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr); + lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr); if (folio_test_swapbacked(folio)) { - __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr); + lruvec_stat_mod_folio(folio, NR_SHMEM, -nr); if (folio_test_pmd_mappable(folio)) - __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr); + lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr); } else if (folio_test_pmd_mappable(folio)) { - __lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr); + lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr); filemap_nr_thps_dec(mapping); } if (test_bit(AS_KERNEL_FILE, &folio->mapping->flags)) @@ -844,13 +844,13 @@ void replace_page_cache_folio(struct folio *old, struct folio *new) old->mapping = NULL; /* hugetlb pages do not participate in page cache accounting. */ if (!folio_test_hugetlb(old)) - __lruvec_stat_sub_folio(old, NR_FILE_PAGES); + lruvec_stat_sub_folio(old, NR_FILE_PAGES); if (!folio_test_hugetlb(new)) - __lruvec_stat_add_folio(new, NR_FILE_PAGES); + lruvec_stat_add_folio(new, NR_FILE_PAGES); if (folio_test_swapbacked(old)) - __lruvec_stat_sub_folio(old, NR_SHMEM); + lruvec_stat_sub_folio(old, NR_SHMEM); if (folio_test_swapbacked(new)) - __lruvec_stat_add_folio(new, NR_SHMEM); + lruvec_stat_add_folio(new, NR_SHMEM); xas_unlock_irq(&xas); if (free_folio) free_folio(old); @@ -933,9 +933,9 @@ noinline int __filemap_add_folio(struct address_space *mapping, /* hugetlb pages do not participate in page cache accounting */ if (!huge) { - __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr); + lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr); if (folio_test_pmd_mappable(folio)) - __lruvec_stat_mod_folio(folio, + lruvec_stat_mod_folio(folio, NR_FILE_THPS, nr); } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 53a8d380eab2..7af3e037d891 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3783,10 +3783,10 @@ static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int n if (folio_test_pmd_mappable(folio) && new_order < HPAGE_PMD_ORDER) { if (folio_test_swapbacked(folio)) { - __lruvec_stat_mod_folio(folio, + lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr); } else { - __lruvec_stat_mod_folio(folio, + lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr); filemap_nr_thps_dec(mapping); } diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 40f9d5939aa5..89c33ef7aac3 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2195,14 +2195,14 @@ immap_locked: } if (is_shmem) - __lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR); + lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR); else - __lruvec_stat_mod_folio(new_folio, NR_FILE_THPS, HPAGE_PMD_NR); + lruvec_stat_mod_folio(new_folio, NR_FILE_THPS, HPAGE_PMD_NR); if (nr_none) { - __lruvec_stat_mod_folio(new_folio, NR_FILE_PAGES, nr_none); + lruvec_stat_mod_folio(new_folio, NR_FILE_PAGES, nr_none); /* nr_none is always 0 for non-shmem. */ - __lruvec_stat_mod_folio(new_folio, NR_SHMEM, nr_none); + lruvec_stat_mod_folio(new_folio, NR_SHMEM, nr_none); } /* diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9a659f16af77..9b07db2cb232 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -777,7 +777,7 @@ void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, mod_memcg_lruvec_state(lruvec, idx, val); } -void __lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, +void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { struct mem_cgroup *memcg; @@ -797,7 +797,7 @@ void __lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, mod_lruvec_state(lruvec, idx, val); rcu_read_unlock(); } -EXPORT_SYMBOL(__lruvec_stat_mod_folio); +EXPORT_SYMBOL(lruvec_stat_mod_folio); void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 757bc4d3b5b5..d6b339cc876d 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2658,7 +2658,7 @@ static void folio_account_dirtied(struct folio *folio, inode_attach_wb(inode, folio); wb = inode_to_wb(inode); - __lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, nr); + lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, nr); __zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr); __node_stat_mod_folio(folio, NR_DIRTIED, nr); wb_stat_mod(wb, WB_RECLAIMABLE, nr); diff --git a/mm/rmap.c b/mm/rmap.c index d871f2eb821c..f955f02d570e 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1212,12 +1212,12 @@ static void __folio_mod_stat(struct folio *folio, int nr, int nr_pmdmapped) if (nr) { idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED; - __lruvec_stat_mod_folio(folio, idx, nr); + lruvec_stat_mod_folio(folio, idx, nr); } if (nr_pmdmapped) { if (folio_test_anon(folio)) { idx = NR_ANON_THPS; - __lruvec_stat_mod_folio(folio, idx, nr_pmdmapped); + lruvec_stat_mod_folio(folio, idx, nr_pmdmapped); } else { /* NR_*_PMDMAPPED are not maintained per-memcg */ idx = folio_test_swapbacked(folio) ? diff --git a/mm/shmem.c b/mm/shmem.c index fc835b3e4914..ad18172ff831 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -871,9 +871,9 @@ static unsigned int shmem_huge_global_enabled(struct inode *inode, pgoff_t index static void shmem_update_stats(struct folio *folio, int nr_pages) { if (folio_test_pmd_mappable(folio)) - __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr_pages); - __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr_pages); - __lruvec_stat_mod_folio(folio, NR_SHMEM, nr_pages); + lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr_pages); + lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr_pages); + lruvec_stat_mod_folio(folio, NR_SHMEM, nr_pages); } /* -- cgit v1.2.3