author Kairui Song <kasong@tencent.com> 2025-09-17 00:00:56 +0800
committer Andrew Morton <akpm@linux-foundation.org> 2025-09-21 14:22:24 -0700
commit 8578e0c00dcf0c58fbc32d4904ecaf8e802a6590 (patch)
tree a70afbc31ef8a41fd309b8dba959b9262033ce72 /mm/swap_table.h
parent 094dc8b059b11eef0888f43eeb0f3ac53ade5c87 (diff)
mm, swap: use the swap table for the swap cache and switch API
Introduce the basic swap table infrastructure, which for now is just a fixed-size flat array inside each swap cluster, with access wrappers. Each cluster contains a swap table of 512 entries. Each table entry is an opaque atomic long; it can be one of three types: a shadow (XA_VALUE), a folio (pointer), or NULL.

In this first step it only supports storing a folio or a shadow, and it is a drop-in replacement for the current swap cache. Convert all swap cache users to the new set of APIs.

Chris Li has been suggesting a new infrastructure for the swap cache for better performance, and that idea combined well with the swap table as the new backing structure. The lock contention range is now reduced to 2M clusters, which is much smaller than the 64M address_space, and the multiple address_space design can be dropped.

All the internal work is done with the swap_cache_get_* helpers. Swap cache lookup is still lock-less as before, and the helpers' calling contexts are the same as the original swap cache helpers. They still require a pin on the swap device to prevent the backing data from being freed.

Swap cache updates are now protected by the swap cluster lock instead of the XArray lock. This is mostly handled internally, but the new __swap_cache_* helpers require the caller to lock the cluster, so a few new cluster access and locking helpers are also introduced.

A fully cluster-based unified swap table can be implemented on top of this to take care of all count tracking and synchronization work, with dynamic allocation. It should reduce memory usage while improving performance further.

Link: https://lkml.kernel.org/r/20250916160100.31545-12-ryncsn@gmail.com
Co-developed-by: Chris Li <chrisl@kernel.org>
Signed-off-by: Chris Li <chrisl@kernel.org>
Signed-off-by: Kairui Song <kasong@tencent.com>
Acked-by: Chris Li <chrisl@kernel.org>
Suggested-by: Chris Li <chrisl@kernel.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: kernel test robot <oliver.sang@intel.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
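To make the locking rule above concrete, below is a minimal sketch (not code from this patch) of a cluster-locked swap-table update. Only folio_to_swp_tb(), swp_tb_is_folio() and __swap_table_xchg() come from the new mm/swap_table.h; swap_cluster_lock()/swap_cluster_unlock() and the offset arithmetic are assumptions standing in for the cluster access and locking helpers this series introduces.

/*
 * Illustrative sketch only: store a folio into one swap-table slot
 * while holding the cluster lock, as the __swap_table_* helpers
 * require.  swap_cluster_lock()/swap_cluster_unlock() are assumed
 * names, not necessarily the helpers added by this series.
 */
static void example_swap_cache_add(struct swap_info_struct *si,
				   swp_entry_t entry, struct folio *folio)
{
	unsigned int off = swp_offset(entry) % SWAPFILE_CLUSTER;
	struct swap_cluster_info *ci;
	unsigned long old;

	ci = swap_cluster_lock(si, swp_offset(entry));	/* assumed helper */
	old = __swap_table_xchg(ci, off, folio_to_swp_tb(folio));
	/* The slot must have held a shadow or NULL, never another folio. */
	VM_WARN_ON_ONCE(swp_tb_is_folio(old));
	swap_cluster_unlock(ci);			/* assumed helper */
}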
Diffstat (limited to 'mm/swap_table.h')
-rw-r--r--  mm/swap_table.h  97
1 file changed, 97 insertions, 0 deletions
diff --git a/mm/swap_table.h b/mm/swap_table.h
new file mode 100644
index 000000000000..e1f7cc009701
--- /dev/null
+++ b/mm/swap_table.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MM_SWAP_TABLE_H
+#define _MM_SWAP_TABLE_H
+
+#include "swap.h"
+
+/*
+ * A swap table entry represents the status of a swap slot on a swap
+ * (physical or virtual) device. The swap table in each cluster is a
+ * 1:1 map of the swap slots in this cluster.
+ *
+ * Each swap table entry could be a pointer (folio), an XA_VALUE
+ * (shadow), or NULL.
+ */
+
+/*
+ * Helpers for casting one type of info into a swap table entry.
+ */
+static inline unsigned long null_to_swp_tb(void)
+{
+ BUILD_BUG_ON(sizeof(unsigned long) != sizeof(atomic_long_t));
+ return 0;
+}
+
+static inline unsigned long folio_to_swp_tb(struct folio *folio)
+{
+ BUILD_BUG_ON(sizeof(unsigned long) != sizeof(void *));
+ return (unsigned long)folio;
+}
+
+static inline unsigned long shadow_swp_to_tb(void *shadow)
+{
+ BUILD_BUG_ON((BITS_PER_XA_VALUE + 1) !=
+ BITS_PER_BYTE * sizeof(unsigned long));
+ VM_WARN_ON_ONCE(shadow && !xa_is_value(shadow));
+ return (unsigned long)shadow;
+}
+
+/*
+ * Helpers for swap table entry type checking.
+ */
+static inline bool swp_tb_is_null(unsigned long swp_tb)
+{
+ return !swp_tb;
+}
+
+static inline bool swp_tb_is_folio(unsigned long swp_tb)
+{
+ return !xa_is_value((void *)swp_tb) && !swp_tb_is_null(swp_tb);
+}
+
+static inline bool swp_tb_is_shadow(unsigned long swp_tb)
+{
+ return xa_is_value((void *)swp_tb);
+}
+
+/*
+ * Helpers for retrieving info from swap table.
+ */
+static inline struct folio *swp_tb_to_folio(unsigned long swp_tb)
+{
+ VM_WARN_ON(!swp_tb_is_folio(swp_tb));
+ return (void *)swp_tb;
+}
+
+static inline void *swp_tb_to_shadow(unsigned long swp_tb)
+{
+ VM_WARN_ON(!swp_tb_is_shadow(swp_tb));
+ return (void *)swp_tb;
+}
+
+/*
+ * Helpers for accessing or modifying the swap table of a cluster,
+ * the swap cluster must be locked.
+ */
+static inline void __swap_table_set(struct swap_cluster_info *ci,
+ unsigned int off, unsigned long swp_tb)
+{
+ VM_WARN_ON_ONCE(off >= SWAPFILE_CLUSTER);
+ atomic_long_set(&ci->table[off], swp_tb);
+}
+
+static inline unsigned long __swap_table_xchg(struct swap_cluster_info *ci,
+ unsigned int off, unsigned long swp_tb)
+{
+ VM_WARN_ON_ONCE(off >= SWAPFILE_CLUSTER);
+ /* Ordering is guaranteed by cluster lock, relax */
+ return atomic_long_xchg_relaxed(&ci->table[off], swp_tb);
+}
+
+static inline unsigned long __swap_table_get(struct swap_cluster_info *ci,
+ unsigned int off)
+{
+ VM_WARN_ON_ONCE(off >= SWAPFILE_CLUSTER);
+ return atomic_long_read(&ci->table[off]);
+}
+#endif
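
For context, a lock-less reader built on the helpers above could read a slot once and dispatch on the entry type, roughly as sketched below. __swap_table_get() and the swp_tb_* helpers are from this header; the wrapper function itself is hypothetical and assumes the caller already holds a pin on the swap device, as the commit message requires.

/*
 * Illustrative sketch only: inspect one swap-table slot.  Returns the
 * cached folio if one is present, otherwise NULL; a remaining
 * workingset shadow, if any, is reported via @shadowp.  The function
 * is hypothetical; only the helpers it calls come from mm/swap_table.h.
 */
static struct folio *example_swap_cache_peek(struct swap_cluster_info *ci,
					     unsigned int off, void **shadowp)
{
	unsigned long swp_tb = __swap_table_get(ci, off);

	*shadowp = NULL;
	if (swp_tb_is_folio(swp_tb))
		return swp_tb_to_folio(swp_tb);		/* folio is cached */
	if (swp_tb_is_shadow(swp_tb))
		*shadowp = swp_tb_to_shadow(swp_tb);	/* only a shadow left */
	return NULL;					/* empty or shadow-only */
}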