summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2026-03-19 16:15:33 +0100
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2026-03-19 16:15:33 +0100
commit7e2dc8ed7862ac622b5a59953b679de97001dc83 (patch)
treed2d2cf61a22f5a6404000ee007c5e80bc2d9eca9 /mm
parenta7e8c9cc3a13baf3dcf9734dd55609aa7ff9a1a0 (diff)
parent4a2b0ed2ac7abe9743e1559d212075a0ebac96b3 (diff)
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/damon/core.c79
-rw-r--r--mm/damon/lru_sort.c4
-rw-r--r--mm/damon/reclaim.c4
-rw-r--r--mm/damon/stat.c2
-rw-r--r--mm/damon/sysfs.c11
-rw-r--r--mm/damon/tests/vaddr-kunit.h2
-rw-r--r--mm/damon/vaddr.c24
-rw-r--r--mm/filemap.c15
-rw-r--r--mm/huge_memory.c13
-rw-r--r--mm/kfence/core.c29
-rw-r--r--mm/memcontrol.c2
-rw-r--r--mm/memfd_luo.c56
-rw-r--r--mm/memory.c3
-rw-r--r--mm/migrate.c8
-rw-r--r--mm/migrate_device.c2
-rw-r--r--mm/page_alloc.c3
-rw-r--r--mm/slub.c54
17 files changed, 206 insertions, 105 deletions
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 84f80a20f233..ba3b7ff8ecf4 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -197,7 +197,7 @@ static int damon_fill_regions_holes(struct damon_region *first,
* @t: the given target.
* @ranges: array of new monitoring target ranges.
* @nr_ranges: length of @ranges.
- * @min_sz_region: minimum region size.
+ * @min_region_sz: minimum region size.
*
* This function adds new regions to, or modify existing regions of a
* monitoring target to fit in specific ranges.
@@ -205,7 +205,7 @@ static int damon_fill_regions_holes(struct damon_region *first,
* Return: 0 if success, or negative error code otherwise.
*/
int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
- unsigned int nr_ranges, unsigned long min_sz_region)
+ unsigned int nr_ranges, unsigned long min_region_sz)
{
struct damon_region *r, *next;
unsigned int i;
@@ -242,16 +242,16 @@ int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
/* no region intersects with this range */
newr = damon_new_region(
ALIGN_DOWN(range->start,
- min_sz_region),
- ALIGN(range->end, min_sz_region));
+ min_region_sz),
+ ALIGN(range->end, min_region_sz));
if (!newr)
return -ENOMEM;
damon_insert_region(newr, damon_prev_region(r), r, t);
} else {
/* resize intersecting regions to fit in this range */
first->ar.start = ALIGN_DOWN(range->start,
- min_sz_region);
- last->ar.end = ALIGN(range->end, min_sz_region);
+ min_region_sz);
+ last->ar.end = ALIGN(range->end, min_region_sz);
/* fill possible holes in the range */
err = damon_fill_regions_holes(first, last, t);
@@ -546,7 +546,7 @@ struct damon_ctx *damon_new_ctx(void)
ctx->attrs.max_nr_regions = 1000;
ctx->addr_unit = 1;
- ctx->min_sz_region = DAMON_MIN_REGION;
+ ctx->min_region_sz = DAMON_MIN_REGION_SZ;
INIT_LIST_HEAD(&ctx->adaptive_targets);
INIT_LIST_HEAD(&ctx->schemes);
@@ -1131,7 +1131,7 @@ static struct damon_target *damon_nth_target(int n, struct damon_ctx *ctx)
* If @src has no region, @dst keeps current regions.
*/
static int damon_commit_target_regions(struct damon_target *dst,
- struct damon_target *src, unsigned long src_min_sz_region)
+ struct damon_target *src, unsigned long src_min_region_sz)
{
struct damon_region *src_region;
struct damon_addr_range *ranges;
@@ -1148,7 +1148,7 @@ static int damon_commit_target_regions(struct damon_target *dst,
i = 0;
damon_for_each_region(src_region, src)
ranges[i++] = src_region->ar;
- err = damon_set_regions(dst, ranges, i, src_min_sz_region);
+ err = damon_set_regions(dst, ranges, i, src_min_region_sz);
kfree(ranges);
return err;
}
@@ -1156,11 +1156,11 @@ static int damon_commit_target_regions(struct damon_target *dst,
static int damon_commit_target(
struct damon_target *dst, bool dst_has_pid,
struct damon_target *src, bool src_has_pid,
- unsigned long src_min_sz_region)
+ unsigned long src_min_region_sz)
{
int err;
- err = damon_commit_target_regions(dst, src, src_min_sz_region);
+ err = damon_commit_target_regions(dst, src, src_min_region_sz);
if (err)
return err;
if (dst_has_pid)
@@ -1187,7 +1187,7 @@ static int damon_commit_targets(
err = damon_commit_target(
dst_target, damon_target_has_pid(dst),
src_target, damon_target_has_pid(src),
- src->min_sz_region);
+ src->min_region_sz);
if (err)
return err;
} else {
@@ -1214,7 +1214,7 @@ static int damon_commit_targets(
return -ENOMEM;
err = damon_commit_target(new_target, false,
src_target, damon_target_has_pid(src),
- src->min_sz_region);
+ src->min_region_sz);
if (err) {
damon_destroy_target(new_target, NULL);
return err;
@@ -1241,6 +1241,9 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src)
{
int err;
+ if (!is_power_of_2(src->min_region_sz))
+ return -EINVAL;
+
err = damon_commit_schemes(dst, src);
if (err)
return err;
@@ -1261,7 +1264,7 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src)
}
dst->ops = src->ops;
dst->addr_unit = src->addr_unit;
- dst->min_sz_region = src->min_sz_region;
+ dst->min_region_sz = src->min_region_sz;
return 0;
}
@@ -1294,8 +1297,8 @@ static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
if (ctx->attrs.min_nr_regions)
sz /= ctx->attrs.min_nr_regions;
- if (sz < ctx->min_sz_region)
- sz = ctx->min_sz_region;
+ if (sz < ctx->min_region_sz)
+ sz = ctx->min_region_sz;
return sz;
}
@@ -1531,8 +1534,13 @@ int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control)
}
ctx->walk_control = control;
mutex_unlock(&ctx->walk_control_lock);
- if (!damon_is_running(ctx))
+ if (!damon_is_running(ctx)) {
+ mutex_lock(&ctx->walk_control_lock);
+ if (ctx->walk_control == control)
+ ctx->walk_control = NULL;
+ mutex_unlock(&ctx->walk_control_lock);
return -EINVAL;
+ }
wait_for_completion(&control->completion);
if (control->canceled)
return -ECANCELED;
@@ -1668,7 +1676,7 @@ static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
* @t: The target of the region.
* @rp: The pointer to the region.
* @s: The scheme to be applied.
- * @min_sz_region: minimum region size.
+ * @min_region_sz: minimum region size.
*
* If a quota of a scheme has exceeded in a quota charge window, the scheme's
 * action would be applied to only a part of the target access pattern fulfilling
@@ -1686,7 +1694,8 @@ static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
* Return: true if the region should be entirely skipped, false otherwise.
*/
static bool damos_skip_charged_region(struct damon_target *t,
- struct damon_region **rp, struct damos *s, unsigned long min_sz_region)
+ struct damon_region **rp, struct damos *s,
+ unsigned long min_region_sz)
{
struct damon_region *r = *rp;
struct damos_quota *quota = &s->quota;
@@ -1708,11 +1717,11 @@ static bool damos_skip_charged_region(struct damon_target *t,
if (quota->charge_addr_from && r->ar.start <
quota->charge_addr_from) {
sz_to_skip = ALIGN_DOWN(quota->charge_addr_from -
- r->ar.start, min_sz_region);
+ r->ar.start, min_region_sz);
if (!sz_to_skip) {
- if (damon_sz_region(r) <= min_sz_region)
+ if (damon_sz_region(r) <= min_region_sz)
return true;
- sz_to_skip = min_sz_region;
+ sz_to_skip = min_region_sz;
}
damon_split_region_at(t, r, sz_to_skip);
r = damon_next_region(r);
@@ -1738,7 +1747,7 @@ static void damos_update_stat(struct damos *s,
static bool damos_filter_match(struct damon_ctx *ctx, struct damon_target *t,
struct damon_region *r, struct damos_filter *filter,
- unsigned long min_sz_region)
+ unsigned long min_region_sz)
{
bool matched = false;
struct damon_target *ti;
@@ -1755,8 +1764,8 @@ static bool damos_filter_match(struct damon_ctx *ctx, struct damon_target *t,
matched = target_idx == filter->target_idx;
break;
case DAMOS_FILTER_TYPE_ADDR:
- start = ALIGN_DOWN(filter->addr_range.start, min_sz_region);
- end = ALIGN_DOWN(filter->addr_range.end, min_sz_region);
+ start = ALIGN_DOWN(filter->addr_range.start, min_region_sz);
+ end = ALIGN_DOWN(filter->addr_range.end, min_region_sz);
/* inside the range */
if (start <= r->ar.start && r->ar.end <= end) {
@@ -1792,7 +1801,7 @@ static bool damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
s->core_filters_allowed = false;
damos_for_each_core_filter(filter, s) {
- if (damos_filter_match(ctx, t, r, filter, ctx->min_sz_region)) {
+ if (damos_filter_match(ctx, t, r, filter, ctx->min_region_sz)) {
if (filter->allow)
s->core_filters_allowed = true;
return !filter->allow;
@@ -1927,7 +1936,7 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
if (c->ops.apply_scheme) {
if (quota->esz && quota->charged_sz + sz > quota->esz) {
sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
- c->min_sz_region);
+ c->min_region_sz);
if (!sz)
goto update_stat;
damon_split_region_at(t, r, sz);
@@ -1975,7 +1984,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
if (quota->esz && quota->charged_sz >= quota->esz)
continue;
- if (damos_skip_charged_region(t, &r, s, c->min_sz_region))
+ if (damos_skip_charged_region(t, &r, s, c->min_region_sz))
continue;
if (!damos_valid_target(c, t, r, s))
@@ -2424,7 +2433,7 @@ static void damon_split_region_at(struct damon_target *t,
/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_target *t, int nr_subs,
- unsigned long min_sz_region)
+ unsigned long min_region_sz)
{
struct damon_region *r, *next;
unsigned long sz_region, sz_sub = 0;
@@ -2434,13 +2443,13 @@ static void damon_split_regions_of(struct damon_target *t, int nr_subs,
sz_region = damon_sz_region(r);
for (i = 0; i < nr_subs - 1 &&
- sz_region > 2 * min_sz_region; i++) {
+ sz_region > 2 * min_region_sz; i++) {
/*
* Randomly select size of left sub-region to be at
* least 10 percent and at most 90% of original region
*/
sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
- sz_region / 10, min_sz_region);
+ sz_region / 10, min_region_sz);
/* Do not allow blank region */
if (sz_sub == 0 || sz_sub >= sz_region)
continue;
@@ -2480,7 +2489,7 @@ static void kdamond_split_regions(struct damon_ctx *ctx)
nr_subregions = 3;
damon_for_each_target(t, ctx)
- damon_split_regions_of(t, nr_subregions, ctx->min_sz_region);
+ damon_split_regions_of(t, nr_subregions, ctx->min_region_sz);
last_nr_regions = nr_regions;
}
@@ -2850,7 +2859,7 @@ static bool damon_find_biggest_system_ram(unsigned long *start,
* @t: The monitoring target to set the region.
* @start: The pointer to the start address of the region.
* @end: The pointer to the end address of the region.
- * @min_sz_region: Minimum region size.
+ * @min_region_sz: Minimum region size.
*
* This function sets the region of @t as requested by @start and @end. If the
* values of @start and @end are zero, however, this function finds the biggest
@@ -2862,7 +2871,7 @@ static bool damon_find_biggest_system_ram(unsigned long *start,
*/
int damon_set_region_biggest_system_ram_default(struct damon_target *t,
unsigned long *start, unsigned long *end,
- unsigned long min_sz_region)
+ unsigned long min_region_sz)
{
struct damon_addr_range addr_range;
@@ -2875,7 +2884,7 @@ int damon_set_region_biggest_system_ram_default(struct damon_target *t,
addr_range.start = *start;
addr_range.end = *end;
- return damon_set_regions(t, &addr_range, 1, min_sz_region);
+ return damon_set_regions(t, &addr_range, 1, min_region_sz);
}
/*
diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
index 49b4bc294f4e..9cef1619527f 100644
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -212,7 +212,7 @@ static int damon_lru_sort_apply_parameters(void)
if (!monitor_region_start && !monitor_region_end)
addr_unit = 1;
param_ctx->addr_unit = addr_unit;
- param_ctx->min_sz_region = max(DAMON_MIN_REGION / addr_unit, 1);
+ param_ctx->min_region_sz = max(DAMON_MIN_REGION_SZ / addr_unit, 1);
if (!damon_lru_sort_mon_attrs.sample_interval) {
err = -EINVAL;
@@ -243,7 +243,7 @@ static int damon_lru_sort_apply_parameters(void)
err = damon_set_region_biggest_system_ram_default(param_target,
&monitor_region_start,
&monitor_region_end,
- param_ctx->min_sz_region);
+ param_ctx->min_region_sz);
if (err)
goto out;
err = damon_commit_ctx(ctx, param_ctx);
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index 36a582e09eae..c262ec6cb545 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -208,7 +208,7 @@ static int damon_reclaim_apply_parameters(void)
if (!monitor_region_start && !monitor_region_end)
addr_unit = 1;
param_ctx->addr_unit = addr_unit;
- param_ctx->min_sz_region = max(DAMON_MIN_REGION / addr_unit, 1);
+ param_ctx->min_region_sz = max(DAMON_MIN_REGION_SZ / addr_unit, 1);
if (!damon_reclaim_mon_attrs.aggr_interval) {
err = -EINVAL;
@@ -251,7 +251,7 @@ static int damon_reclaim_apply_parameters(void)
err = damon_set_region_biggest_system_ram_default(param_target,
&monitor_region_start,
&monitor_region_end,
- param_ctx->min_sz_region);
+ param_ctx->min_region_sz);
if (err)
goto out;
err = damon_commit_ctx(ctx, param_ctx);
diff --git a/mm/damon/stat.c b/mm/damon/stat.c
index ed8e3629d31a..922a6a6e65db 100644
--- a/mm/damon/stat.c
+++ b/mm/damon/stat.c
@@ -189,7 +189,7 @@ static struct damon_ctx *damon_stat_build_ctx(void)
goto free_out;
damon_add_target(ctx, target);
if (damon_set_region_biggest_system_ram_default(target, &start, &end,
- ctx->min_sz_region))
+ ctx->min_region_sz))
goto free_out;
return ctx;
free_out:
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 95fd9375a7d8..4a74c46770c0 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1365,7 +1365,7 @@ static int damon_sysfs_set_attrs(struct damon_ctx *ctx,
static int damon_sysfs_set_regions(struct damon_target *t,
struct damon_sysfs_regions *sysfs_regions,
- unsigned long min_sz_region)
+ unsigned long min_region_sz)
{
struct damon_addr_range *ranges = kmalloc_array(sysfs_regions->nr,
sizeof(*ranges), GFP_KERNEL | __GFP_NOWARN);
@@ -1387,7 +1387,7 @@ static int damon_sysfs_set_regions(struct damon_target *t,
if (ranges[i - 1].end > ranges[i].start)
goto out;
}
- err = damon_set_regions(t, ranges, sysfs_regions->nr, min_sz_region);
+ err = damon_set_regions(t, ranges, sysfs_regions->nr, min_region_sz);
out:
kfree(ranges);
return err;
@@ -1409,7 +1409,8 @@ static int damon_sysfs_add_target(struct damon_sysfs_target *sys_target,
return -EINVAL;
}
t->obsolete = sys_target->obsolete;
- return damon_sysfs_set_regions(t, sys_target->regions, ctx->min_sz_region);
+ return damon_sysfs_set_regions(t, sys_target->regions,
+ ctx->min_region_sz);
}
static int damon_sysfs_add_targets(struct damon_ctx *ctx,
@@ -1469,8 +1470,8 @@ static int damon_sysfs_apply_inputs(struct damon_ctx *ctx,
ctx->addr_unit = sys_ctx->addr_unit;
/* addr_unit is respected by only DAMON_OPS_PADDR */
if (sys_ctx->ops_id == DAMON_OPS_PADDR)
- ctx->min_sz_region = max(
- DAMON_MIN_REGION / sys_ctx->addr_unit, 1);
+ ctx->min_region_sz = max(
+ DAMON_MIN_REGION_SZ / sys_ctx->addr_unit, 1);
err = damon_sysfs_set_attrs(ctx, sys_ctx->attrs);
if (err)
return err;
diff --git a/mm/damon/tests/vaddr-kunit.h b/mm/damon/tests/vaddr-kunit.h
index 30dc5459f1d2..cfae870178bf 100644
--- a/mm/damon/tests/vaddr-kunit.h
+++ b/mm/damon/tests/vaddr-kunit.h
@@ -147,7 +147,7 @@ static void damon_do_test_apply_three_regions(struct kunit *test,
damon_add_region(r, t);
}
- damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION);
+ damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION_SZ);
for (i = 0; i < nr_expected / 2; i++) {
r = __nth_region_of(t, i);
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 23ed738a0bd6..226a3f0c9b4a 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -19,8 +19,8 @@
#include "ops-common.h"
#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
-#undef DAMON_MIN_REGION
-#define DAMON_MIN_REGION 1
+#undef DAMON_MIN_REGION_SZ
+#define DAMON_MIN_REGION_SZ 1
#endif
/*
@@ -78,7 +78,7 @@ static int damon_va_evenly_split_region(struct damon_target *t,
orig_end = r->ar.end;
sz_orig = damon_sz_region(r);
- sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION);
+ sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION_SZ);
if (!sz_piece)
return -EINVAL;
@@ -161,12 +161,12 @@ next:
swap(first_gap, second_gap);
/* Store the result */
- regions[0].start = ALIGN(start, DAMON_MIN_REGION);
- regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION);
- regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION);
- regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION);
- regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION);
- regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION);
+ regions[0].start = ALIGN(start, DAMON_MIN_REGION_SZ);
+ regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION_SZ);
+ regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION_SZ);
+ regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION_SZ);
+ regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION_SZ);
+ regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION_SZ);
return 0;
}
@@ -259,8 +259,8 @@ static void __damon_va_init_regions(struct damon_ctx *ctx,
sz += regions[i].end - regions[i].start;
if (ctx->attrs.min_nr_regions)
sz /= ctx->attrs.min_nr_regions;
- if (sz < DAMON_MIN_REGION)
- sz = DAMON_MIN_REGION;
+ if (sz < DAMON_MIN_REGION_SZ)
+ sz = DAMON_MIN_REGION_SZ;
/* Set the initial three regions of the target */
for (i = 0; i < 3; i++) {
@@ -299,7 +299,7 @@ static void damon_va_update(struct damon_ctx *ctx)
damon_for_each_target(t, ctx) {
if (damon_va_three_regions(t, three_regions))
continue;
- damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION);
+ damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION_SZ);
}
}
diff --git a/mm/filemap.c b/mm/filemap.c
index ebd75684cb0a..d98e4883f13d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1379,14 +1379,16 @@ repeat:
#ifdef CONFIG_MIGRATION
/**
- * migration_entry_wait_on_locked - Wait for a migration entry to be removed
- * @entry: migration swap entry.
+ * softleaf_entry_wait_on_locked - Wait for a migration entry or
+ * device_private entry to be removed.
+ * @entry: migration or device_private swap entry.
* @ptl: already locked ptl. This function will drop the lock.
*
- * Wait for a migration entry referencing the given page to be removed. This is
+ * Wait for a migration entry referencing the given page, or device_private
+ * entry referencing a device_private page to be unlocked. This is
* equivalent to folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE) except
* this can be called without taking a reference on the page. Instead this
- * should be called while holding the ptl for the migration entry referencing
+ * should be called while holding the ptl for @entry referencing
* the page.
*
* Returns after unlocking the ptl.
@@ -1394,7 +1396,7 @@ repeat:
* This follows the same logic as folio_wait_bit_common() so see the comments
* there.
*/
-void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
+void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
__releases(ptl)
{
struct wait_page_queue wait_page;
@@ -1428,6 +1430,9 @@ void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
* If a migration entry exists for the page the migration path must hold
* a valid reference to the page, and it must take the ptl to remove the
* migration entry. So the page is valid until the ptl is dropped.
+ * Similarly any path attempting to drop the last reference to a
+ * device-private page needs to grab the ptl to remove the device-private
+ * entry.
*/
spin_unlock(ptl);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d3beddd8cc30..e3992314df9a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3631,6 +3631,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
const bool is_anon = folio_test_anon(folio);
int old_order = folio_order(folio);
int start_order = split_type == SPLIT_TYPE_UNIFORM ? new_order : old_order - 1;
+ struct folio *old_folio = folio;
int split_order;
/*
@@ -3651,12 +3652,16 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
* uniform split has xas_split_alloc() called before
* irq is disabled to allocate enough memory, whereas
* non-uniform split can handle ENOMEM.
+ * Use the to-be-split folio, so that a parallel
+ * folio_try_get() waits on it until xarray is updated
+ * with after-split folios and the original one is
+ * unfrozen.
*/
- if (split_type == SPLIT_TYPE_UNIFORM)
- xas_split(xas, folio, old_order);
- else {
+ if (split_type == SPLIT_TYPE_UNIFORM) {
+ xas_split(xas, old_folio, old_order);
+ } else {
xas_set_order(xas, folio->index, split_order);
- xas_try_split(xas, folio, old_order);
+ xas_try_split(xas, old_folio, old_order);
if (xas_error(xas))
return xas_error(xas);
}
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 4f79ec720752..30959c97b881 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -13,6 +13,7 @@
#include <linux/hash.h>
#include <linux/irq_work.h>
#include <linux/jhash.h>
+#include <linux/kasan-enabled.h>
#include <linux/kcsan-checks.h>
#include <linux/kfence.h>
#include <linux/kmemleak.h>
@@ -912,6 +913,20 @@ void __init kfence_alloc_pool_and_metadata(void)
return;
/*
+ * If KASAN hardware tags are enabled, disable KFENCE, because it
+ * does not support MTE yet.
+ */
+ if (kasan_hw_tags_enabled()) {
+ pr_info("disabled as KASAN HW tags are enabled\n");
+ if (__kfence_pool) {
+ memblock_free(__kfence_pool, KFENCE_POOL_SIZE);
+ __kfence_pool = NULL;
+ }
+ kfence_sample_interval = 0;
+ return;
+ }
+
+ /*
* If the pool has already been initialized by arch, there is no need to
* re-allocate the memory pool.
*/
@@ -984,14 +999,14 @@ static int kfence_init_late(void)
#ifdef CONFIG_CONTIG_ALLOC
struct page *pages;
- pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL, first_online_node,
- NULL);
+ pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL | __GFP_SKIP_KASAN,
+ first_online_node, NULL);
if (!pages)
return -ENOMEM;
__kfence_pool = page_to_virt(pages);
- pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL, first_online_node,
- NULL);
+ pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL | __GFP_SKIP_KASAN,
+ first_online_node, NULL);
if (pages)
kfence_metadata_init = page_to_virt(pages);
#else
@@ -1001,11 +1016,13 @@ static int kfence_init_late(void)
return -EINVAL;
}
- __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, GFP_KERNEL);
+ __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE,
+ GFP_KERNEL | __GFP_SKIP_KASAN);
if (!__kfence_pool)
return -ENOMEM;
- kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE, GFP_KERNEL);
+ kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE,
+ GFP_KERNEL | __GFP_SKIP_KASAN);
#endif
if (!kfence_metadata_init)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 702c3db624a0..a7b5192ad7d5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3052,7 +3052,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
if (!local_trylock(&obj_stock.lock)) {
if (pgdat)
- mod_objcg_mlstate(objcg, pgdat, idx, nr_bytes);
+ mod_objcg_mlstate(objcg, pgdat, idx, nr_acct);
nr_pages = nr_bytes >> PAGE_SHIFT;
nr_bytes = nr_bytes & (PAGE_SIZE - 1);
atomic_add(nr_bytes, &objcg->nr_charged_bytes);
diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c
index a34fccc23b6a..02993d83d39c 100644
--- a/mm/memfd_luo.c
+++ b/mm/memfd_luo.c
@@ -146,19 +146,56 @@ static int memfd_luo_preserve_folios(struct file *file,
for (i = 0; i < nr_folios; i++) {
struct memfd_luo_folio_ser *pfolio = &folios_ser[i];
struct folio *folio = folios[i];
- unsigned int flags = 0;
err = kho_preserve_folio(folio);
if (err)
goto err_unpreserve;
- if (folio_test_dirty(folio))
- flags |= MEMFD_LUO_FOLIO_DIRTY;
- if (folio_test_uptodate(folio))
- flags |= MEMFD_LUO_FOLIO_UPTODATE;
+ folio_lock(folio);
+
+ /*
+ * A dirty folio is one which has been written to. A clean folio
+ * is its opposite. Since a clean folio does not carry user
+ * data, it can be freed by page reclaim under memory pressure.
+ *
+ * Saving the dirty flag at prepare() time doesn't work since it
+ * can change later. Saving it at freeze() also won't work
+ * because the dirty bit is normally synced at unmap and there
+ * might still be a mapping of the file at freeze().
+ *
+ * To see why this is a problem, say a folio is clean at
+ * preserve, but gets dirtied later. The pfolio flags will mark
+ * it as clean. After retrieve, the next kernel might try to
+ * reclaim this folio under memory pressure, losing user data.
+ *
+ * Unconditionally mark it dirty to avoid this problem. This
+ * comes at the cost of making clean folios un-reclaimable after
+ * live update.
+ */
+ folio_mark_dirty(folio);
+
+ /*
+ * If the folio is not uptodate, it was fallocated but never
+ * used. Saving this flag at prepare() doesn't work since it
+ * might change later when someone uses the folio.
+ *
+ * Since we have taken the performance penalty of allocating,
+ * zeroing, and pinning all the folios in the holes, take a bit
+ * more and zero all non-uptodate folios too.
+ *
+ * NOTE: For someone looking to improve preserve performance,
+ * this is a good place to look.
+ */
+ if (!folio_test_uptodate(folio)) {
+ folio_zero_range(folio, 0, folio_size(folio));
+ flush_dcache_folio(folio);
+ folio_mark_uptodate(folio);
+ }
+
+ folio_unlock(folio);
pfolio->pfn = folio_pfn(folio);
- pfolio->flags = flags;
+ pfolio->flags = MEMFD_LUO_FOLIO_DIRTY | MEMFD_LUO_FOLIO_UPTODATE;
pfolio->index = folio->index;
}
@@ -326,7 +363,12 @@ static void memfd_luo_finish(struct liveupdate_file_op_args *args)
struct memfd_luo_folio_ser *folios_ser;
struct memfd_luo_ser *ser;
- if (args->retrieved)
+ /*
+ * If retrieve was successful, nothing to do. If it failed, retrieve()
+ * already cleaned up everything it could. So nothing to do there
+ * either. Only need to clean up when retrieve was not called.
+ */
+ if (args->retrieve_status)
return;
ser = phys_to_virt(args->serialized_data);
diff --git a/mm/memory.c b/mm/memory.c
index da360a6eb8a4..20172476a57f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4684,7 +4684,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
unlock_page(vmf->page);
put_page(vmf->page);
} else {
- pte_unmap_unlock(vmf->pte, vmf->ptl);
+ pte_unmap(vmf->pte);
+ softleaf_entry_wait_on_locked(entry, vmf->ptl);
}
} else if (softleaf_is_hwpoison(entry)) {
ret = VM_FAULT_HWPOISON;
diff --git a/mm/migrate.c b/mm/migrate.c
index 4688b9e38cd2..cf6449b4202e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -499,7 +499,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
if (!softleaf_is_migration(entry))
goto out;
- migration_entry_wait_on_locked(entry, ptl);
+ softleaf_entry_wait_on_locked(entry, ptl);
return;
out:
spin_unlock(ptl);
@@ -531,10 +531,10 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, p
* If migration entry existed, safe to release vma lock
* here because the pgtable page won't be freed without the
* pgtable lock released. See comment right above pgtable
- * lock release in migration_entry_wait_on_locked().
+ * lock release in softleaf_entry_wait_on_locked().
*/
hugetlb_vma_unlock_read(vma);
- migration_entry_wait_on_locked(entry, ptl);
+ softleaf_entry_wait_on_locked(entry, ptl);
return;
}
@@ -552,7 +552,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
ptl = pmd_lock(mm, pmd);
if (!pmd_is_migration_entry(*pmd))
goto unlock;
- migration_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl);
+ softleaf_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl);
return;
unlock:
spin_unlock(ptl);
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 23379663b1e1..deab89fd4541 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -176,7 +176,7 @@ static int migrate_vma_collect_huge_pmd(pmd_t *pmdp, unsigned long start,
}
if (softleaf_is_migration(entry)) {
- migration_entry_wait_on_locked(entry, ptl);
+ softleaf_entry_wait_on_locked(entry, ptl);
spin_unlock(ptl);
return -EAGAIN;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 04e32adaeb1d..469ee8cb7b2e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6941,7 +6941,8 @@ static int __alloc_contig_verify_gfp_mask(gfp_t gfp_mask, gfp_t *gfp_cc_mask)
{
const gfp_t reclaim_mask = __GFP_IO | __GFP_FS | __GFP_RECLAIM;
const gfp_t action_mask = __GFP_COMP | __GFP_RETRY_MAYFAIL | __GFP_NOWARN |
- __GFP_ZERO | __GFP_ZEROTAGS | __GFP_SKIP_ZERO;
+ __GFP_ZERO | __GFP_ZEROTAGS | __GFP_SKIP_ZERO |
+ __GFP_SKIP_KASAN;
const gfp_t cc_action_mask = __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
/*
diff --git a/mm/slub.c b/mm/slub.c
index b68db0f5a637..92f891816bb8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2113,13 +2113,6 @@ static inline size_t obj_exts_alloc_size(struct kmem_cache *s,
size_t sz = sizeof(struct slabobj_ext) * slab->objects;
struct kmem_cache *obj_exts_cache;
- /*
- * slabobj_ext array for KMALLOC_CGROUP allocations
- * are served from KMALLOC_NORMAL caches.
- */
- if (!mem_alloc_profiling_enabled())
- return sz;
-
if (sz > KMALLOC_MAX_CACHE_SIZE)
return sz;
@@ -2737,19 +2730,19 @@ static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p);
* object pointers are moved to a on-stack array under the lock. To bound the
* stack usage, limit each batch to PCS_BATCH_MAX.
*
- * returns true if at least partially flushed
+ * Must be called with s->cpu_sheaves->lock locked, returns with the lock
+ * unlocked.
+ *
+ * Returns how many objects are remaining to be flushed
*/
-static bool sheaf_flush_main(struct kmem_cache *s)
+static unsigned int __sheaf_flush_main_batch(struct kmem_cache *s)
{
struct slub_percpu_sheaves *pcs;
unsigned int batch, remaining;
void *objects[PCS_BATCH_MAX];
struct slab_sheaf *sheaf;
- bool ret = false;
-next_batch:
- if (!local_trylock(&s->cpu_sheaves->lock))
- return ret;
+ lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
pcs = this_cpu_ptr(s->cpu_sheaves);
sheaf = pcs->main;
@@ -2767,10 +2760,37 @@ next_batch:
stat_add(s, SHEAF_FLUSH, batch);
- ret = true;
+ return remaining;
+}
- if (remaining)
- goto next_batch;
+static void sheaf_flush_main(struct kmem_cache *s)
+{
+ unsigned int remaining;
+
+ do {
+ local_lock(&s->cpu_sheaves->lock);
+
+ remaining = __sheaf_flush_main_batch(s);
+
+ } while (remaining);
+}
+
+/*
+ * Returns true if the main sheaf was at least partially flushed.
+ */
+static bool sheaf_try_flush_main(struct kmem_cache *s)
+{
+ unsigned int remaining;
+ bool ret = false;
+
+ do {
+ if (!local_trylock(&s->cpu_sheaves->lock))
+ return ret;
+
+ ret = true;
+ remaining = __sheaf_flush_main_batch(s);
+
+ } while (remaining);
return ret;
}
@@ -6222,7 +6242,7 @@ alloc_empty:
if (put_fail)
stat(s, BARN_PUT_FAIL);
- if (!sheaf_flush_main(s))
+ if (!sheaf_try_flush_main(s))
return NULL;
if (!local_trylock(&s->cpu_sheaves->lock))