diff options
| author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2026-03-19 16:15:33 +0100 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2026-03-19 16:15:33 +0100 |
| commit | 7e2dc8ed7862ac622b5a59953b679de97001dc83 (patch) | |
| tree | d2d2cf61a22f5a6404000ee007c5e80bc2d9eca9 /mm | |
| parent | a7e8c9cc3a13baf3dcf9734dd55609aa7ff9a1a0 (diff) | |
| parent | 4a2b0ed2ac7abe9743e1559d212075a0ebac96b3 (diff) | |
Merge v6.19.9linux-rolling-stable
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/damon/core.c | 79 | ||||
| -rw-r--r-- | mm/damon/lru_sort.c | 4 | ||||
| -rw-r--r-- | mm/damon/reclaim.c | 4 | ||||
| -rw-r--r-- | mm/damon/stat.c | 2 | ||||
| -rw-r--r-- | mm/damon/sysfs.c | 11 | ||||
| -rw-r--r-- | mm/damon/tests/vaddr-kunit.h | 2 | ||||
| -rw-r--r-- | mm/damon/vaddr.c | 24 | ||||
| -rw-r--r-- | mm/filemap.c | 15 | ||||
| -rw-r--r-- | mm/huge_memory.c | 13 | ||||
| -rw-r--r-- | mm/kfence/core.c | 29 | ||||
| -rw-r--r-- | mm/memcontrol.c | 2 | ||||
| -rw-r--r-- | mm/memfd_luo.c | 56 | ||||
| -rw-r--r-- | mm/memory.c | 3 | ||||
| -rw-r--r-- | mm/migrate.c | 8 | ||||
| -rw-r--r-- | mm/migrate_device.c | 2 | ||||
| -rw-r--r-- | mm/page_alloc.c | 3 | ||||
| -rw-r--r-- | mm/slub.c | 54 |
17 files changed, 206 insertions, 105 deletions
diff --git a/mm/damon/core.c b/mm/damon/core.c index 84f80a20f233..ba3b7ff8ecf4 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -197,7 +197,7 @@ static int damon_fill_regions_holes(struct damon_region *first, * @t: the given target. * @ranges: array of new monitoring target ranges. * @nr_ranges: length of @ranges. - * @min_sz_region: minimum region size. + * @min_region_sz: minimum region size. * * This function adds new regions to, or modify existing regions of a * monitoring target to fit in specific ranges. @@ -205,7 +205,7 @@ static int damon_fill_regions_holes(struct damon_region *first, * Return: 0 if success, or negative error code otherwise. */ int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, - unsigned int nr_ranges, unsigned long min_sz_region) + unsigned int nr_ranges, unsigned long min_region_sz) { struct damon_region *r, *next; unsigned int i; @@ -242,16 +242,16 @@ int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, /* no region intersects with this range */ newr = damon_new_region( ALIGN_DOWN(range->start, - min_sz_region), - ALIGN(range->end, min_sz_region)); + min_region_sz), + ALIGN(range->end, min_region_sz)); if (!newr) return -ENOMEM; damon_insert_region(newr, damon_prev_region(r), r, t); } else { /* resize intersecting regions to fit in this range */ first->ar.start = ALIGN_DOWN(range->start, - min_sz_region); - last->ar.end = ALIGN(range->end, min_sz_region); + min_region_sz); + last->ar.end = ALIGN(range->end, min_region_sz); /* fill possible holes in the range */ err = damon_fill_regions_holes(first, last, t); @@ -546,7 +546,7 @@ struct damon_ctx *damon_new_ctx(void) ctx->attrs.max_nr_regions = 1000; ctx->addr_unit = 1; - ctx->min_sz_region = DAMON_MIN_REGION; + ctx->min_region_sz = DAMON_MIN_REGION_SZ; INIT_LIST_HEAD(&ctx->adaptive_targets); INIT_LIST_HEAD(&ctx->schemes); @@ -1131,7 +1131,7 @@ static struct damon_target *damon_nth_target(int n, struct damon_ctx *ctx) * If @src has no region, @dst keeps current regions. */ static int damon_commit_target_regions(struct damon_target *dst, - struct damon_target *src, unsigned long src_min_sz_region) + struct damon_target *src, unsigned long src_min_region_sz) { struct damon_region *src_region; struct damon_addr_range *ranges; @@ -1148,7 +1148,7 @@ static int damon_commit_target_regions(struct damon_target *dst, i = 0; damon_for_each_region(src_region, src) ranges[i++] = src_region->ar; - err = damon_set_regions(dst, ranges, i, src_min_sz_region); + err = damon_set_regions(dst, ranges, i, src_min_region_sz); kfree(ranges); return err; } @@ -1156,11 +1156,11 @@ static int damon_commit_target_regions(struct damon_target *dst, static int damon_commit_target( struct damon_target *dst, bool dst_has_pid, struct damon_target *src, bool src_has_pid, - unsigned long src_min_sz_region) + unsigned long src_min_region_sz) { int err; - err = damon_commit_target_regions(dst, src, src_min_sz_region); + err = damon_commit_target_regions(dst, src, src_min_region_sz); if (err) return err; if (dst_has_pid) @@ -1187,7 +1187,7 @@ static int damon_commit_targets( err = damon_commit_target( dst_target, damon_target_has_pid(dst), src_target, damon_target_has_pid(src), - src->min_sz_region); + src->min_region_sz); if (err) return err; } else { @@ -1214,7 +1214,7 @@ static int damon_commit_targets( return -ENOMEM; err = damon_commit_target(new_target, false, src_target, damon_target_has_pid(src), - src->min_sz_region); + src->min_region_sz); if (err) { damon_destroy_target(new_target, NULL); return err; @@ -1241,6 +1241,9 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src) { int err; + if (!is_power_of_2(src->min_region_sz)) + return -EINVAL; + err = damon_commit_schemes(dst, src); if (err) return err; @@ -1261,7 +1264,7 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src) } dst->ops = src->ops; dst->addr_unit = src->addr_unit; - dst->min_sz_region = src->min_sz_region; + dst->min_region_sz = src->min_region_sz; return 0; } @@ -1294,8 +1297,8 @@ static unsigned long damon_region_sz_limit(struct damon_ctx *ctx) if (ctx->attrs.min_nr_regions) sz /= ctx->attrs.min_nr_regions; - if (sz < ctx->min_sz_region) - sz = ctx->min_sz_region; + if (sz < ctx->min_region_sz) + sz = ctx->min_region_sz; return sz; } @@ -1531,8 +1534,13 @@ int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control) } ctx->walk_control = control; mutex_unlock(&ctx->walk_control_lock); - if (!damon_is_running(ctx)) + if (!damon_is_running(ctx)) { + mutex_lock(&ctx->walk_control_lock); + if (ctx->walk_control == control) + ctx->walk_control = NULL; + mutex_unlock(&ctx->walk_control_lock); return -EINVAL; + } wait_for_completion(&control->completion); if (control->canceled) return -ECANCELED; @@ -1668,7 +1676,7 @@ static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, * @t: The target of the region. * @rp: The pointer to the region. * @s: The scheme to be applied. - * @min_sz_region: minimum region size. + * @min_region_sz: minimum region size. * * If a quota of a scheme has exceeded in a quota charge window, the scheme's * action would applied to only a part of the target access pattern fulfilling @@ -1686,7 +1694,8 @@ static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, * Return: true if the region should be entirely skipped, false otherwise. */ static bool damos_skip_charged_region(struct damon_target *t, - struct damon_region **rp, struct damos *s, unsigned long min_sz_region) + struct damon_region **rp, struct damos *s, + unsigned long min_region_sz) { struct damon_region *r = *rp; struct damos_quota *quota = &s->quota; @@ -1708,11 +1717,11 @@ static bool damos_skip_charged_region(struct damon_target *t, if (quota->charge_addr_from && r->ar.start < quota->charge_addr_from) { sz_to_skip = ALIGN_DOWN(quota->charge_addr_from - - r->ar.start, min_sz_region); + r->ar.start, min_region_sz); if (!sz_to_skip) { - if (damon_sz_region(r) <= min_sz_region) + if (damon_sz_region(r) <= min_region_sz) return true; - sz_to_skip = min_sz_region; + sz_to_skip = min_region_sz; } damon_split_region_at(t, r, sz_to_skip); r = damon_next_region(r); @@ -1738,7 +1747,7 @@ static void damos_update_stat(struct damos *s, static bool damos_filter_match(struct damon_ctx *ctx, struct damon_target *t, struct damon_region *r, struct damos_filter *filter, - unsigned long min_sz_region) + unsigned long min_region_sz) { bool matched = false; struct damon_target *ti; @@ -1755,8 +1764,8 @@ static bool damos_filter_match(struct damon_ctx *ctx, struct damon_target *t, matched = target_idx == filter->target_idx; break; case DAMOS_FILTER_TYPE_ADDR: - start = ALIGN_DOWN(filter->addr_range.start, min_sz_region); - end = ALIGN_DOWN(filter->addr_range.end, min_sz_region); + start = ALIGN_DOWN(filter->addr_range.start, min_region_sz); + end = ALIGN_DOWN(filter->addr_range.end, min_region_sz); /* inside the range */ if (start <= r->ar.start && r->ar.end <= end) { @@ -1792,7 +1801,7 @@ static bool damos_filter_out(struct damon_ctx *ctx, struct damon_target *t, s->core_filters_allowed = false; damos_for_each_core_filter(filter, s) { - if (damos_filter_match(ctx, t, r, filter, ctx->min_sz_region)) { + if (damos_filter_match(ctx, t, r, filter, ctx->min_region_sz)) { if (filter->allow) s->core_filters_allowed = true; return !filter->allow; @@ -1927,7 +1936,7 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, if (c->ops.apply_scheme) { if (quota->esz && quota->charged_sz + sz > quota->esz) { sz = ALIGN_DOWN(quota->esz - quota->charged_sz, - c->min_sz_region); + c->min_region_sz); if (!sz) goto update_stat; damon_split_region_at(t, r, sz); @@ -1975,7 +1984,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c, if (quota->esz && quota->charged_sz >= quota->esz) continue; - if (damos_skip_charged_region(t, &r, s, c->min_sz_region)) + if (damos_skip_charged_region(t, &r, s, c->min_region_sz)) continue; if (!damos_valid_target(c, t, r, s)) @@ -2424,7 +2433,7 @@ static void damon_split_region_at(struct damon_target *t, /* Split every region in the given target into 'nr_subs' regions */ static void damon_split_regions_of(struct damon_target *t, int nr_subs, - unsigned long min_sz_region) + unsigned long min_region_sz) { struct damon_region *r, *next; unsigned long sz_region, sz_sub = 0; @@ -2434,13 +2443,13 @@ static void damon_split_regions_of(struct damon_target *t, int nr_subs, sz_region = damon_sz_region(r); for (i = 0; i < nr_subs - 1 && - sz_region > 2 * min_sz_region; i++) { + sz_region > 2 * min_region_sz; i++) { /* * Randomly select size of left sub-region to be at * least 10 percent and at most 90% of original region */ sz_sub = ALIGN_DOWN(damon_rand(1, 10) * - sz_region / 10, min_sz_region); + sz_region / 10, min_region_sz); /* Do not allow blank region */ if (sz_sub == 0 || sz_sub >= sz_region) continue; @@ -2480,7 +2489,7 @@ static void kdamond_split_regions(struct damon_ctx *ctx) nr_subregions = 3; damon_for_each_target(t, ctx) - damon_split_regions_of(t, nr_subregions, ctx->min_sz_region); + damon_split_regions_of(t, nr_subregions, ctx->min_region_sz); last_nr_regions = nr_regions; } @@ -2850,7 +2859,7 @@ static bool damon_find_biggest_system_ram(unsigned long *start, * @t: The monitoring target to set the region. * @start: The pointer to the start address of the region. * @end: The pointer to the end address of the region. - * @min_sz_region: Minimum region size. + * @min_region_sz: Minimum region size. * * This function sets the region of @t as requested by @start and @end. If the * values of @start and @end are zero, however, this function finds the biggest @@ -2862,7 +2871,7 @@ static bool damon_find_biggest_system_ram(unsigned long *start, */ int damon_set_region_biggest_system_ram_default(struct damon_target *t, unsigned long *start, unsigned long *end, - unsigned long min_sz_region) + unsigned long min_region_sz) { struct damon_addr_range addr_range; @@ -2875,7 +2884,7 @@ int damon_set_region_biggest_system_ram_default(struct damon_target *t, addr_range.start = *start; addr_range.end = *end; - return damon_set_regions(t, &addr_range, 1, min_sz_region); + return damon_set_regions(t, &addr_range, 1, min_region_sz); } /* diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index 49b4bc294f4e..9cef1619527f 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -212,7 +212,7 @@ static int damon_lru_sort_apply_parameters(void) if (!monitor_region_start && !monitor_region_end) addr_unit = 1; param_ctx->addr_unit = addr_unit; - param_ctx->min_sz_region = max(DAMON_MIN_REGION / addr_unit, 1); + param_ctx->min_region_sz = max(DAMON_MIN_REGION_SZ / addr_unit, 1); if (!damon_lru_sort_mon_attrs.sample_interval) { err = -EINVAL; @@ -243,7 +243,7 @@ static int damon_lru_sort_apply_parameters(void) err = damon_set_region_biggest_system_ram_default(param_target, &monitor_region_start, &monitor_region_end, - param_ctx->min_sz_region); + param_ctx->min_region_sz); if (err) goto out; err = damon_commit_ctx(ctx, param_ctx); diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 36a582e09eae..c262ec6cb545 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -208,7 +208,7 @@ static int damon_reclaim_apply_parameters(void) if (!monitor_region_start && !monitor_region_end) addr_unit = 1; param_ctx->addr_unit = addr_unit; - param_ctx->min_sz_region = max(DAMON_MIN_REGION / addr_unit, 1); + param_ctx->min_region_sz = max(DAMON_MIN_REGION_SZ / addr_unit, 1); if (!damon_reclaim_mon_attrs.aggr_interval) { err = -EINVAL; @@ -251,7 +251,7 @@ static int damon_reclaim_apply_parameters(void) err = damon_set_region_biggest_system_ram_default(param_target, &monitor_region_start, &monitor_region_end, - param_ctx->min_sz_region); + param_ctx->min_region_sz); if (err) goto out; err = damon_commit_ctx(ctx, param_ctx); diff --git a/mm/damon/stat.c b/mm/damon/stat.c index ed8e3629d31a..922a6a6e65db 100644 --- a/mm/damon/stat.c +++ b/mm/damon/stat.c @@ -189,7 +189,7 @@ static struct damon_ctx *damon_stat_build_ctx(void) goto free_out; damon_add_target(ctx, target); if (damon_set_region_biggest_system_ram_default(target, &start, &end, - ctx->min_sz_region)) + ctx->min_region_sz)) goto free_out; return ctx; free_out: diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 95fd9375a7d8..4a74c46770c0 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1365,7 +1365,7 @@ static int damon_sysfs_set_attrs(struct damon_ctx *ctx, static int damon_sysfs_set_regions(struct damon_target *t, struct damon_sysfs_regions *sysfs_regions, - unsigned long min_sz_region) + unsigned long min_region_sz) { struct damon_addr_range *ranges = kmalloc_array(sysfs_regions->nr, sizeof(*ranges), GFP_KERNEL | __GFP_NOWARN); @@ -1387,7 +1387,7 @@ static int damon_sysfs_set_regions(struct damon_target *t, if (ranges[i - 1].end > ranges[i].start) goto out; } - err = damon_set_regions(t, ranges, sysfs_regions->nr, min_sz_region); + err = damon_set_regions(t, ranges, sysfs_regions->nr, min_region_sz); out: kfree(ranges); return err; @@ -1409,7 +1409,8 @@ static int damon_sysfs_add_target(struct damon_sysfs_target *sys_target, return -EINVAL; } t->obsolete = sys_target->obsolete; - return damon_sysfs_set_regions(t, sys_target->regions, ctx->min_sz_region); + return damon_sysfs_set_regions(t, sys_target->regions, + ctx->min_region_sz); } static int damon_sysfs_add_targets(struct damon_ctx *ctx, @@ -1469,8 +1470,8 @@ static int damon_sysfs_apply_inputs(struct damon_ctx *ctx, ctx->addr_unit = sys_ctx->addr_unit; /* addr_unit is respected by only DAMON_OPS_PADDR */ if (sys_ctx->ops_id == DAMON_OPS_PADDR) - ctx->min_sz_region = max( - DAMON_MIN_REGION / sys_ctx->addr_unit, 1); + ctx->min_region_sz = max( + DAMON_MIN_REGION_SZ / sys_ctx->addr_unit, 1); err = damon_sysfs_set_attrs(ctx, sys_ctx->attrs); if (err) return err; diff --git a/mm/damon/tests/vaddr-kunit.h b/mm/damon/tests/vaddr-kunit.h index 30dc5459f1d2..cfae870178bf 100644 --- a/mm/damon/tests/vaddr-kunit.h +++ b/mm/damon/tests/vaddr-kunit.h @@ -147,7 +147,7 @@ static void damon_do_test_apply_three_regions(struct kunit *test, damon_add_region(r, t); } - damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION); + damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION_SZ); for (i = 0; i < nr_expected / 2; i++) { r = __nth_region_of(t, i); diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 23ed738a0bd6..226a3f0c9b4a 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -19,8 +19,8 @@ #include "ops-common.h" #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST -#undef DAMON_MIN_REGION -#define DAMON_MIN_REGION 1 +#undef DAMON_MIN_REGION_SZ +#define DAMON_MIN_REGION_SZ 1 #endif /* @@ -78,7 +78,7 @@ static int damon_va_evenly_split_region(struct damon_target *t, orig_end = r->ar.end; sz_orig = damon_sz_region(r); - sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION); + sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION_SZ); if (!sz_piece) return -EINVAL; @@ -161,12 +161,12 @@ next: swap(first_gap, second_gap); /* Store the result */ - regions[0].start = ALIGN(start, DAMON_MIN_REGION); - regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION); - regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION); - regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION); - regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION); - regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION); + regions[0].start = ALIGN(start, DAMON_MIN_REGION_SZ); + regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION_SZ); + regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION_SZ); + regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION_SZ); + regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION_SZ); + regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION_SZ); return 0; } @@ -259,8 +259,8 @@ static void __damon_va_init_regions(struct damon_ctx *ctx, sz += regions[i].end - regions[i].start; if (ctx->attrs.min_nr_regions) sz /= ctx->attrs.min_nr_regions; - if (sz < DAMON_MIN_REGION) - sz = DAMON_MIN_REGION; + if (sz < DAMON_MIN_REGION_SZ) + sz = DAMON_MIN_REGION_SZ; /* Set the initial three regions of the target */ for (i = 0; i < 3; i++) { @@ -299,7 +299,7 @@ static void damon_va_update(struct damon_ctx *ctx) damon_for_each_target(t, ctx) { if (damon_va_three_regions(t, three_regions)) continue; - damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION); + damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION_SZ); } } diff --git a/mm/filemap.c b/mm/filemap.c index ebd75684cb0a..d98e4883f13d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1379,14 +1379,16 @@ repeat: #ifdef CONFIG_MIGRATION /** - * migration_entry_wait_on_locked - Wait for a migration entry to be removed - * @entry: migration swap entry. + * softleaf_entry_wait_on_locked - Wait for a migration entry or + * device_private entry to be removed. + * @entry: migration or device_private swap entry. * @ptl: already locked ptl. This function will drop the lock. * - * Wait for a migration entry referencing the given page to be removed. This is + * Wait for a migration entry referencing the given page, or device_private + * entry referencing a dvice_private page to be unlocked. This is * equivalent to folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE) except * this can be called without taking a reference on the page. Instead this - * should be called while holding the ptl for the migration entry referencing + * should be called while holding the ptl for @entry referencing * the page. * * Returns after unlocking the ptl. @@ -1394,7 +1396,7 @@ repeat: * This follows the same logic as folio_wait_bit_common() so see the comments * there. */ -void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) +void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl) { struct wait_page_queue wait_page; @@ -1428,6 +1430,9 @@ void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) * If a migration entry exists for the page the migration path must hold * a valid reference to the page, and it must take the ptl to remove the * migration entry. So the page is valid until the ptl is dropped. + * Similarly any path attempting to drop the last reference to a + * device-private page needs to grab the ptl to remove the device-private + * entry. */ spin_unlock(ptl); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d3beddd8cc30..e3992314df9a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3631,6 +3631,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order, const bool is_anon = folio_test_anon(folio); int old_order = folio_order(folio); int start_order = split_type == SPLIT_TYPE_UNIFORM ? new_order : old_order - 1; + struct folio *old_folio = folio; int split_order; /* @@ -3651,12 +3652,16 @@ static int __split_unmapped_folio(struct folio *folio, int new_order, * uniform split has xas_split_alloc() called before * irq is disabled to allocate enough memory, whereas * non-uniform split can handle ENOMEM. + * Use the to-be-split folio, so that a parallel + * folio_try_get() waits on it until xarray is updated + * with after-split folios and the original one is + * unfrozen. */ - if (split_type == SPLIT_TYPE_UNIFORM) - xas_split(xas, folio, old_order); - else { + if (split_type == SPLIT_TYPE_UNIFORM) { + xas_split(xas, old_folio, old_order); + } else { xas_set_order(xas, folio->index, split_order); - xas_try_split(xas, folio, old_order); + xas_try_split(xas, old_folio, old_order); if (xas_error(xas)) return xas_error(xas); } diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 4f79ec720752..30959c97b881 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -13,6 +13,7 @@ #include <linux/hash.h> #include <linux/irq_work.h> #include <linux/jhash.h> +#include <linux/kasan-enabled.h> #include <linux/kcsan-checks.h> #include <linux/kfence.h> #include <linux/kmemleak.h> @@ -912,6 +913,20 @@ void __init kfence_alloc_pool_and_metadata(void) return; /* + * If KASAN hardware tags are enabled, disable KFENCE, because it + * does not support MTE yet. + */ + if (kasan_hw_tags_enabled()) { + pr_info("disabled as KASAN HW tags are enabled\n"); + if (__kfence_pool) { + memblock_free(__kfence_pool, KFENCE_POOL_SIZE); + __kfence_pool = NULL; + } + kfence_sample_interval = 0; + return; + } + + /* * If the pool has already been initialized by arch, there is no need to * re-allocate the memory pool. */ @@ -984,14 +999,14 @@ static int kfence_init_late(void) #ifdef CONFIG_CONTIG_ALLOC struct page *pages; - pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL, first_online_node, - NULL); + pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL | __GFP_SKIP_KASAN, + first_online_node, NULL); if (!pages) return -ENOMEM; __kfence_pool = page_to_virt(pages); - pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL, first_online_node, - NULL); + pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL | __GFP_SKIP_KASAN, + first_online_node, NULL); if (pages) kfence_metadata_init = page_to_virt(pages); #else @@ -1001,11 +1016,13 @@ static int kfence_init_late(void) return -EINVAL; } - __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, GFP_KERNEL); + __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, + GFP_KERNEL | __GFP_SKIP_KASAN); if (!__kfence_pool) return -ENOMEM; - kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE, GFP_KERNEL); + kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE, + GFP_KERNEL | __GFP_SKIP_KASAN); #endif if (!kfence_metadata_init) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 702c3db624a0..a7b5192ad7d5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3052,7 +3052,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, if (!local_trylock(&obj_stock.lock)) { if (pgdat) - mod_objcg_mlstate(objcg, pgdat, idx, nr_bytes); + mod_objcg_mlstate(objcg, pgdat, idx, nr_acct); nr_pages = nr_bytes >> PAGE_SHIFT; nr_bytes = nr_bytes & (PAGE_SIZE - 1); atomic_add(nr_bytes, &objcg->nr_charged_bytes); diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c index a34fccc23b6a..02993d83d39c 100644 --- a/mm/memfd_luo.c +++ b/mm/memfd_luo.c @@ -146,19 +146,56 @@ static int memfd_luo_preserve_folios(struct file *file, for (i = 0; i < nr_folios; i++) { struct memfd_luo_folio_ser *pfolio = &folios_ser[i]; struct folio *folio = folios[i]; - unsigned int flags = 0; err = kho_preserve_folio(folio); if (err) goto err_unpreserve; - if (folio_test_dirty(folio)) - flags |= MEMFD_LUO_FOLIO_DIRTY; - if (folio_test_uptodate(folio)) - flags |= MEMFD_LUO_FOLIO_UPTODATE; + folio_lock(folio); + + /* + * A dirty folio is one which has been written to. A clean folio + * is its opposite. Since a clean folio does not carry user + * data, it can be freed by page reclaim under memory pressure. + * + * Saving the dirty flag at prepare() time doesn't work since it + * can change later. Saving it at freeze() also won't work + * because the dirty bit is normally synced at unmap and there + * might still be a mapping of the file at freeze(). + * + * To see why this is a problem, say a folio is clean at + * preserve, but gets dirtied later. The pfolio flags will mark + * it as clean. After retrieve, the next kernel might try to + * reclaim this folio under memory pressure, losing user data. + * + * Unconditionally mark it dirty to avoid this problem. This + * comes at the cost of making clean folios un-reclaimable after + * live update. + */ + folio_mark_dirty(folio); + + /* + * If the folio is not uptodate, it was fallocated but never + * used. Saving this flag at prepare() doesn't work since it + * might change later when someone uses the folio. + * + * Since we have taken the performance penalty of allocating, + * zeroing, and pinning all the folios in the holes, take a bit + * more and zero all non-uptodate folios too. + * + * NOTE: For someone looking to improve preserve performance, + * this is a good place to look. + */ + if (!folio_test_uptodate(folio)) { + folio_zero_range(folio, 0, folio_size(folio)); + flush_dcache_folio(folio); + folio_mark_uptodate(folio); + } + + folio_unlock(folio); pfolio->pfn = folio_pfn(folio); - pfolio->flags = flags; + pfolio->flags = MEMFD_LUO_FOLIO_DIRTY | MEMFD_LUO_FOLIO_UPTODATE; pfolio->index = folio->index; } @@ -326,7 +363,12 @@ static void memfd_luo_finish(struct liveupdate_file_op_args *args) struct memfd_luo_folio_ser *folios_ser; struct memfd_luo_ser *ser; - if (args->retrieved) + /* + * If retrieve was successful, nothing to do. If it failed, retrieve() + * already cleaned up everything it could. So nothing to do there + * either. Only need to clean up when retrieve was not called. + */ + if (args->retrieve_status) return; ser = phys_to_virt(args->serialized_data); diff --git a/mm/memory.c b/mm/memory.c index da360a6eb8a4..20172476a57f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4684,7 +4684,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock_page(vmf->page); put_page(vmf->page); } else { - pte_unmap_unlock(vmf->pte, vmf->ptl); + pte_unmap(vmf->pte); + softleaf_entry_wait_on_locked(entry, vmf->ptl); } } else if (softleaf_is_hwpoison(entry)) { ret = VM_FAULT_HWPOISON; diff --git a/mm/migrate.c b/mm/migrate.c index 4688b9e38cd2..cf6449b4202e 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -499,7 +499,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, if (!softleaf_is_migration(entry)) goto out; - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); return; out: spin_unlock(ptl); @@ -531,10 +531,10 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, p * If migration entry existed, safe to release vma lock * here because the pgtable page won't be freed without the * pgtable lock released. See comment right above pgtable - * lock release in migration_entry_wait_on_locked(). + * lock release in softleaf_entry_wait_on_locked(). */ hugetlb_vma_unlock_read(vma); - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); return; } @@ -552,7 +552,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) ptl = pmd_lock(mm, pmd); if (!pmd_is_migration_entry(*pmd)) goto unlock; - migration_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl); + softleaf_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl); return; unlock: spin_unlock(ptl); diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 23379663b1e1..deab89fd4541 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -176,7 +176,7 @@ static int migrate_vma_collect_huge_pmd(pmd_t *pmdp, unsigned long start, } if (softleaf_is_migration(entry)) { - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); spin_unlock(ptl); return -EAGAIN; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 04e32adaeb1d..469ee8cb7b2e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6941,7 +6941,8 @@ static int __alloc_contig_verify_gfp_mask(gfp_t gfp_mask, gfp_t *gfp_cc_mask) { const gfp_t reclaim_mask = __GFP_IO | __GFP_FS | __GFP_RECLAIM; const gfp_t action_mask = __GFP_COMP | __GFP_RETRY_MAYFAIL | __GFP_NOWARN | - __GFP_ZERO | __GFP_ZEROTAGS | __GFP_SKIP_ZERO; + __GFP_ZERO | __GFP_ZEROTAGS | __GFP_SKIP_ZERO | + __GFP_SKIP_KASAN; const gfp_t cc_action_mask = __GFP_RETRY_MAYFAIL | __GFP_NOWARN; /* diff --git a/mm/slub.c b/mm/slub.c index b68db0f5a637..92f891816bb8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2113,13 +2113,6 @@ static inline size_t obj_exts_alloc_size(struct kmem_cache *s, size_t sz = sizeof(struct slabobj_ext) * slab->objects; struct kmem_cache *obj_exts_cache; - /* - * slabobj_ext array for KMALLOC_CGROUP allocations - * are served from KMALLOC_NORMAL caches. - */ - if (!mem_alloc_profiling_enabled()) - return sz; - if (sz > KMALLOC_MAX_CACHE_SIZE) return sz; @@ -2737,19 +2730,19 @@ static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p); * object pointers are moved to a on-stack array under the lock. To bound the * stack usage, limit each batch to PCS_BATCH_MAX. * - * returns true if at least partially flushed + * Must be called with s->cpu_sheaves->lock locked, returns with the lock + * unlocked. + * + * Returns how many objects are remaining to be flushed */ -static bool sheaf_flush_main(struct kmem_cache *s) +static unsigned int __sheaf_flush_main_batch(struct kmem_cache *s) { struct slub_percpu_sheaves *pcs; unsigned int batch, remaining; void *objects[PCS_BATCH_MAX]; struct slab_sheaf *sheaf; - bool ret = false; -next_batch: - if (!local_trylock(&s->cpu_sheaves->lock)) - return ret; + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); pcs = this_cpu_ptr(s->cpu_sheaves); sheaf = pcs->main; @@ -2767,10 +2760,37 @@ next_batch: stat_add(s, SHEAF_FLUSH, batch); - ret = true; + return remaining; +} - if (remaining) - goto next_batch; +static void sheaf_flush_main(struct kmem_cache *s) +{ + unsigned int remaining; + + do { + local_lock(&s->cpu_sheaves->lock); + + remaining = __sheaf_flush_main_batch(s); + + } while (remaining); +} + +/* + * Returns true if the main sheaf was at least partially flushed. + */ +static bool sheaf_try_flush_main(struct kmem_cache *s) +{ + unsigned int remaining; + bool ret = false; + + do { + if (!local_trylock(&s->cpu_sheaves->lock)) + return ret; + + ret = true; + remaining = __sheaf_flush_main_batch(s); + + } while (remaining); return ret; } @@ -6222,7 +6242,7 @@ alloc_empty: if (put_fail) stat(s, BARN_PUT_FAIL); - if (!sheaf_flush_main(s)) + if (!sheaf_try_flush_main(s)) return NULL; if (!local_trylock(&s->cpu_sheaves->lock)) |
