mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-18 13:31:45 -04:00
damon_stat_start() always allocates the module's damon_ctx object
(damon_stat_context). Meanwhile, if damon_call() in the function fails,
the damon_ctx object is not deallocated. Hence, if damon_call() fails
and the user writes Y to “enabled” again, the previously allocated
damon_ctx object is leaked.
This cannot simply be fixed by deallocating the damon_ctx object when
damon_call() fails. That's because damon_call() failure doesn't guarantee
the kdamond main function, which accesses the damon_ctx object, is
completely finished. In other words, if damon_stat_start() deallocates
the damon_ctx object after damon_call() failure, the not-yet-terminated
kdamond could access the freed memory (use-after-free).
Fix the leak while avoiding the use-after-free by still returning from
damon_stat_start() without deallocating the damon_ctx object after a
damon_call() failure, and instead deallocating it when the function is
invoked again and the kdamond has completely terminated. If the kdamond has
not yet terminated, simply return -EAGAIN, as the kdamond will soon terminate.
The issue was discovered [1] by sashiko.
Link: https://lkml.kernel.org/r/20260402134418.74121-1-sj@kernel.org
Link: https://lore.kernel.org/20260401012428.86694-1-sj@kernel.org [1]
Fixes: 405f61996d ("mm/damon/stat: use damon_call() repeat mode instead of damon_callback")
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: <stable@vger.kernel.org> # 6.17.x
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
323 lines
8.0 KiB
C
323 lines
8.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Shows data access monitoring results in simple metrics.
|
|
*/
|
|
|
|
/* Message prefix for this file; defined ahead of the #include lines below. */
#define pr_fmt(fmt) "damon-stat: " fmt
|
|
|
|
#include <linux/damon.h>
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/sort.h>
|
|
|
|
/*
 * Replace any previously defined parameter prefix with "damon_stat." before
 * the module parameters below are declared.
 */
#if defined(MODULE_PARAM_PREFIX)
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "damon_stat."
|
|
|
|
/* Forward declaration: enabled_param_ops refers to this setter. */
static int damon_stat_enabled_store(const char *val,
		const struct kernel_param *kp);
|
|
|
|
static const struct kernel_param_ops enabled_param_ops = {
|
|
.set = damon_stat_enabled_store,
|
|
.get = param_get_bool,
|
|
};
|
|
|
|
static bool enabled __read_mostly = IS_ENABLED(
|
|
CONFIG_DAMON_STAT_ENABLED_DEFAULT);
|
|
module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
|
|
MODULE_PARM_DESC(enabled, "Enable of disable DAMON_STAT");
|
|
|
|
static unsigned long estimated_memory_bandwidth __read_mostly;
|
|
module_param(estimated_memory_bandwidth, ulong, 0400);
|
|
MODULE_PARM_DESC(estimated_memory_bandwidth,
|
|
"Estimated memory bandwidth usage in bytes per second");
|
|
|
|
static long memory_idle_ms_percentiles[101] = {0,};
|
|
module_param_array(memory_idle_ms_percentiles, long, NULL, 0400);
|
|
MODULE_PARM_DESC(memory_idle_ms_percentiles,
|
|
"Memory idle time percentiles in milliseconds");
|
|
|
|
static unsigned long aggr_interval_us;
|
|
module_param(aggr_interval_us, ulong, 0400);
|
|
MODULE_PARM_DESC(aggr_interval_us,
|
|
"Current tuned aggregation interval in microseconds");
|
|
|
|
/*
 * The module's single DAMON context.  Built in damon_stat_start(), destroyed
 * and reset to NULL in damon_stat_stop().
 */
static struct damon_ctx *damon_stat_context;
|
|
|
|
/* jiffies value at the last statistics refresh (or at start); used to rate-limit updates. */
static unsigned long damon_stat_last_refresh_jiffies;
|
|
|
|
static void damon_stat_set_estimated_memory_bandwidth(struct damon_ctx *c)
|
|
{
|
|
struct damon_target *t;
|
|
struct damon_region *r;
|
|
unsigned long access_bytes = 0;
|
|
|
|
damon_for_each_target(t, c) {
|
|
damon_for_each_region(r, t)
|
|
access_bytes += (r->ar.end - r->ar.start) *
|
|
r->nr_accesses;
|
|
}
|
|
estimated_memory_bandwidth = access_bytes * USEC_PER_MSEC *
|
|
MSEC_PER_SEC / c->attrs.aggr_interval;
|
|
}
|
|
|
|
static int damon_stat_idletime(const struct damon_region *r)
|
|
{
|
|
if (r->nr_accesses)
|
|
return -1 * (r->age + 1);
|
|
return r->age + 1;
|
|
}
|
|
|
|
/*
 * sort() comparator for an array of 'struct damon_region *'.
 *
 * @a and @b point at array elements, hence the extra level of indirection.
 * Orders regions by ascending damon_stat_idletime().
 */
static int damon_stat_cmp_regions(const void *a, const void *b)
{
	const struct damon_region * const *lhs = a;
	const struct damon_region * const *rhs = b;

	return damon_stat_idletime(*lhs) - damon_stat_idletime(*rhs);
}
|
|
|
|
static int damon_stat_sort_regions(struct damon_ctx *c,
|
|
struct damon_region ***sorted_ptr, int *nr_regions_ptr,
|
|
unsigned long *total_sz_ptr)
|
|
{
|
|
struct damon_target *t;
|
|
struct damon_region *r;
|
|
struct damon_region **region_pointers;
|
|
unsigned int nr_regions = 0;
|
|
unsigned long total_sz = 0;
|
|
|
|
damon_for_each_target(t, c) {
|
|
/* there is only one target */
|
|
region_pointers = kmalloc_objs(*region_pointers,
|
|
damon_nr_regions(t));
|
|
if (!region_pointers)
|
|
return -ENOMEM;
|
|
damon_for_each_region(r, t) {
|
|
region_pointers[nr_regions++] = r;
|
|
total_sz += r->ar.end - r->ar.start;
|
|
}
|
|
}
|
|
sort(region_pointers, nr_regions, sizeof(*region_pointers),
|
|
damon_stat_cmp_regions, NULL);
|
|
*sorted_ptr = region_pointers;
|
|
*nr_regions_ptr = nr_regions;
|
|
*total_sz_ptr = total_sz;
|
|
return 0;
|
|
}
|
|
|
|
static void damon_stat_set_idletime_percentiles(struct damon_ctx *c)
|
|
{
|
|
struct damon_region **sorted_regions, *region;
|
|
int nr_regions;
|
|
unsigned long total_sz, accounted_bytes = 0;
|
|
int err, i, next_percentile = 0;
|
|
|
|
err = damon_stat_sort_regions(c, &sorted_regions, &nr_regions,
|
|
&total_sz);
|
|
if (err)
|
|
return;
|
|
for (i = 0; i < nr_regions; i++) {
|
|
region = sorted_regions[i];
|
|
accounted_bytes += region->ar.end - region->ar.start;
|
|
while (next_percentile <= accounted_bytes * 100 / total_sz)
|
|
memory_idle_ms_percentiles[next_percentile++] =
|
|
damon_stat_idletime(region) *
|
|
(long)c->attrs.aggr_interval / USEC_PER_MSEC;
|
|
}
|
|
kfree(sorted_regions);
|
|
}
|
|
|
|
static int damon_stat_damon_call_fn(void *data)
|
|
{
|
|
struct damon_ctx *c = data;
|
|
|
|
/* avoid unnecessarily frequent stat update */
|
|
if (time_before_eq(jiffies, damon_stat_last_refresh_jiffies +
|
|
msecs_to_jiffies(5 * MSEC_PER_SEC)))
|
|
return 0;
|
|
damon_stat_last_refresh_jiffies = jiffies;
|
|
|
|
aggr_interval_us = c->attrs.aggr_interval;
|
|
damon_stat_set_estimated_memory_bandwidth(c);
|
|
damon_stat_set_idletime_percentiles(c);
|
|
return 0;
|
|
}
|
|
|
|
struct damon_stat_system_ram_range_walk_arg {
|
|
bool walked;
|
|
struct resource res;
|
|
};
|
|
|
|
static int damon_stat_system_ram_walk_fn(struct resource *res, void *arg)
|
|
{
|
|
struct damon_stat_system_ram_range_walk_arg *a = arg;
|
|
|
|
if (!a->walked) {
|
|
a->walked = true;
|
|
a->res.start = res->start;
|
|
}
|
|
a->res.end = res->end;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Convert the resource address @ra into an address in units of @addr_unit,
 * i.e. @ra divided by @addr_unit.
 */
static unsigned long damon_stat_res_to_core_addr(resource_size_t ra,
		unsigned long addr_unit)
{
	/* plain division is fine unless ra is 64-bit and addr_unit is 32-bit */
	if (sizeof(ra) != 8 || sizeof(addr_unit) != 4)
		return ra / addr_unit;

	/*
	 * Use div_u64() for avoiding linking errors related with __udivdi3,
	 * __aeabi_uldivmod, or similar problems.  This should also improve the
	 * performance optimization (read div_u64() comment for the detail).
	 */
	return div_u64(ra, addr_unit);
}
|
|
|
|
static int damon_stat_set_monitoring_region(struct damon_target *t,
|
|
unsigned long addr_unit, unsigned long min_region_sz)
|
|
{
|
|
struct damon_addr_range addr_range;
|
|
struct damon_stat_system_ram_range_walk_arg arg = {};
|
|
|
|
walk_system_ram_res(0, -1, &arg, damon_stat_system_ram_walk_fn);
|
|
if (!arg.walked)
|
|
return -EINVAL;
|
|
addr_range.start = damon_stat_res_to_core_addr(
|
|
arg.res.start, addr_unit);
|
|
addr_range.end = damon_stat_res_to_core_addr(
|
|
arg.res.end + 1, addr_unit);
|
|
if (addr_range.end <= addr_range.start)
|
|
return -EINVAL;
|
|
return damon_set_regions(t, &addr_range, 1, min_region_sz);
|
|
}
|
|
|
|
static struct damon_ctx *damon_stat_build_ctx(void)
|
|
{
|
|
struct damon_ctx *ctx;
|
|
struct damon_attrs attrs;
|
|
struct damon_target *target;
|
|
|
|
ctx = damon_new_ctx();
|
|
if (!ctx)
|
|
return NULL;
|
|
attrs = (struct damon_attrs) {
|
|
.sample_interval = 5 * USEC_PER_MSEC,
|
|
.aggr_interval = 100 * USEC_PER_MSEC,
|
|
.ops_update_interval = 60 * USEC_PER_MSEC * MSEC_PER_SEC,
|
|
.min_nr_regions = 10,
|
|
.max_nr_regions = 1000,
|
|
};
|
|
/*
|
|
* auto-tune sampling and aggregation interval aiming 4% DAMON-observed
|
|
* accesses ratio, keeping sampling interval in [5ms, 10s] range.
|
|
*/
|
|
attrs.intervals_goal = (struct damon_intervals_goal) {
|
|
.access_bp = 400, .aggrs = 3,
|
|
.min_sample_us = 5000, .max_sample_us = 10000000,
|
|
};
|
|
if (damon_set_attrs(ctx, &attrs))
|
|
goto free_out;
|
|
|
|
if (damon_select_ops(ctx, DAMON_OPS_PADDR))
|
|
goto free_out;
|
|
|
|
target = damon_new_target();
|
|
if (!target)
|
|
goto free_out;
|
|
damon_add_target(ctx, target);
|
|
if (damon_stat_set_monitoring_region(target, ctx->addr_unit,
|
|
ctx->min_region_sz))
|
|
goto free_out;
|
|
return ctx;
|
|
free_out:
|
|
damon_destroy_ctx(ctx);
|
|
return NULL;
|
|
}
|
|
|
|
static struct damon_call_control call_control = {
|
|
.fn = damon_stat_damon_call_fn,
|
|
.repeat = true,
|
|
};
|
|
|
|
static int damon_stat_start(void)
|
|
{
|
|
int err;
|
|
|
|
if (damon_stat_context) {
|
|
if (damon_is_running(damon_stat_context))
|
|
return -EAGAIN;
|
|
damon_destroy_ctx(damon_stat_context);
|
|
}
|
|
|
|
damon_stat_context = damon_stat_build_ctx();
|
|
if (!damon_stat_context)
|
|
return -ENOMEM;
|
|
err = damon_start(&damon_stat_context, 1, true);
|
|
if (err)
|
|
return err;
|
|
|
|
damon_stat_last_refresh_jiffies = jiffies;
|
|
call_control.data = damon_stat_context;
|
|
return damon_call(damon_stat_context, &call_control);
|
|
}
|
|
|
|
static void damon_stat_stop(void)
|
|
{
|
|
damon_stop(&damon_stat_context, 1);
|
|
damon_destroy_ctx(damon_stat_context);
|
|
damon_stat_context = NULL;
|
|
}
|
|
|
|
static int damon_stat_enabled_store(
|
|
const char *val, const struct kernel_param *kp)
|
|
{
|
|
bool is_enabled = enabled;
|
|
int err;
|
|
|
|
err = kstrtobool(val, &enabled);
|
|
if (err)
|
|
return err;
|
|
|
|
if (is_enabled == enabled)
|
|
return 0;
|
|
|
|
if (!damon_initialized())
|
|
/*
|
|
* probably called from command line parsing (parse_args()).
|
|
* Cannot call damon_new_ctx(). Let damon_stat_init() handle.
|
|
*/
|
|
return 0;
|
|
|
|
if (enabled) {
|
|
err = damon_stat_start();
|
|
if (err)
|
|
enabled = false;
|
|
return err;
|
|
}
|
|
damon_stat_stop();
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Module initialization.
 *
 * Starts DAMON_STAT if 'enabled' is already set (by the build-time default or
 * probably via the command line).  On any failure 'enabled' is cleared.
 *
 * Return: 0 on success, -ENOMEM if DAMON is not initialized, or the error
 * from damon_stat_start().
 */
static int __init damon_stat_init(void)
{
	int ret;

	if (!damon_initialized())
		ret = -ENOMEM;
	else if (enabled)
		/* probably set via command line */
		ret = damon_stat_start();
	else
		ret = 0;

	if (ret && enabled)
		enabled = false;
	return ret;
}
|
|
|
|
/* Register damon_stat_init() as this module's initialization function. */
module_init(damon_stat_init);
|