diff options
| author | Namhyung Kim <namhyung@kernel.org> | 2025-05-09 10:19:50 -0700 |
|---|---|---|
| committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2025-05-09 14:32:15 -0300 |
| commit | c42e219942cb1325fe38160a4fd2be73c74abc0e (patch) | |
| tree | 86074562a1ce028071b311dd52ef3501b472cf93 /tools/perf/builtin-lock.c | |
| parent | 4bfe27140edf8dd1322326c79f5ae8d29ff7e43d (diff) | |
perf lock contention: Add -J/--inject-delay option
This is to slow down lock acquisition (on contention locks) deliberately.
A possible use case is to estimate impact on application performance by
optimization of kernel locking behavior. By delaying the lock it can
simulate the worse condition as a control group, and then compare with
the current behavior as an optimized condition.
The syntax is 'time@function' and the time can have unit suffix like
"us" and "ms". For example, I ran a simple test like below.
$ sudo perf lock con -abl -L tasklist_lock -- \
sh -c 'for i in $(seq 1000); do sleep 1 & done; wait'
contended total wait max wait avg wait address symbol
92 1.18 ms 199.54 us 12.79 us ffffffff8a806080 tasklist_lock (rwlock)
The contention count was 92 and the average wait time was around 10 us.
But if I add 100 usec of delay to the tasklist_lock,
$ sudo perf lock con -abl -L tasklist_lock -J 100us@tasklist_lock -- \
sh -c 'for i in $(seq 1000); do sleep 1 & done; wait'
contended total wait max wait avg wait address symbol
190 15.67 ms 230.10 us 82.46 us ffffffff8a806080 tasklist_lock (rwlock)
The contention count increased and the average wait time went up close
to 100 usec. If I increase the delay even more,
$ sudo perf lock con -abl -L tasklist_lock -J 1ms@tasklist_lock -- \
sh -c 'for i in $(seq 1000); do sleep 1 & done; wait'
contended total wait max wait avg wait address symbol
1002 2.80 s 3.01 ms 2.80 ms ffffffff8a806080 tasklist_lock (rwlock)
Now every sleep process had contention and the wait time was more than 1
msec. This is on my 4 CPU laptop so I guess one CPU has the lock while
other 3 are waiting for it mostly.
For simplicity, it only supports global locks for now.
Committer testing:
root@number:~# grep -m1 'model name' /proc/cpuinfo
model name : AMD Ryzen 9 9950X3D 16-Core Processor
root@number:~# perf lock con -abl -L tasklist_lock -- sh -c 'for i in $(seq 1000); do sleep 1 & done; wait'
contended total wait max wait avg wait address symbol
142 453.85 us 25.39 us 3.20 us ffffffffae808080 tasklist_lock (rwlock)
root@number:~# perf lock con -abl -L tasklist_lock -J 100us@tasklist_lock -- sh -c 'for i in $(seq 1000); do sleep 1 & done; wait'
contended total wait max wait avg wait address symbol
1040 2.39 s 3.11 ms 2.30 ms ffffffffae808080 tasklist_lock (rwlock)
root@number:~# perf lock con -abl -L tasklist_lock -J 1ms@tasklist_lock -- sh -c 'for i in $(seq 1000); do sleep 1 & done; wait'
contended total wait max wait avg wait address symbol
1025 24.72 s 31.01 ms 24.12 ms ffffffffae808080 tasklist_lock (rwlock)
root@number:~#
Suggested-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20250509171950.183591-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-lock.c')
| -rw-r--r-- | tools/perf/builtin-lock.c | 74 |
1 files changed, 74 insertions, 0 deletions
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 05e7bc30488a..41f6f3d2b779 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -62,6 +62,8 @@ static const char *output_name = NULL; static FILE *lock_output; static struct lock_filter filters; +static struct lock_delay *delays; +static int nr_delays; static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR; @@ -2001,6 +2003,8 @@ static int __cmd_contention(int argc, const char **argv) .max_stack = max_stack_depth, .stack_skip = stack_skip, .filters = &filters, + .delays = delays, + .nr_delays = nr_delays, .save_callstack = needs_callstack(), .owner = show_lock_owner, .cgroups = RB_ROOT, @@ -2504,6 +2508,74 @@ static int parse_cgroup_filter(const struct option *opt __maybe_unused, const ch return ret; } +static bool add_lock_delay(char *spec) +{ + char *at, *pos; + struct lock_delay *tmp; + unsigned long duration; + + at = strchr(spec, '@'); + if (at == NULL) { + pr_err("lock delay should have '@' sign: %s\n", spec); + return false; + } + if (at == spec) { + pr_err("lock delay should have time before '@': %s\n", spec); + return false; + } + + *at = '\0'; + duration = strtoul(spec, &pos, 0); + if (!strcmp(pos, "ns")) + duration *= 1; + else if (!strcmp(pos, "us")) + duration *= 1000; + else if (!strcmp(pos, "ms")) + duration *= 1000 * 1000; + else if (*pos) { + pr_err("invalid delay time: %s@%s\n", spec, at + 1); + return false; + } + + tmp = realloc(delays, (nr_delays + 1) * sizeof(*delays)); + if (tmp == NULL) { + pr_err("Memory allocation failure\n"); + return false; + } + delays = tmp; + + delays[nr_delays].sym = strdup(at + 1); + if (delays[nr_delays].sym == NULL) { + pr_err("Memory allocation failure\n"); + return false; + } + delays[nr_delays].time = duration; + + nr_delays++; + return true; +} + +static int parse_lock_delay(const struct option *opt __maybe_unused, const char *str, + int unset __maybe_unused) +{ + char *s, *tmp, *tok; + int ret = 0; + + s = 
strdup(str); + if (s == NULL) + return -1; + + for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { + if (!add_lock_delay(tok)) { + ret = -1; + break; + } + } + + free(s); + return ret; +} + int cmd_lock(int argc, const char **argv) { const struct option lock_options[] = { @@ -2580,6 +2652,8 @@ int cmd_lock(int argc, const char **argv) OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"), OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS", "Filter specific cgroups", parse_cgroup_filter), + OPT_CALLBACK('J', "inject-delay", NULL, "TIME@FUNC", + "Inject delays to specific locks", parse_lock_delay), OPT_PARENT(lock_options) }; |
