summaryrefslogtreecommitdiff
path: root/tools/perf/builtin-lock.c
diff options
context:
space:
mode:
authorNamhyung Kim <namhyung@kernel.org>2025-05-09 10:19:50 -0700
committerArnaldo Carvalho de Melo <acme@redhat.com>2025-05-09 14:32:15 -0300
commitc42e219942cb1325fe38160a4fd2be73c74abc0e (patch)
tree86074562a1ce028071b311dd52ef3501b472cf93 /tools/perf/builtin-lock.c
parent4bfe27140edf8dd1322326c79f5ae8d29ff7e43d (diff)
perf lock contention: Add -J/--inject-delay option
This is to slow down lock acquisition (on contention locks) deliberately. A possible use case is to estimate impact on application performance by optimization of kernel locking behavior. By delaying the lock it can simulate the worse condition as a control group, and then compare with the current behavior as an optimized condition. The syntax is 'time@function' and the time can have unit suffix like "us" and "ms". For example, I ran a simple test like below. $ sudo perf lock con -abl -L tasklist_lock -- \ sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 92 1.18 ms 199.54 us 12.79 us ffffffff8a806080 tasklist_lock (rwlock) The contention count was 92 and the average wait time was around 10 us. But if I add 100 usec of delay to the tasklist_lock, $ sudo perf lock con -abl -L tasklist_lock -J 100us@tasklist_lock -- \ sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 190 15.67 ms 230.10 us 82.46 us ffffffff8a806080 tasklist_lock (rwlock) The contention count increased and the average wait time was up close to 100 usec. If I increase the delay even more, $ sudo perf lock con -abl -L tasklist_lock -J 1ms@tasklist_lock -- \ sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 1002 2.80 s 3.01 ms 2.80 ms ffffffff8a806080 tasklist_lock (rwlock) Now every sleep process had contention and the wait time was more than 1 msec. This is on my 4 CPU laptop so I guess one CPU has the lock while other 3 are waiting for it mostly. For simplicity, it only supports global locks for now. 
Committer testing: root@number:~# grep -m1 'model name' /proc/cpuinfo model name : AMD Ryzen 9 9950X3D 16-Core Processor root@number:~# perf lock con -abl -L tasklist_lock -- sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 142 453.85 us 25.39 us 3.20 us ffffffffae808080 tasklist_lock (rwlock) root@number:~# perf lock con -abl -L tasklist_lock -J 100us@tasklist_lock -- sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 1040 2.39 s 3.11 ms 2.30 ms ffffffffae808080 tasklist_lock (rwlock) root@number:~# perf lock con -abl -L tasklist_lock -J 1ms@tasklist_lock -- sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 1025 24.72 s 31.01 ms 24.12 ms ffffffffae808080 tasklist_lock (rwlock) root@number:~# Suggested-by: Stephane Eranian <eranian@google.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20250509171950.183591-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-lock.c')
-rw-r--r--tools/perf/builtin-lock.c74
1 files changed, 74 insertions, 0 deletions
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 05e7bc30488a..41f6f3d2b779 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -62,6 +62,8 @@ static const char *output_name = NULL;
static FILE *lock_output;
static struct lock_filter filters;
+static struct lock_delay *delays;
+static int nr_delays;
static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
@@ -2001,6 +2003,8 @@ static int __cmd_contention(int argc, const char **argv)
.max_stack = max_stack_depth,
.stack_skip = stack_skip,
.filters = &filters,
+ .delays = delays,
+ .nr_delays = nr_delays,
.save_callstack = needs_callstack(),
.owner = show_lock_owner,
.cgroups = RB_ROOT,
@@ -2504,6 +2508,74 @@ static int parse_cgroup_filter(const struct option *opt __maybe_unused, const ch
return ret;
}
+/*
+ * Parse one "TIME@FUNC" delay spec and append it to the global 'delays' array.
+ *
+ * TIME is an unsigned integer (parsed by strtoul() with base 0) followed by an
+ * optional "ns", "us" or "ms" suffix; a bare number is taken as nanoseconds.
+ * The value stored in the entry's 'time' field is in nanoseconds.
+ * FUNC is the text after '@' and is duplicated into the entry's 'sym' field.
+ *
+ * @spec is modified in place: the '@' separator is overwritten with a NUL.
+ *
+ * Returns true when an entry was appended.  Returns false after printing an
+ * error when the spec is malformed, the time does not fit in an unsigned long
+ * or memory cannot be allocated; 'delays' and 'nr_delays' are left untouched
+ * in that case.
+ */
+static bool add_lock_delay(char *spec)
+{
+	const unsigned long max_duration = ~0UL;
+	char *at, *pos, *sym;
+	struct lock_delay *tmp;
+	unsigned long duration, scale;
+
+	at = strchr(spec, '@');
+	if (at == NULL) {
+		pr_err("lock delay should have '@' sign: %s\n", spec);
+		return false;
+	}
+	if (at == spec) {
+		pr_err("lock delay should have time before '@': %s\n", spec);
+		return false;
+	}
+	if (at[1] == '\0') {
+		pr_err("lock delay should have function name after '@': %s\n", spec);
+		return false;
+	}
+
+	/* split the spec into the time part (spec) and the symbol part (at + 1) */
+	*at = '\0';
+	duration = strtoul(spec, &pos, 0);
+
+	/*
+	 * strtoul() returns 0 without consuming anything when there is no
+	 * number (e.g. "us@lock") and silently wraps a negative input around,
+	 * so reject both explicitly.
+	 */
+	if (pos == spec || *spec == '-')
+		goto invalid;
+
+	if (*pos == '\0' || !strcmp(pos, "ns"))
+		scale = 1;
+	else if (!strcmp(pos, "us"))
+		scale = 1000;
+	else if (!strcmp(pos, "ms"))
+		scale = 1000 * 1000;
+	else
+		goto invalid;
+
+	/*
+	 * strtoul() saturates to the maximum value on overflow, and the unit
+	 * conversion below must not wrap around either.
+	 */
+	if (duration == max_duration || duration > max_duration / scale) {
+		pr_err("lock delay is too long: %s@%s\n", spec, at + 1);
+		return false;
+	}
+	duration *= scale;
+
+	/* duplicate the name first so that a failure leaves 'delays' unchanged */
+	sym = strdup(at + 1);
+	if (sym == NULL) {
+		pr_err("Memory allocation failure\n");
+		return false;
+	}
+
+	tmp = realloc(delays, (nr_delays + 1) * sizeof(*delays));
+	if (tmp == NULL) {
+		pr_err("Memory allocation failure\n");
+		free(sym);
+		return false;
+	}
+	delays = tmp;
+
+	delays[nr_delays].sym = sym;
+	delays[nr_delays].time = duration;
+
+	nr_delays++;
+	return true;
+
+invalid:
+	pr_err("invalid delay time: %s@%s\n", spec, at + 1);
+	return false;
+}
+
+/*
+ * Option callback for -J/--inject-delay.
+ *
+ * @str holds one or more "TIME@FUNC" specs separated by commas and/or spaces.
+ * A private copy of @str is tokenized and every token is handed to
+ * add_lock_delay(); processing stops at the first token it rejects.
+ *
+ * Returns 0 when all tokens were accepted, -1 when the copy could not be
+ * allocated or a token was rejected.
+ */
+static int parse_lock_delay(const struct option *opt __maybe_unused, const char *str,
+			    int unset __maybe_unused)
+{
+	char *copy, *saveptr, *spec;
+	int err = 0;
+
+	/* strtok_r() writes into its input, so work on a duplicate */
+	copy = strdup(str);
+	if (copy == NULL)
+		return -1;
+
+	spec = strtok_r(copy, ", ", &saveptr);
+	while (spec != NULL) {
+		if (!add_lock_delay(spec)) {
+			err = -1;
+			break;
+		}
+		spec = strtok_r(NULL, ", ", &saveptr);
+	}
+
+	free(copy);
+	return err;
+}
+
int cmd_lock(int argc, const char **argv)
{
const struct option lock_options[] = {
@@ -2580,6 +2652,8 @@ int cmd_lock(int argc, const char **argv)
OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"),
OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS",
"Filter specific cgroups", parse_cgroup_filter),
+ OPT_CALLBACK('J', "inject-delay", NULL, "TIME@FUNC",
+ "Inject delays to specific locks", parse_lock_delay),
OPT_PARENT(lock_options)
};