diff options
author | Yunlong Song <yunlong.song@huawei.com> | 2015-03-31 09:46:30 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2015-04-08 08:07:22 -0400 |
commit | cb06ac256a16fc1a5ab063107c2b35b3b9e95102 (patch) | |
tree | b0cac7405e64e24c35a349b5b1dcc41cf7e19cd0 /tools | |
parent | a35e27d0e5d801ff75481a8f639bb4d59ea1aafa (diff) |
perf sched replay: Alloc the memory of pid_to_task dynamically to adapt to the unexpected change of pid_max
Currently, struct task_desc *pid_to_task[MAX_PID] is a fixed-size array
whose size is set at compile time, and this has two problems:
Problem 1: If pid_max, which is the maximum number of pids in the
system, is much smaller than MAX_PID (1024*1000), then stack memory is
wasted. This may happen when the number of cpu cores is much smaller
than 1000, since the kernel scales the default pid_max with the number
of cpus (e.g. 160 cores give a pid_max of 163840).
Problem 2: If pid_max is changed from the default value to a value
larger than MAX_PID, then it will cause an assertion failure. The
maximum value of pid_max can be set to pid_max_max (see pidmap_init
defined in kernel/pid.c), which equals PID_MAX_LIMIT. On x86_64,
PID_MAX_LIMIT is 4*1024*1024 (defined in include/linux/threads.h). This
value is much larger than MAX_PID, and a pid_to_task array of that size
would take up 32768 Kbytes (4*1024*1024*8/1024), which is much larger
than the default 8192 Kbytes stack size of the calling process.
Due to these two problems, we use calloc to allocate the memory of
pid_to_task dynamically.
Example:
Test environment: x86_64 with 160 cores
$ cat /proc/sys/kernel/pid_max
163840
$ echo 1025000 > /proc/sys/kernel/pid_max
$ cat /proc/sys/kernel/pid_max
1025000
Run some applications until the pid of some process is greater than
the value of MAX_PID (1024*1000).
Before this patch:
$ perf sched replay
run measurement overhead: 221 nsecs
sleep measurement overhead: 55480 nsecs
the run test took 1000008 nsecs
the sleep test took 1063151 nsecs
perf: builtin-sched.c:330: register_pid: Assertion `!(pid >= 1024000)'
failed.
Aborted
After this patch:
$ perf sched replay
run measurement overhead: 221 nsecs
sleep measurement overhead: 55435 nsecs
the run test took 1000004 nsecs
the sleep test took 1059312 nsecs
nr_run_events: 10
nr_sleep_events: 1562
nr_wakeup_events: 5
task 0 ( :1: 1), nr_events: 1
task 1 ( :2: 2), nr_events: 1
task 2 ( :3: 3), nr_events: 1
task 3 ( :5: 5), nr_events: 1
...
Signed-off-by: Yunlong Song <yunlong.song@huawei.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1427809596-29559-4-git-send-email-yunlong.song@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/builtin-sched.c | 11 |
1 files changed, 9 insertions, 2 deletions
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index c46610447ede..20d887b222e4 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <semaphore.h> | 23 | #include <semaphore.h> |
24 | #include <pthread.h> | 24 | #include <pthread.h> |
25 | #include <math.h> | 25 | #include <math.h> |
26 | #include <api/fs/fs.h> | ||
26 | 27 | ||
27 | #define PR_SET_NAME 15 /* Set process name */ | 28 | #define PR_SET_NAME 15 /* Set process name */ |
28 | #define MAX_CPUS 4096 | 29 | #define MAX_CPUS 4096 |
@@ -124,7 +125,7 @@ struct perf_sched { | |||
124 | struct perf_tool tool; | 125 | struct perf_tool tool; |
125 | const char *sort_order; | 126 | const char *sort_order; |
126 | unsigned long nr_tasks; | 127 | unsigned long nr_tasks; |
127 | struct task_desc *pid_to_task[MAX_PID]; | 128 | struct task_desc **pid_to_task; |
128 | struct task_desc **tasks; | 129 | struct task_desc **tasks; |
129 | const struct trace_sched_handler *tp_handler; | 130 | const struct trace_sched_handler *tp_handler; |
130 | pthread_mutex_t start_work_mutex; | 131 | pthread_mutex_t start_work_mutex; |
@@ -326,8 +327,14 @@ static struct task_desc *register_pid(struct perf_sched *sched, | |||
326 | unsigned long pid, const char *comm) | 327 | unsigned long pid, const char *comm) |
327 | { | 328 | { |
328 | struct task_desc *task; | 329 | struct task_desc *task; |
330 | static int pid_max; | ||
329 | 331 | ||
330 | BUG_ON(pid >= MAX_PID); | 332 | if (sched->pid_to_task == NULL) { |
333 | if (sysctl__read_int("kernel/pid_max", &pid_max) < 0) | ||
334 | pid_max = MAX_PID; | ||
335 | BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL); | ||
336 | } | ||
337 | BUG_ON(pid >= (unsigned long)pid_max); | ||
331 | 338 | ||
332 | task = sched->pid_to_task[pid]; | 339 | task = sched->pid_to_task[pid]; |
333 | 340 | ||