aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorYunlong Song <yunlong.song@huawei.com>2015-03-31 09:46:30 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2015-04-08 08:07:22 -0400
commitcb06ac256a16fc1a5ab063107c2b35b3b9e95102 (patch)
treeb0cac7405e64e24c35a349b5b1dcc41cf7e19cd0 /tools
parenta35e27d0e5d801ff75481a8f639bb4d59ea1aafa (diff)
perf sched replay: Alloc the memory of pid_to_task dynamically to adapt to the unexpected change of pid_max
The current allocation of struct task_desc *pid_to_task[MAX_PID] is a fixed-size array preset at compile time, which has two problems:

Problem 1: If pid_max, the maximum number of pids in the system, is much smaller than MAX_PID (1024*1000), stack memory is wasted. This may happen when the number of CPU cores is much smaller than 1000.

Problem 2: If pid_max is changed from its default value to a value larger than MAX_PID, an assertion failure occurs. pid_max can be set as high as pid_max_max (see pidmap_init, defined in kernel/pid.c), which equals PID_MAX_LIMIT. On x86_64, PID_MAX_LIMIT is 4*1024*1024 (defined in include/linux/threads.h). This value is much larger than MAX_PID, and a pid_to_task array of that size would take up 32768 Kbytes (4*1024*1024*8/1024), which is much larger than the default 8192 Kbytes stack size of the calling process.

Because of these two problems, we use calloc to allocate the memory of pid_to_task dynamically.

Example:

Test environment: x86_64 with 160 cores

 $ cat /proc/sys/kernel/pid_max
 163840
 $ echo 1025000 > /proc/sys/kernel/pid_max
 $ cat /proc/sys/kernel/pid_max
 1025000

Run some applications until the pid of some process is greater than the value of MAX_PID (1024*1000).

Before this patch:

 $ perf sched replay
 run measurement overhead: 221 nsecs
 sleep measurement overhead: 55480 nsecs
 the run test took 1000008 nsecs
 the sleep test took 1063151 nsecs
 perf: builtin-sched.c:330: register_pid: Assertion `!(pid >= 1024000)' failed.
 Aborted

After this patch:

 $ perf sched replay
 run measurement overhead: 221 nsecs
 sleep measurement overhead: 55435 nsecs
 the run test took 1000004 nsecs
 the sleep test took 1059312 nsecs
 nr_run_events: 10
 nr_sleep_events: 1562
 nr_wakeup_events: 5
 task 0 ( :1: 1), nr_events: 1
 task 1 ( :2: 2), nr_events: 1
 task 2 ( :3: 3), nr_events: 1
 task 3 ( :5: 5), nr_events: 1
 ...
Signed-off-by: Yunlong Song <yunlong.song@huawei.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1427809596-29559-4-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/builtin-sched.c11
1 files changed, 9 insertions, 2 deletions
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index c46610447ede..20d887b222e4 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -23,6 +23,7 @@
23#include <semaphore.h> 23#include <semaphore.h>
24#include <pthread.h> 24#include <pthread.h>
25#include <math.h> 25#include <math.h>
26#include <api/fs/fs.h>
26 27
27#define PR_SET_NAME 15 /* Set process name */ 28#define PR_SET_NAME 15 /* Set process name */
28#define MAX_CPUS 4096 29#define MAX_CPUS 4096
@@ -124,7 +125,7 @@ struct perf_sched {
124 struct perf_tool tool; 125 struct perf_tool tool;
125 const char *sort_order; 126 const char *sort_order;
126 unsigned long nr_tasks; 127 unsigned long nr_tasks;
127 struct task_desc *pid_to_task[MAX_PID]; 128 struct task_desc **pid_to_task;
128 struct task_desc **tasks; 129 struct task_desc **tasks;
129 const struct trace_sched_handler *tp_handler; 130 const struct trace_sched_handler *tp_handler;
130 pthread_mutex_t start_work_mutex; 131 pthread_mutex_t start_work_mutex;
@@ -326,8 +327,14 @@ static struct task_desc *register_pid(struct perf_sched *sched,
326 unsigned long pid, const char *comm) 327 unsigned long pid, const char *comm)
327{ 328{
328 struct task_desc *task; 329 struct task_desc *task;
330 static int pid_max;
329 331
330 BUG_ON(pid >= MAX_PID); 332 if (sched->pid_to_task == NULL) {
333 if (sysctl__read_int("kernel/pid_max", &pid_max) < 0)
334 pid_max = MAX_PID;
335 BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL);
336 }
337 BUG_ON(pid >= (unsigned long)pid_max);
331 338
332 task = sched->pid_to_task[pid]; 339 task = sched->pid_to_task[pid];
333 340