author     Nathan Zimmer <nzimmer@sgi.com>   2013-02-21 18:15:09 -0500
committer  Ingo Molnar <mingo@kernel.org>    2013-02-22 04:27:25 -0500
commit     bbbfeac92beff40eb86c7f682a7f1395f9f0ae52
tree       80a9f7d32d7a9a7898349c6e6ca88a1a7c1edf13 /kernel/sched
parent     cb152ff26717961b10d0888cd983ba284cb99cd1
sched: Fix /proc/sched_debug failure on very very large systems
On systems with 4096 cores, attempting to read /proc/sched_debug
fails because we are trying to push all the data into a single
kmalloc buffer.
The issue is that on these very large machines all the data will
not fit in 4 MB.
A better solution is to not use the single_open() mechanism but
to provide our own seq_operations and treat each cpu as an
individual record.
The output should be identical to the previous version.
Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Nathan Zimmer <nzimmer@sgi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
[ Whitespace fixlet ]
[ Fix spello in comment ]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/debug.c | 90 +++++++++++++++++++++++++++++++-------
 1 file changed, 79 insertions(+), 11 deletions(-)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 7ae4c4c5420e..c496eb3c6459 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -269,11 +269,11 @@ static void print_cpu(struct seq_file *m, int cpu)
 	{
 		unsigned int freq = cpu_khz ? : 1;
 
-		SEQ_printf(m, "\ncpu#%d, %u.%03u MHz\n",
+		SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
 			   cpu, freq / 1000, (freq % 1000));
 	}
 #else
-	SEQ_printf(m, "\ncpu#%d\n", cpu);
+	SEQ_printf(m, "cpu#%d\n", cpu);
 #endif
 
 #define P(x)							\
@@ -330,6 +330,7 @@ do {							\
 	print_rq(m, rq, cpu);
 	rcu_read_unlock();
 	spin_unlock_irqrestore(&sched_debug_lock, flags);
+	SEQ_printf(m, "\n");
 }
 
 static const char *sched_tunable_scaling_names[] = {
@@ -338,11 +339,10 @@ static const char *sched_tunable_scaling_names[] = {
 	"linear"
 };
 
-static int sched_debug_show(struct seq_file *m, void *v)
+static void sched_debug_header(struct seq_file *m)
 {
 	u64 ktime, sched_clk, cpu_clk;
 	unsigned long flags;
-	int cpu;
 
 	local_irq_save(flags);
 	ktime = ktime_to_ns(ktime_get());
@@ -384,33 +384,101 @@ static int sched_debug_show(struct seq_file *m, void *v)
 #undef PN
 #undef P
 
-	SEQ_printf(m, "  .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling",
+	SEQ_printf(m, "  .%-40s: %d (%s)\n",
+		"sysctl_sched_tunable_scaling",
 		sysctl_sched_tunable_scaling,
 		sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
+	SEQ_printf(m, "\n");
+}
 
-	for_each_online_cpu(cpu)
-		print_cpu(m, cpu);
+static int sched_debug_show(struct seq_file *m, void *v)
+{
+	int cpu = (unsigned long)(v - 2);
 
-	SEQ_printf(m, "\n");
+	if (cpu != -1)
+		print_cpu(m, cpu);
+	else
+		sched_debug_header(m);
 
 	return 0;
 }
 
 void sysrq_sched_debug_show(void)
 {
-	sched_debug_show(NULL, NULL);
+	int cpu;
+
+	sched_debug_header(NULL);
+	for_each_online_cpu(cpu)
+		print_cpu(NULL, cpu);
+
+}
+
+/*
+ * This itererator needs some explanation.
+ * It returns 1 for the header position.
+ * This means 2 is cpu 0.
+ * In a hotplugged system some cpus, including cpu 0, may be missing so we have
+ * to use cpumask_* to iterate over the cpus.
+ */
+static void *sched_debug_start(struct seq_file *file, loff_t *offset)
+{
+	unsigned long n = *offset;
+
+	if (n == 0)
+		return (void *) 1;
+
+	n--;
+
+	if (n > 0)
+		n = cpumask_next(n - 1, cpu_online_mask);
+	else
+		n = cpumask_first(cpu_online_mask);
+
+	*offset = n + 1;
+
+	if (n < nr_cpu_ids)
+		return (void *)(unsigned long)(n + 2);
+	return NULL;
+}
+
+static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
+{
+	(*offset)++;
+	return sched_debug_start(file, offset);
+}
+
+static void sched_debug_stop(struct seq_file *file, void *data)
+{
+}
+
+static const struct seq_operations sched_debug_sops = {
+	.start = sched_debug_start,
+	.next = sched_debug_next,
+	.stop = sched_debug_stop,
+	.show = sched_debug_show,
+};
+
+static int sched_debug_release(struct inode *inode, struct file *file)
+{
+	seq_release(inode, file);
+
+	return 0;
 }
 
 static int sched_debug_open(struct inode *inode, struct file *filp)
 {
-	return single_open(filp, sched_debug_show, NULL);
+	int ret = 0;
+
+	ret = seq_open(filp, &sched_debug_sops);
+
+	return ret;
 }
 
 static const struct file_operations sched_debug_fops = {
 	.open		= sched_debug_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= single_release,
+	.release	= sched_debug_release,
 };
 
 static int __init init_sched_debug_procfs(void)
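
For readers unfamiliar with the seq_file interface the patch switches to,
here is a minimal sketch of the same per-record pattern as a standalone
module. Everything in it is illustrative: the cpu_demo_* names and the
/proc/cpu_demo path are invented, the sketch omits the header record (the
patch reserves iterator position 1 for it), and it uses the
file_operations-based proc_create() of 2013-era kernels (since v5.6,
proc_create() takes a struct proc_ops instead). The point it demonstrates
is why the 4 MB limit goes away: the seq_file core calls ->show() once per
record and copies data out to user space as it goes, so no buffer ever has
to hold the whole file at once, only (at minimum) one record.

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/cpumask.h>

/*
 * Map the file offset to an online cpu. As in the patch, the returned
 * cookie is cpu + 1 rather than cpu, because returning NULL (which cpu 0
 * would be) tells the seq_file core to stop iterating.
 */
static void *cpu_demo_start(struct seq_file *m, loff_t *pos)
{
	unsigned int cpu = cpumask_next((int)*pos - 1, cpu_online_mask);

	if (cpu >= nr_cpu_ids)
		return NULL;		/* past the last online cpu: EOF */

	*pos = cpu;			/* remember which record we are on */
	return (void *)(unsigned long)(cpu + 1);
}

static void *cpu_demo_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;			/* step past the cpu just shown */
	return cpu_demo_start(m, pos);
}

static void cpu_demo_stop(struct seq_file *m, void *v)
{
}

/* Called once per record; only one cpu's output is buffered at a time. */
static int cpu_demo_show(struct seq_file *m, void *v)
{
	unsigned int cpu = (unsigned long)v - 1;	/* undo the +1 encoding */

	seq_printf(m, "cpu#%u is online\n", cpu);
	return 0;
}

static const struct seq_operations cpu_demo_sops = {
	.start	= cpu_demo_start,
	.next	= cpu_demo_next,
	.stop	= cpu_demo_stop,
	.show	= cpu_demo_show,
};

static int cpu_demo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &cpu_demo_sops);
}

static const struct file_operations cpu_demo_fops = {
	.open		= cpu_demo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,	/* plain seq_release suffices here */
};

static int __init cpu_demo_init(void)
{
	/* /proc/cpu_demo is a made-up name for this sketch */
	return proc_create("cpu_demo", 0444, NULL, &cpu_demo_fops) ? 0 : -ENOMEM;
}

static void __exit cpu_demo_exit(void)
{
	remove_proc_entry("cpu_demo", NULL);
}

module_init(cpu_demo_init);
module_exit(cpu_demo_exit);
MODULE_LICENSE("GPL");

A note on the v - 2 arithmetic in the patch's sched_debug_show(): v is the
cookie produced by sched_debug_start(), so (unsigned long)(v - 2) yields -1
for the header cookie (1) and the cpu number for cpu cookies (cpu + 2),
which is exactly what the if (cpu != -1) test distinguishes.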