path: root/kernel/sched
author      Nathan Zimmer <nzimmer@sgi.com>  2013-02-21 18:15:09 -0500
committer   Ingo Molnar <mingo@kernel.org>   2013-02-22 04:27:25 -0500
commit      bbbfeac92beff40eb86c7f682a7f1395f9f0ae52 (patch)
tree        80a9f7d32d7a9a7898349c6e6ca88a1a7c1edf13 /kernel/sched
parent      cb152ff26717961b10d0888cd983ba284cb99cd1 (diff)
sched: Fix /proc/sched_debug failure on very very large systems
On systems with 4096 cores, attempting to read /proc/sched_debug fails
because we are trying to push all the data into a single kmalloc buffer.
The issue is that on these very large machines all the data will not fit
in 4MB.

A better solution is to not use the single_open() mechanism but to
provide our own seq_operations and treat each cpu as an individual
record. The output should be identical to the previous version.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Nathan Zimmer <nzimmer@sgi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
[ Whitespace fixlet ]
[ Fix spello in comment ]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
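For context, the seq_file pattern the patch switches to looks roughly like
the sketch below. This is an illustrative sketch only (the demo_* names are
hypothetical, not from the patch): each online cpu becomes one record, the
kernel calls ->show() once per record and flushes the seq_file buffer in
between, so the output never has to fit in a single allocation the way
single_open() requires. The real patch additionally reserves the first
position for a header record, which is why its show() decodes the cpu as
(unsigned long)(v - 2).

    #include <linux/cpumask.h>
    #include <linux/seq_file.h>

    /*
     * Encode cpu n as the token n + 1: ->start() must return non-NULL
     * for a valid record, and cpu 0 would otherwise collide with NULL.
     */
    static void *demo_start(struct seq_file *m, loff_t *pos)
    {
            /*
             * Treat *pos as "next candidate cpu number" rather than a
             * dense index, so holes left by CPU hotplug are skipped.
             */
            unsigned int cpu = cpumask_next(*pos - 1, cpu_online_mask);

            *pos = cpu + 1;         /* resume after this cpu */
            return cpu < nr_cpu_ids ? (void *)(unsigned long)(cpu + 1) : NULL;
    }

    static void *demo_next(struct seq_file *m, void *v, loff_t *pos)
    {
            return demo_start(m, pos);      /* *pos already points past v */
    }

    static void demo_stop(struct seq_file *m, void *v)
    {
    }

    static int demo_show(struct seq_file *m, void *v)
    {
            seq_printf(m, "cpu#%lu\n", (unsigned long)v - 1);
            return 0;
    }

    static const struct seq_operations demo_sops = {
            .start  = demo_start,
            .next   = demo_next,
            .stop   = demo_stop,
            .show   = demo_show,
    };

Such a table would be wired up via seq_open() in ->open() and
seq_read()/seq_lseek()/seq_release() in the file_operations, exactly as the
patch below does for sched_debug_sops.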
Diffstat (limited to 'kernel/sched')
-rw-r--r--   kernel/sched/debug.c   90
1 file changed, 79 insertions, 11 deletions
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 7ae4c4c5420e..c496eb3c6459 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -269,11 +269,11 @@ static void print_cpu(struct seq_file *m, int cpu)
 	{
 		unsigned int freq = cpu_khz ? : 1;
 
-		SEQ_printf(m, "\ncpu#%d, %u.%03u MHz\n",
+		SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
 			   cpu, freq / 1000, (freq % 1000));
 	}
 #else
-	SEQ_printf(m, "\ncpu#%d\n", cpu);
+	SEQ_printf(m, "cpu#%d\n", cpu);
 #endif
 
 #define P(x) \
@@ -330,6 +330,7 @@ do { \
 	print_rq(m, rq, cpu);
 	rcu_read_unlock();
 	spin_unlock_irqrestore(&sched_debug_lock, flags);
+	SEQ_printf(m, "\n");
 }
 
 static const char *sched_tunable_scaling_names[] = {
@@ -338,11 +339,10 @@ static const char *sched_tunable_scaling_names[] = {
 	"linear"
 };
 
-static int sched_debug_show(struct seq_file *m, void *v)
+static void sched_debug_header(struct seq_file *m)
 {
 	u64 ktime, sched_clk, cpu_clk;
 	unsigned long flags;
-	int cpu;
 
 	local_irq_save(flags);
 	ktime = ktime_to_ns(ktime_get());
@@ -384,33 +384,101 @@ static int sched_debug_show(struct seq_file *m, void *v)
 #undef PN
 #undef P
 
-	SEQ_printf(m, "  .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling",
+	SEQ_printf(m, "  .%-40s: %d (%s)\n",
+		"sysctl_sched_tunable_scaling",
 		sysctl_sched_tunable_scaling,
 		sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
+	SEQ_printf(m, "\n");
+}
 
-	for_each_online_cpu(cpu)
-		print_cpu(m, cpu);
+static int sched_debug_show(struct seq_file *m, void *v)
+{
+	int cpu = (unsigned long)(v - 2);
 
-	SEQ_printf(m, "\n");
+	if (cpu != -1)
+		print_cpu(m, cpu);
+	else
+		sched_debug_header(m);
 
 	return 0;
 }
 
 void sysrq_sched_debug_show(void)
 {
-	sched_debug_show(NULL, NULL);
+	int cpu;
+
+	sched_debug_header(NULL);
+	for_each_online_cpu(cpu)
+		print_cpu(NULL, cpu);
+
+}
+
+/*
+ * This itererator needs some explanation.
+ * It returns 1 for the header position.
+ * This means 2 is cpu 0.
+ * In a hotplugged system some cpus, including cpu 0, may be missing so we have
+ * to use cpumask_* to iterate over the cpus.
+ */
+static void *sched_debug_start(struct seq_file *file, loff_t *offset)
+{
+	unsigned long n = *offset;
+
+	if (n == 0)
+		return (void *) 1;
+
+	n--;
+
+	if (n > 0)
+		n = cpumask_next(n - 1, cpu_online_mask);
+	else
+		n = cpumask_first(cpu_online_mask);
+
+	*offset = n + 1;
+
+	if (n < nr_cpu_ids)
+		return (void *)(unsigned long)(n + 2);
+	return NULL;
+}
+
+static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
+{
+	(*offset)++;
+	return sched_debug_start(file, offset);
+}
+
+static void sched_debug_stop(struct seq_file *file, void *data)
+{
+}
+
+static const struct seq_operations sched_debug_sops = {
+	.start = sched_debug_start,
+	.next = sched_debug_next,
+	.stop = sched_debug_stop,
+	.show = sched_debug_show,
+};
+
+static int sched_debug_release(struct inode *inode, struct file *file)
+{
+	seq_release(inode, file);
+
+	return 0;
 }
 
 static int sched_debug_open(struct inode *inode, struct file *filp)
 {
-	return single_open(filp, sched_debug_show, NULL);
+	int ret = 0;
+
+	ret = seq_open(filp, &sched_debug_sops);
+
+	return ret;
 }
 
 static const struct file_operations sched_debug_fops = {
 	.open		= sched_debug_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= single_release,
+	.release	= sched_debug_release,
 };
 
 static int __init init_sched_debug_procfs(void)
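A note on the pointer encoding in the patch, since it is easy to misread:
sched_debug_start() returns (void *)1 for the header position and
(void *)(cpu + 2) for cpu records, and sched_debug_show() decodes with
cpu = (unsigned long)(v - 2), so the header token wraps around to -1.
A standalone sketch of just that arithmetic (hypothetical, userspace,
relying on the same GCC void-pointer-arithmetic extension the kernel uses):

    #include <stdio.h>

    /*
     * Decode a record token the way sched_debug_show() does:
     * header token 1 wraps to -1, cpu token n + 2 yields n.
     */
    static void show(void *v)
    {
            int cpu = (unsigned long)(v - 2); /* GCC: void * arithmetic in bytes */

            if (cpu != -1)
                    printf("record: cpu#%d\n", cpu);
            else
                    printf("record: header\n");
    }

    int main(void)
    {
            show((void *)1UL);              /* the header position         */
            show((void *)(0UL + 2));        /* cpu 0                       */
            show((void *)(4095UL + 2));     /* cpu 4095 on a 4096-core box */
            return 0;
    }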