aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul E. McKenney <paul.mckenney@linaro.org>2012-01-16 16:29:10 -0500
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>2012-02-21 12:03:49 -0500
commita858af2875fb291d0f4b0a4419fefbf03c2379c0 (patch)
tree5183796cbf50ec0cd3bd5c741db15705376dacbd
parent13cfcca0e4e2d4cee1d0183c049eb34e54ac976e (diff)
rcu: Print scheduling-clock information on RCU CPU stall-warning messages
There have been situations where RCU CPU stall warnings were caused by issues in scheduling-clock timer initialization. To make it easier to track these down, this commit causes the RCU CPU stall-warning messages to print out the number of scheduling-clock interrupts taken in the current grace period for each stalled CPU. Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-rw-r--r--kernel/rcutree.c33
-rw-r--r--kernel/rcutree.h11
-rw-r--r--kernel/rcutree_plugin.h150
-rw-r--r--lib/Kconfig.debug14
4 files changed, 194 insertions, 14 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index cfdab9898e33..dccd2f78db4e 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -689,12 +689,6 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
689 return; 689 return;
690 } 690 }
691 rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3; 691 rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3;
692
693 /*
694 * Now rat on any tasks that got kicked up to the root rcu_node
695 * due to CPU offlining.
696 */
697 ndetected = rcu_print_task_stall(rnp);
698 raw_spin_unlock_irqrestore(&rnp->lock, flags); 692 raw_spin_unlock_irqrestore(&rnp->lock, flags);
699 693
700 /* 694 /*
@@ -702,8 +696,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
702 * See Documentation/RCU/stallwarn.txt for info on how to debug 696 * See Documentation/RCU/stallwarn.txt for info on how to debug
703 * RCU CPU stall warnings. 697 * RCU CPU stall warnings.
704 */ 698 */
705 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", 699 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks:",
706 rsp->name); 700 rsp->name);
701 print_cpu_stall_info_begin();
707 rcu_for_each_leaf_node(rsp, rnp) { 702 rcu_for_each_leaf_node(rsp, rnp) {
708 raw_spin_lock_irqsave(&rnp->lock, flags); 703 raw_spin_lock_irqsave(&rnp->lock, flags);
709 ndetected += rcu_print_task_stall(rnp); 704 ndetected += rcu_print_task_stall(rnp);
@@ -712,11 +707,22 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
712 continue; 707 continue;
713 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 708 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
714 if (rnp->qsmask & (1UL << cpu)) { 709 if (rnp->qsmask & (1UL << cpu)) {
715 printk(" %d", rnp->grplo + cpu); 710 print_cpu_stall_info(rsp, rnp->grplo + cpu);
716 ndetected++; 711 ndetected++;
717 } 712 }
718 } 713 }
719 printk("} (detected by %d, t=%ld jiffies)\n", 714
715 /*
716 * Now rat on any tasks that got kicked up to the root rcu_node
717 * due to CPU offlining.
718 */
719 rnp = rcu_get_root(rsp);
720 raw_spin_lock_irqsave(&rnp->lock, flags);
721 ndetected = rcu_print_task_stall(rnp);
722 raw_spin_unlock_irqrestore(&rnp->lock, flags);
723
724 print_cpu_stall_info_end();
725 printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n",
720 smp_processor_id(), (long)(jiffies - rsp->gp_start)); 726 smp_processor_id(), (long)(jiffies - rsp->gp_start));
721 if (ndetected == 0) 727 if (ndetected == 0)
722 printk(KERN_ERR "INFO: Stall ended before state dump start\n"); 728 printk(KERN_ERR "INFO: Stall ended before state dump start\n");
@@ -740,8 +746,11 @@ static void print_cpu_stall(struct rcu_state *rsp)
740 * See Documentation/RCU/stallwarn.txt for info on how to debug 746 * See Documentation/RCU/stallwarn.txt for info on how to debug
741 * RCU CPU stall warnings. 747 * RCU CPU stall warnings.
742 */ 748 */
743 printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", 749 printk(KERN_ERR "INFO: %s self-detected stall on CPU", rsp->name);
744 rsp->name, smp_processor_id(), jiffies - rsp->gp_start); 750 print_cpu_stall_info_begin();
751 print_cpu_stall_info(rsp, smp_processor_id());
752 print_cpu_stall_info_end();
753 printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start);
745 if (!trigger_all_cpu_backtrace()) 754 if (!trigger_all_cpu_backtrace())
746 dump_stack(); 755 dump_stack();
747 756
@@ -831,6 +840,7 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct
831 rdp->passed_quiesce = 0; 840 rdp->passed_quiesce = 0;
832 } else 841 } else
833 rdp->qs_pending = 0; 842 rdp->qs_pending = 0;
843 zero_cpu_stall_ticks(rdp);
834 } 844 }
835} 845}
836 846
@@ -1496,6 +1506,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1496void rcu_check_callbacks(int cpu, int user) 1506void rcu_check_callbacks(int cpu, int user)
1497{ 1507{
1498 trace_rcu_utilization("Start scheduler-tick"); 1508 trace_rcu_utilization("Start scheduler-tick");
1509 increment_cpu_stall_ticks();
1499 if (user || rcu_is_cpu_rrupt_from_idle()) { 1510 if (user || rcu_is_cpu_rrupt_from_idle()) {
1500 1511
1501 /* 1512 /*
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 0328a537846a..e2ac8ee415bb 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -239,6 +239,12 @@ struct rcu_data {
239 bool preemptible; /* Preemptible RCU? */ 239 bool preemptible; /* Preemptible RCU? */
240 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 240 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
241 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 241 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
242#ifdef CONFIG_RCU_CPU_STALL_INFO
243 unsigned long ticks_this_gp; /* The number of scheduling-clock */
244 /* ticks this CPU has handled */
245 /* during and after the last grace */
246 /* period it is aware of. */
247#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
242 248
243 /* 2) batch handling */ 249 /* 2) batch handling */
244 /* 250 /*
@@ -466,5 +472,10 @@ static void __cpuinit rcu_prepare_kthreads(int cpu);
466static void rcu_prepare_for_idle_init(int cpu); 472static void rcu_prepare_for_idle_init(int cpu);
467static void rcu_cleanup_after_idle(int cpu); 473static void rcu_cleanup_after_idle(int cpu);
468static void rcu_prepare_for_idle(int cpu); 474static void rcu_prepare_for_idle(int cpu);
475static void print_cpu_stall_info_begin(void);
476static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
477static void print_cpu_stall_info_end(void);
478static void zero_cpu_stall_ticks(struct rcu_data *rdp);
479static void increment_cpu_stall_ticks(void);
469 480
470#endif /* #ifndef RCU_TREE_NONCORE */ 481#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 0e74e1c6333f..aa93b074bb2f 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -63,7 +63,10 @@ static void __init rcu_bootup_announce_oddness(void)
63 printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); 63 printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
64#endif 64#endif
65#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) 65#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
66 printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); 66 printk(KERN_INFO "\tDump stacks of tasks blocking RCU-preempt GP.\n");
67#endif
68#if defined(CONFIG_RCU_CPU_STALL_INFO)
69 printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");
67#endif 70#endif
68#if NUM_RCU_LVL_4 != 0 71#if NUM_RCU_LVL_4 != 0
69 printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); 72 printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
@@ -490,6 +493,31 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
490 493
491#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 494#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
492 495
496#ifdef CONFIG_RCU_CPU_STALL_INFO
497
498static void rcu_print_task_stall_begin(struct rcu_node *rnp)
499{
500 printk(KERN_ERR "\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
501 rnp->level, rnp->grplo, rnp->grphi);
502}
503
504static void rcu_print_task_stall_end(void)
505{
506 printk(KERN_CONT "\n");
507}
508
509#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
510
511static void rcu_print_task_stall_begin(struct rcu_node *rnp)
512{
513}
514
515static void rcu_print_task_stall_end(void)
516{
517}
518
519#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
520
493/* 521/*
494 * Scan the current list of tasks blocked within RCU read-side critical 522 * Scan the current list of tasks blocked within RCU read-side critical
495 * sections, printing out the tid of each. 523 * sections, printing out the tid of each.
@@ -501,12 +529,14 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
501 529
502 if (!rcu_preempt_blocked_readers_cgp(rnp)) 530 if (!rcu_preempt_blocked_readers_cgp(rnp))
503 return 0; 531 return 0;
532 rcu_print_task_stall_begin(rnp);
504 t = list_entry(rnp->gp_tasks, 533 t = list_entry(rnp->gp_tasks,
505 struct task_struct, rcu_node_entry); 534 struct task_struct, rcu_node_entry);
506 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { 535 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
507 printk(" P%d", t->pid); 536 printk(KERN_CONT " P%d", t->pid);
508 ndetected++; 537 ndetected++;
509 } 538 }
539 rcu_print_task_stall_end();
510 return ndetected; 540 return ndetected;
511} 541}
512 542
@@ -2004,7 +2034,7 @@ static void rcu_cleanup_after_idle(int cpu)
2004} 2034}
2005 2035
2006/* 2036/*
2007 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=y, 2037 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
2008 * is nothing. 2038 * is nothing.
2009 */ 2039 */
2010static void rcu_prepare_for_idle(int cpu) 2040static void rcu_prepare_for_idle(int cpu)
@@ -2273,3 +2303,117 @@ static void rcu_prepare_for_idle(int cpu)
2273} 2303}
2274 2304
2275#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 2305#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
2306
2307#ifdef CONFIG_RCU_CPU_STALL_INFO
2308
2309#ifdef CONFIG_RCU_FAST_NO_HZ
2310
2311static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
2312{
2313 struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu);
2314
2315 sprintf(cp, "drain=%d %c timer=%lld",
2316 per_cpu(rcu_dyntick_drain, cpu),
2317 per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.',
2318 hrtimer_active(hrtp)
2319 ? ktime_to_us(hrtimer_get_remaining(hrtp))
2320 : -1);
2321}
2322
2323#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
2324
2325static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
2326{
2327}
2328
2329#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
2330
2331/* Initiate the stall-info list. */
2332static void print_cpu_stall_info_begin(void)
2333{
2334 printk(KERN_CONT "\n");
2335}
2336
2337/*
2338 * Print out diagnostic information for the specified stalled CPU.
2339 *
2340 * If the specified CPU is aware of the current RCU grace period
2341 * (flavor specified by rsp), then print the number of scheduling
2342 * clock interrupts the CPU has taken during the time that it has
2343 * been aware. Otherwise, print the number of RCU grace periods
2344 * that this CPU is ignorant of, for example, "1" if the CPU was
2345 * aware of the previous grace period.
2346 *
2347 * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
2348 */
2349static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
2350{
2351 char fast_no_hz[72];
2352 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2353 struct rcu_dynticks *rdtp = rdp->dynticks;
2354 char *ticks_title;
2355 unsigned long ticks_value;
2356
2357 if (rsp->gpnum == rdp->gpnum) {
2358 ticks_title = "ticks this GP";
2359 ticks_value = rdp->ticks_this_gp;
2360 } else {
2361 ticks_title = "GPs behind";
2362 ticks_value = rsp->gpnum - rdp->gpnum;
2363 }
2364 print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
2365 printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n",
2366 cpu, ticks_value, ticks_title,
2367 atomic_read(&rdtp->dynticks) & 0xfff,
2368 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
2369 fast_no_hz);
2370}
2371
2372/* Terminate the stall-info list. */
2373static void print_cpu_stall_info_end(void)
2374{
2375 printk(KERN_ERR "\t");
2376}
2377
2378/* Zero ->ticks_this_gp for all flavors of RCU. */
2379static void zero_cpu_stall_ticks(struct rcu_data *rdp)
2380{
2381 rdp->ticks_this_gp = 0;
2382}
2383
2384/* Increment ->ticks_this_gp for all flavors of RCU. */
2385static void increment_cpu_stall_ticks(void)
2386{
2387 __get_cpu_var(rcu_sched_data).ticks_this_gp++;
2388 __get_cpu_var(rcu_bh_data).ticks_this_gp++;
2389#ifdef CONFIG_TREE_PREEMPT_RCU
2390 __get_cpu_var(rcu_preempt_data).ticks_this_gp++;
2391#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
2392}
2393
2394#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
2395
2396static void print_cpu_stall_info_begin(void)
2397{
2398 printk(KERN_CONT " {");
2399}
2400
2401static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
2402{
2403 printk(KERN_CONT " %d", cpu);
2404}
2405
2406static void print_cpu_stall_info_end(void)
2407{
2408 printk(KERN_CONT "} ");
2409}
2410
2411static void zero_cpu_stall_ticks(struct rcu_data *rdp)
2412{
2413}
2414
2415static void increment_cpu_stall_ticks(void)
2416{
2417}
2418
2419#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 3cc419d05f2f..d27a2aa3e815 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -927,6 +927,20 @@ config RCU_CPU_STALL_VERBOSE
927 927
928 Say Y if you want to enable such checks. 928 Say Y if you want to enable such checks.
929 929
930config RCU_CPU_STALL_INFO
931 bool "Print additional diagnostics on RCU CPU stall"
932 depends on (TREE_RCU || TREE_PREEMPT_RCU) && DEBUG_KERNEL
933 default n
934 help
935 For each stalled CPU that is aware of the current RCU grace
936 period, print out additional per-CPU diagnostic information
937 regarding scheduling-clock ticks, idle state, and,
938 for RCU_FAST_NO_HZ kernels, idle-entry state.
939
940 Say N if you are unsure.
941
942 Say Y if you want to enable such diagnostics.
943
930config RCU_TRACE 944config RCU_TRACE
931 bool "Enable tracing for RCU" 945 bool "Enable tracing for RCU"
932 depends on DEBUG_KERNEL 946 depends on DEBUG_KERNEL