author		Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2011-05-12 04:08:07 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-05-19 17:25:29 -0400
commit		80d02085d99039b3b7f3a73c8896226b0cb1ba07 (patch)
tree		c310902423ecb00effadcb59c60cbf118d4037cb
parent		11c476f31a0fabc6e604da5b09a6590b57c3fb20 (diff)
Revert "rcu: Decrease memory-barrier usage based on semi-formal proof"
This reverts commit e59fb3120becfb36b22ddb8bd27d065d3cdca499. This reversion was due to (extreme) boot-time slowdowns on SPARC seen by Yinghai Lu and on x86 by Ingo . This is a non-trivial reversion due to intervening commits. Conflicts: Documentation/RCU/trace.txt kernel/rcutree.c Signed-off-by: Ingo Molnar <mingo@elte.hu>
 Documentation/RCU/trace.txt |  17
 kernel/rcutree.c            | 130
 kernel/rcutree.h            |   9
 kernel/rcutree_plugin.h     |   7
 kernel/rcutree_trace.c      |  12
 5 files changed, 102 insertions(+), 73 deletions(-)
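
This revert restores the pre-e59fb312 dynticks bookkeeping: two plain
per-CPU counters, dynticks and dynticks_nmi, each incremented on every
transition so that an even value means the CPU is in dynticks-idle (or
outside any NMI handler) and an odd value means otherwise, with explicit
smp_mb() calls ordering the increments against read-side critical
sections. The following minimal standalone sketch illustrates that
even/odd protocol; it is illustrative only, and the sketch_* names are
invented here rather than taken from the kernel:

#include <stdio.h>

struct sketch_dynticks {
	int dynticks;		/* Even value for dynticks-idle, else odd. */
	int dynticks_nmi;	/* Even value when outside NMI, else odd. */
};

/* RCU may ignore a CPU only if both counters are even. */
static int sketch_cpu_in_dynticks_idle(const struct sketch_dynticks *d)
{
	return ((d->dynticks & 0x1) == 0) && ((d->dynticks_nmi & 0x1) == 0);
}

int main(void)
{
	struct sketch_dynticks d = { .dynticks = 0, .dynticks_nmi = 0 };

	printf("idle: %d\n", sketch_cpu_in_dynticks_idle(&d));	/* prints 1 */
	d.dynticks++;		/* CPU exits dynticks-idle: counter goes odd. */
	printf("idle: %d\n", sketch_cpu_in_dynticks_idle(&d));	/* prints 0 */
	d.dynticks++;		/* CPU re-enters dynticks-idle: even again. */
	d.dynticks_nmi++;	/* ...but an NMI arrives: dynticks_nmi odd. */
	printf("idle: %d\n", sketch_cpu_in_dynticks_idle(&d));	/* prints 0 */
	return 0;
}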
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index 8173cec473aa..c078ad48f7a1 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -99,11 +99,18 @@ o "qp" indicates that RCU still expects a quiescent state from
 
 o "dt" is the current value of the dyntick counter that is incremented
 	when entering or leaving dynticks idle state, either by the
-	scheduler or by irq.  This number is even if the CPU is in
-	dyntick idle mode and odd otherwise.  The number after the first
-	"/" is the interrupt nesting depth when in dyntick-idle state,
-	or one greater than the interrupt-nesting depth otherwise.
-	The number after the second "/" is the NMI nesting depth.
+	scheduler or by irq.  The number after the "/" is the interrupt
+	nesting depth when in dyntick-idle state, or one greater than
+	the interrupt-nesting depth otherwise.
+
+	This field is displayed only for CONFIG_NO_HZ kernels.
+
+o "dn" is the current value of the dyntick counter that is incremented
+	when entering or leaving dynticks idle state via NMI.  If both
+	the "dt" and "dn" values are even, then this CPU is in dynticks
+	idle mode and may be ignored by RCU.  If either of these two
+	counters is odd, then RCU must be alert to the possibility of
+	an RCU read-side critical section running on this CPU.
 
 	This field is displayed only for CONFIG_NO_HZ kernels.
 
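With the restored format, the per-CPU rcudata trace line would carry
these fields as, for example (values hypothetical, layout per the
rcutree_trace.c change below):

	dt=4/0 dn=0 df=1023

Read this way: dt=4/0 would mean dynticks is even (the CPU is in
dyntick-idle) with an interrupt nesting depth of 0, dn=0 would mean the
CPU is outside any NMI handler, and df counts how often
force_quiescent_state() found this CPU in dyntick-idle.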
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5616b17e4a22..e486f7c3ffb8 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 #ifdef CONFIG_NO_HZ
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = 1,
-	.dynticks = ATOMIC_INIT(1),
+	.dynticks = 1,
 };
 #endif /* #ifdef CONFIG_NO_HZ */
 
@@ -321,25 +321,13 @@ void rcu_enter_nohz(void)
 	unsigned long flags;
 	struct rcu_dynticks *rdtp;
 
+	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	if (--rdtp->dynticks_nesting) {
-		local_irq_restore(flags);
-		return;
-	}
-	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
-	smp_mb__before_atomic_inc();  /* See above. */
-	atomic_inc(&rdtp->dynticks);
-	smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
-	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+	rdtp->dynticks++;
+	rdtp->dynticks_nesting--;
+	WARN_ON_ONCE(rdtp->dynticks & 0x1);
 	local_irq_restore(flags);
-
-	/* If the interrupt queued a callback, get out of dyntick mode. */
-	if (in_irq() &&
-	    (__get_cpu_var(rcu_sched_data).nxtlist ||
-	     __get_cpu_var(rcu_bh_data).nxtlist ||
-	     rcu_preempt_needs_cpu(smp_processor_id())))
-		set_need_resched();
 }
 
 /*
@@ -355,16 +343,11 @@ void rcu_exit_nohz(void)
 
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	if (rdtp->dynticks_nesting++) {
-		local_irq_restore(flags);
-		return;
-	}
-	smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
-	atomic_inc(&rdtp->dynticks);
-	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
-	smp_mb__after_atomic_inc();  /* See above. */
-	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+	rdtp->dynticks++;
+	rdtp->dynticks_nesting++;
+	WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
 	local_irq_restore(flags);
+	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
 }
 
 /**
@@ -378,15 +361,11 @@ void rcu_nmi_enter(void)
 {
 	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
 
-	if (rdtp->dynticks_nmi_nesting == 0 &&
-	    (atomic_read(&rdtp->dynticks) & 0x1))
+	if (rdtp->dynticks & 0x1)
 		return;
-	rdtp->dynticks_nmi_nesting++;
-	smp_mb__before_atomic_inc();  /* Force delay from prior write. */
-	atomic_inc(&rdtp->dynticks);
-	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
-	smp_mb__after_atomic_inc();  /* See above. */
-	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+	rdtp->dynticks_nmi++;
+	WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1));
+	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
 }
 
 /**
@@ -400,14 +379,11 @@ void rcu_nmi_exit(void)
 {
 	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
 
-	if (rdtp->dynticks_nmi_nesting == 0 ||
-	    --rdtp->dynticks_nmi_nesting != 0)
+	if (rdtp->dynticks & 0x1)
 		return;
-	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
-	smp_mb__before_atomic_inc();  /* See above. */
-	atomic_inc(&rdtp->dynticks);
-	smp_mb__after_atomic_inc();  /* Force delay to next write. */
-	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
+	rdtp->dynticks_nmi++;
+	WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1);
 }
 
 /**
@@ -418,7 +394,13 @@ void rcu_nmi_exit(void)
  */
 void rcu_irq_enter(void)
 {
-	rcu_exit_nohz();
+	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
+
+	if (rdtp->dynticks_nesting++)
+		return;
+	rdtp->dynticks++;
+	WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
+	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
 }
 
 /**
@@ -430,7 +412,18 @@ void rcu_irq_enter(void)
  */
 void rcu_irq_exit(void)
 {
-	rcu_enter_nohz();
+	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
+
+	if (--rdtp->dynticks_nesting)
+		return;
+	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
+	rdtp->dynticks++;
+	WARN_ON_ONCE(rdtp->dynticks & 0x1);
+
+	/* If the interrupt queued a callback, get out of dyntick mode. */
+	if (__this_cpu_read(rcu_sched_data.nxtlist) ||
+	    __this_cpu_read(rcu_bh_data.nxtlist))
+		set_need_resched();
 }
 
 #ifdef CONFIG_SMP
@@ -442,8 +435,19 @@ void rcu_irq_exit(void)
  */
 static int dyntick_save_progress_counter(struct rcu_data *rdp)
 {
-	rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
-	return 0;
+	int ret;
+	int snap;
+	int snap_nmi;
+
+	snap = rdp->dynticks->dynticks;
+	snap_nmi = rdp->dynticks->dynticks_nmi;
+	smp_mb();  /* Order sampling of snap with end of grace period. */
+	rdp->dynticks_snap = snap;
+	rdp->dynticks_nmi_snap = snap_nmi;
+	ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0);
+	if (ret)
+		rdp->dynticks_fqs++;
+	return ret;
 }
 
 /*
@@ -454,11 +458,16 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
  */
 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 {
-	unsigned long curr;
-	unsigned long snap;
+	long curr;
+	long curr_nmi;
+	long snap;
+	long snap_nmi;
 
-	curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks);
-	snap = (unsigned long)rdp->dynticks_snap;
+	curr = rdp->dynticks->dynticks;
+	snap = rdp->dynticks_snap;
+	curr_nmi = rdp->dynticks->dynticks_nmi;
+	snap_nmi = rdp->dynticks_nmi_snap;
+	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
 
 	/*
 	 * If the CPU passed through or entered a dynticks idle phase with
@@ -468,7 +477,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	 * read-side critical section that started before the beginning
 	 * of the current RCU grace period.
 	 */
-	if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) {
+	if ((curr != snap || (curr & 0x1) == 0) &&
+	    (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) {
 		rdp->dynticks_fqs++;
 		return 1;
 	}
@@ -897,12 +907,6 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	unsigned long gp_duration;
 
 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
-
-	/*
-	 * Ensure that all grace-period and pre-grace-period activity
-	 * is seen before the assignment to rsp->completed.
-	 */
-	smp_mb(); /* See above block comment. */
 	gp_duration = jiffies - rsp->gp_start;
 	if (gp_duration > rsp->gp_max)
 		rsp->gp_max = gp_duration;
@@ -1450,11 +1454,25 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static void rcu_process_callbacks(void)
 {
+	/*
+	 * Memory references from any prior RCU read-side critical sections
+	 * executed by the interrupted code must be seen before any RCU
+	 * grace-period manipulations below.
+	 */
+	smp_mb(); /* See above block comment. */
+
 	__rcu_process_callbacks(&rcu_sched_state,
 				&__get_cpu_var(rcu_sched_data));
 	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
 	rcu_preempt_process_callbacks();
 
+	/*
+	 * Memory references from any later RCU read-side critical sections
+	 * executed by the interrupted code must be seen after any RCU
+	 * grace-period manipulations above.
+	 */
+	smp_mb(); /* See above block comment. */
+
 	/* If we are last CPU on way to dyntick-idle mode, accelerate it. */
 	rcu_needs_cpu_flush();
 }
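
The force_quiescent_state() path above works on snapshots:
dyntick_save_progress_counter() samples both counters when the grace
period first needs a quiescent state from a CPU, and
rcu_implicit_dynticks_qs() later credits the CPU if either counter is
even or has moved since the snapshot (a passage through idle). A hedged
standalone sketch of that recheck logic follows; the sketch_* names are
invented here for illustration:

struct sketch_snap {
	int dynticks_snap;	/* dynticks value sampled at first check. */
	int dynticks_nmi_snap;	/* dynticks_nmi sampled at first check. */
};

/* A counter proves quiescence if it changed or is currently even. */
static int sketch_counter_quiesced(int curr, int snap)
{
	return curr != snap || (curr & 0x1) == 0;
}

/* Mirrors the combined condition in rcu_implicit_dynticks_qs() above. */
static int sketch_cpu_passed_quiescent_state(const struct sketch_snap *s,
					     int curr, int curr_nmi)
{
	return sketch_counter_quiesced(curr, s->dynticks_snap) &&
	       sketch_counter_quiesced(curr_nmi, s->dynticks_nmi_snap);
}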
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 93d4a1c2e88b..257664815d5d 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -84,9 +84,11 @@
  * Dynticks per-CPU state.
  */
 struct rcu_dynticks {
-	int dynticks_nesting;	/* Track irq/process nesting level. */
-	int dynticks_nmi_nesting; /* Track NMI nesting level. */
-	atomic_t dynticks;	/* Even value for dynticks-idle, else odd. */
+	int dynticks_nesting;	/* Track nesting level, sort of. */
+	int dynticks;		/* Even value for dynticks-idle, else odd. */
+	int dynticks_nmi;	/* Even value for either dynticks-idle or */
+				/*  not in nmi handler, else odd.  So this */
+				/*  remains even for nmi from irq handler. */
 };
 
 /* RCU's kthread states for tracing. */
@@ -282,6 +284,7 @@ struct rcu_data {
 	/* 3) dynticks interface. */
 	struct rcu_dynticks *dynticks;	/* Shared per-CPU dynticks state. */
 	int dynticks_snap;		/* Per-GP tracking for dynticks. */
+	int dynticks_nmi_snap;		/* Per-GP tracking for dynticks_nmi. */
 #endif /* #ifdef CONFIG_NO_HZ */
 
 	/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index ed339702481d..3f6559a5f5cd 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1520,6 +1520,7 @@ int rcu_needs_cpu(int cpu)
 {
 	int c = 0;
 	int snap;
+	int snap_nmi;
 	int thatcpu;
 
 	/* Check for being in the holdoff period. */
@@ -1530,10 +1531,10 @@ int rcu_needs_cpu(int cpu)
 	for_each_online_cpu(thatcpu) {
 		if (thatcpu == cpu)
 			continue;
-		snap = atomic_add_return(0, &per_cpu(rcu_dynticks,
-						     thatcpu).dynticks);
+		snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
+		snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
 		smp_mb(); /* Order sampling of snap with end of grace period. */
-		if ((snap & 0x1) != 0) {
+		if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) {
 			per_cpu(rcu_dyntick_drain, cpu) = 0;
 			per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
 			return rcu_needs_cpu_quick_check(cpu);
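
The smp_mb() after the two samples above pairs with the barriers around
the counter increments in rcu_enter_nohz()/rcu_exit_nohz(); roughly, as
in the following informal ordering sketch (an assumed reading of the
barrier pairing, not kernel code):

/*
 * Informal pairing sketch (assumption, for illustration):
 *
 *	CPU A (entering dynticks-idle)	CPU B (sampling, as above)
 *	------------------------------	--------------------------
 *	last RCU read-side access	snap = A->dynticks;
 *	smp_mb();			snap_nmi = A->dynticks_nmi;
 *	A->dynticks++; (now even)	smp_mb();
 *					...B's later accesses...
 *
 * If CPU B observes the even value, the barriers guarantee that B also
 * observes everything CPU A did in its earlier read-side critical
 * sections.
 */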
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 9678cc3650f5..aa0fd72b4bc7 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -69,10 +69,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->passed_quiesc, rdp->passed_quiesc_completed,
 		   rdp->qs_pending);
 #ifdef CONFIG_NO_HZ
-	seq_printf(m, " dt=%d/%d/%d df=%lu",
-		   atomic_read(&rdp->dynticks->dynticks),
+	seq_printf(m, " dt=%d/%d dn=%d df=%lu",
+		   rdp->dynticks->dynticks,
 		   rdp->dynticks->dynticks_nesting,
-		   rdp->dynticks->dynticks_nmi_nesting,
+		   rdp->dynticks->dynticks_nmi,
 		   rdp->dynticks_fqs);
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -141,9 +141,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->qs_pending);
 #ifdef CONFIG_NO_HZ
 	seq_printf(m, ",%d,%d,%d,%lu",
-		   atomic_read(&rdp->dynticks->dynticks),
+		   rdp->dynticks->dynticks,
 		   rdp->dynticks->dynticks_nesting,
-		   rdp->dynticks->dynticks_nmi_nesting,
+		   rdp->dynticks->dynticks_nmi,
 		   rdp->dynticks_fqs);
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -167,7 +167,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
 {
 	seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
 #ifdef CONFIG_NO_HZ
-	seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
+	seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n");
 #ifdef CONFIG_TREE_PREEMPT_RCU