Diffstat (limited to 'kernel/rcu')
-rw-r--r--  kernel/rcu/rcu.h         |  5
-rw-r--r--  kernel/rcu/srcu.c        | 57
-rw-r--r--  kernel/rcu/torture.c     | 75
-rw-r--r--  kernel/rcu/tree.c        | 79
-rw-r--r--  kernel/rcu/tree.h        | 12
-rw-r--r--  kernel/rcu/tree_plugin.h | 89
-rw-r--r--  kernel/rcu/tree_trace.c  |  3
-rw-r--r--  kernel/rcu/update.c      |  5
8 files changed, 251 insertions(+), 74 deletions(-)
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 7859a0a3951e..79c3877e9c5b 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -96,19 +96,22 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
 }
 #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
-extern void kfree(const void *);
+void kfree(const void *);
 
 static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
 {
 	unsigned long offset = (unsigned long)head->func;
 
+	rcu_lock_acquire(&rcu_callback_map);
 	if (__is_kfree_rcu_offset(offset)) {
 		RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
 		kfree((void *)head - offset);
+		rcu_lock_release(&rcu_callback_map);
 		return 1;
 	} else {
 		RCU_TRACE(trace_rcu_invoke_callback(rn, head));
 		head->func(head);
+		rcu_lock_release(&rcu_callback_map);
 		return 0;
 	}
 }
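The rcu_lock_acquire()/rcu_lock_release() pair added above brackets every callback invocation with the new rcu_callback_map lockdep map (defined in the update.c hunk at the end of this series), so debugging checks can attribute misbehavior to "RCU callback context", most notably a callback that tries to block. A small, hypothetical module-style sketch of the situation this is meant to diagnose; my_obj, my_lock, my_cb, and my_retire are invented names, and the snippet assumes a kernel build environment rather than being a standalone program:

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_obj {
	struct rcu_head rh;
	int data;
};

static DEFINE_MUTEX(my_lock);

/* Invoked via __rcu_reclaim(), i.e. with rcu_callback_map "held". */
static void my_cb(struct rcu_head *rh)
{
	struct my_obj *p = container_of(rh, struct my_obj, rh);

	/*
	 * Callbacks may run from softirq context and must not block.
	 * mutex_lock(&my_lock);   <-- a sleep here would now be reported
	 *                             against the rcu_callback section.
	 */
	kfree(p);
}

static void my_retire(struct my_obj *p)
{
	call_rcu(&p->rh, my_cb);	/* my_cb() runs after a grace period */
}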
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 01d5ccb8bfe3..3318d8284384 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -363,6 +363,29 @@ static void srcu_flip(struct srcu_struct *sp)
 /*
  * Enqueue an SRCU callback on the specified srcu_struct structure,
  * initiating grace-period processing if it is not already running.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing SRCU read-side critical section. On systems with
+ * more than one CPU, this means that when "func()" is invoked, each CPU
+ * is guaranteed to have executed a full memory barrier since the end of
+ * its last corresponding SRCU read-side critical section whose beginning
+ * preceded the call to call_rcu(). It also means that each CPU executing
+ * an SRCU read-side critical section that continues beyond the start of
+ * "func()" must have executed a memory barrier after the call_rcu()
+ * but before the beginning of that SRCU read-side critical section.
+ * Note that these guarantees include CPUs that are offline, idle, or
+ * executing in user mode, as well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting SRCU callback function "func()", then both CPU A and CPU
+ * B are guaranteed to execute a full memory barrier during the time
+ * interval between the call to call_rcu() and the invocation of "func()".
+ * This guarantee applies even if CPU A and CPU B are the same CPU (but
+ * again only if the system has more than one CPU).
+ *
+ * Of course, these guarantees apply only for invocations of call_srcu(),
+ * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
+ * srcu_struct structure.
  */
 void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
 	       void (*func)(struct rcu_head *head))
@@ -459,7 +482,30 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
  * Note that it is illegal to call synchronize_srcu() from the corresponding
  * SRCU read-side critical section; doing so will result in deadlock.
  * However, it is perfectly legal to call synchronize_srcu() on one
- * srcu_struct from some other srcu_struct's read-side critical section.
+ * srcu_struct from some other srcu_struct's read-side critical section,
+ * as long as the resulting graph of srcu_structs is acyclic.
+ *
+ * There are memory-ordering constraints implied by synchronize_srcu().
+ * On systems with more than one CPU, when synchronize_srcu() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since
+ * the end of its last corresponding SRCU-sched read-side critical section
+ * whose beginning preceded the call to synchronize_srcu(). In addition,
+ * each CPU having an SRCU read-side critical section that extends beyond
+ * the return from synchronize_srcu() is guaranteed to have executed a
+ * full memory barrier after the beginning of synchronize_srcu() and before
+ * the beginning of that SRCU read-side critical section. Note that these
+ * guarantees include CPUs that are offline, idle, or executing in user mode,
+ * as well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_srcu(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_srcu(). This guarantee applies even if CPU A and CPU B
+ * are the same CPU, but again only if the system has more than one CPU.
+ *
+ * Of course, these memory-ordering guarantees apply only when
+ * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
+ * passed the same srcu_struct structure.
  */
 void synchronize_srcu(struct srcu_struct *sp)
 {
@@ -476,12 +522,8 @@ EXPORT_SYMBOL_GPL(synchronize_srcu);
  * Wait for an SRCU grace period to elapse, but be more aggressive about
  * spinning rather than blocking when waiting.
  *
- * Note that it is also illegal to call synchronize_srcu_expedited()
- * from the corresponding SRCU read-side critical section;
- * doing so will result in deadlock. However, it is perfectly legal
- * to call synchronize_srcu_expedited() on one srcu_struct from some
- * other srcu_struct's read-side critical section, as long as
- * the resulting graph of srcu_structs is acyclic.
+ * Note that synchronize_srcu_expedited() has the same deadlock and
+ * memory-ordering properties as does synchronize_srcu().
  */
 void synchronize_srcu_expedited(struct srcu_struct *sp)
 {
@@ -491,6 +533,7 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
 
 /**
  * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
+ * @sp: srcu_struct on which to wait for in-flight callbacks.
  */
 void srcu_barrier(struct srcu_struct *sp)
 {
diff --git a/kernel/rcu/torture.c b/kernel/rcu/torture.c
index 3929cd451511..732f8ae3086a 100644
--- a/kernel/rcu/torture.c
+++ b/kernel/rcu/torture.c
@@ -139,8 +139,6 @@ MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
 #define VERBOSE_PRINTK_ERRSTRING(s) \
 	do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0)
 
-static char printk_buf[4096];
-
 static int nrealreaders;
 static struct task_struct *writer_task;
 static struct task_struct **fakewriter_tasks;
@@ -376,7 +374,7 @@ struct rcu_torture_ops {
 	void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
 	void (*cb_barrier)(void);
 	void (*fqs)(void);
-	int (*stats)(char *page);
+	void (*stats)(char *page);
 	int irq_capable;
 	int can_boost;
 	const char *name;
@@ -578,21 +576,19 @@ static void srcu_torture_barrier(void)
 	srcu_barrier(&srcu_ctl);
 }
 
-static int srcu_torture_stats(char *page)
+static void srcu_torture_stats(char *page)
 {
-	int cnt = 0;
 	int cpu;
 	int idx = srcu_ctl.completed & 0x1;
 
-	cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):",
+	page += sprintf(page, "%s%s per-CPU(idx=%d):",
 		       torture_type, TORTURE_FLAG, idx);
 	for_each_possible_cpu(cpu) {
-		cnt += sprintf(&page[cnt], " %d(%lu,%lu)", cpu,
+		page += sprintf(page, " %d(%lu,%lu)", cpu,
 			       per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx],
 			       per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]);
 	}
-	cnt += sprintf(&page[cnt], "\n");
-	return cnt;
+	sprintf(page, "\n");
 }
 
 static void srcu_torture_synchronize_expedited(void)
@@ -1052,10 +1048,9 @@ rcu_torture_reader(void *arg)
 /*
  * Create an RCU-torture statistics message in the specified buffer.
  */
-static int
+static void
 rcu_torture_printk(char *page)
 {
-	int cnt = 0;
 	int cpu;
 	int i;
 	long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
@@ -1071,8 +1066,8 @@ rcu_torture_printk(char *page)
 		if (pipesummary[i] != 0)
 			break;
 	}
-	cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
-	cnt += sprintf(&page[cnt],
+	page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG);
+	page += sprintf(page,
 		       "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
 		       rcu_torture_current,
 		       rcu_torture_current_version,
@@ -1080,53 +1075,52 @@ rcu_torture_printk(char *page)
 		       atomic_read(&n_rcu_torture_alloc),
 		       atomic_read(&n_rcu_torture_alloc_fail),
 		       atomic_read(&n_rcu_torture_free));
-	cnt += sprintf(&page[cnt], "rtmbe: %d rtbke: %ld rtbre: %ld ",
+	page += sprintf(page, "rtmbe: %d rtbke: %ld rtbre: %ld ",
 		       atomic_read(&n_rcu_torture_mberror),
 		       n_rcu_torture_boost_ktrerror,
 		       n_rcu_torture_boost_rterror);
-	cnt += sprintf(&page[cnt], "rtbf: %ld rtb: %ld nt: %ld ",
+	page += sprintf(page, "rtbf: %ld rtb: %ld nt: %ld ",
 		       n_rcu_torture_boost_failure,
 		       n_rcu_torture_boosts,
 		       n_rcu_torture_timers);
-	cnt += sprintf(&page[cnt],
+	page += sprintf(page,
 		       "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ",
 		       n_online_successes, n_online_attempts,
 		       n_offline_successes, n_offline_attempts,
 		       min_online, max_online,
 		       min_offline, max_offline,
 		       sum_online, sum_offline, HZ);
-	cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld",
+	page += sprintf(page, "barrier: %ld/%ld:%ld",
 		       n_barrier_successes,
 		       n_barrier_attempts,
 		       n_rcu_torture_barrier_error);
-	cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
+	page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG);
 	if (atomic_read(&n_rcu_torture_mberror) != 0 ||
 	    n_rcu_torture_barrier_error != 0 ||
 	    n_rcu_torture_boost_ktrerror != 0 ||
 	    n_rcu_torture_boost_rterror != 0 ||
 	    n_rcu_torture_boost_failure != 0 ||
 	    i > 1) {
-		cnt += sprintf(&page[cnt], "!!! ");
+		page += sprintf(page, "!!! ");
 		atomic_inc(&n_rcu_torture_error);
 		WARN_ON_ONCE(1);
 	}
-	cnt += sprintf(&page[cnt], "Reader Pipe: ");
+	page += sprintf(page, "Reader Pipe: ");
 	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
-		cnt += sprintf(&page[cnt], " %ld", pipesummary[i]);
-	cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
-	cnt += sprintf(&page[cnt], "Reader Batch: ");
+		page += sprintf(page, " %ld", pipesummary[i]);
+	page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG);
+	page += sprintf(page, "Reader Batch: ");
 	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
-		cnt += sprintf(&page[cnt], " %ld", batchsummary[i]);
-	cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
-	cnt += sprintf(&page[cnt], "Free-Block Circulation: ");
+		page += sprintf(page, " %ld", batchsummary[i]);
+	page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG);
+	page += sprintf(page, "Free-Block Circulation: ");
 	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
-		cnt += sprintf(&page[cnt], " %d",
+		page += sprintf(page, " %d",
 			       atomic_read(&rcu_torture_wcount[i]));
 	}
-	cnt += sprintf(&page[cnt], "\n");
+	page += sprintf(page, "\n");
 	if (cur_ops->stats)
-		cnt += cur_ops->stats(&page[cnt]);
-	return cnt;
+		cur_ops->stats(page);
 }
 
 /*
@@ -1140,10 +1134,17 @@ rcu_torture_printk(char *page)
 static void
 rcu_torture_stats_print(void)
 {
-	int cnt;
+	int size = nr_cpu_ids * 200 + 8192;
+	char *buf;
 
-	cnt = rcu_torture_printk(printk_buf);
-	pr_alert("%s", printk_buf);
+	buf = kmalloc(size, GFP_KERNEL);
+	if (!buf) {
+		pr_err("rcu-torture: Out of memory, need: %d", size);
+		return;
+	}
+	rcu_torture_printk(buf);
+	pr_alert("%s", buf);
+	kfree(buf);
 }
 
 /*
@@ -1578,6 +1579,7 @@ static int rcu_torture_barrier_cbs(void *arg)
 {
 	long myid = (long)arg;
 	bool lastphase = 0;
+	bool newphase;
 	struct rcu_head rcu;
 
 	init_rcu_head_on_stack(&rcu);
@@ -1585,10 +1587,11 @@ static int rcu_torture_barrier_cbs(void *arg)
 	set_user_nice(current, 19);
 	do {
 		wait_event(barrier_cbs_wq[myid],
-			   barrier_phase != lastphase ||
+			   (newphase =
+			    ACCESS_ONCE(barrier_phase)) != lastphase ||
 			   kthread_should_stop() ||
 			   fullstop != FULLSTOP_DONTSTOP);
-		lastphase = barrier_phase;
+		lastphase = newphase;
 		smp_mb(); /* ensure barrier_phase load before ->call(). */
 		if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
 			break;
@@ -1625,7 +1628,7 @@ static int rcu_torture_barrier(void *arg)
 		if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
 			break;
 		n_barrier_attempts++;
-		cur_ops->cb_barrier();
+		cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */
 		if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) {
 			n_rcu_torture_barrier_error++;
 			WARN_ON_ONCE(1);
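The statistics rework in this file replaces the index-based "cnt += sprintf(&page[cnt], ...)" pattern, and the fixed printk_buf[4096] it wrote into, with a cursor-style "page += sprintf(page, ...)" into a buffer sized from nr_cpu_ids. Both idioms produce the same text; the cursor form just drops the need to thread a byte count back to the caller. A small standalone userspace C sketch of the difference, with invented names, is shown here:

#include <stdio.h>
#include <stdlib.h>

/* Index-based idiom: track how many bytes have been written so far. */
static int fill_indexed(char *page)
{
	int cnt = 0;

	cnt += sprintf(&page[cnt], "readers: %d ", 4);
	cnt += sprintf(&page[cnt], "writers: %d\n", 1);
	return cnt;			/* caller learns the length */
}

/* Cursor-based idiom: advance the pointer itself and return nothing. */
static void fill_cursor(char *page)
{
	page += sprintf(page, "readers: %d ", 4);
	page += sprintf(page, "writers: %d\n", 1);
}

int main(void)
{
	/* Buffer sized to the data, as rcu_torture_stats_print() now does. */
	char *buf = malloc(8192);

	if (!buf)
		return 1;
	fill_indexed(buf);
	fputs(buf, stdout);
	fill_cursor(buf);
	fputs(buf, stdout);
	free(buf);
	return 0;
}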
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index dd081987a8ec..e37bd561c26f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -369,6 +369,9 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 				 bool user)
 {
+	struct rcu_state *rsp;
+	struct rcu_data *rdp;
+
 	trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
 	if (!user && !is_idle_task(current)) {
 		struct task_struct *idle __maybe_unused =
@@ -380,6 +383,10 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 			  current->pid, current->comm,
 			  idle->pid, idle->comm); /* must be idle task! */
 	}
+	for_each_rcu_flavor(rsp) {
+		rdp = this_cpu_ptr(rsp->rda);
+		do_nocb_deferred_wakeup(rdp);
+	}
 	rcu_prepare_for_idle(smp_processor_id());
 	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
 	smp_mb__before_atomic_inc();  /* See above. */
@@ -411,11 +418,12 @@ static void rcu_eqs_enter(bool user)
 	rdtp = this_cpu_ptr(&rcu_dynticks);
 	oldval = rdtp->dynticks_nesting;
 	WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
-	if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
+	if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) {
 		rdtp->dynticks_nesting = 0;
-	else
+		rcu_eqs_enter_common(rdtp, oldval, user);
+	} else {
 		rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
-	rcu_eqs_enter_common(rdtp, oldval, user);
+	}
 }
 
 /**
@@ -533,11 +541,12 @@ static void rcu_eqs_exit(bool user)
 	rdtp = this_cpu_ptr(&rcu_dynticks);
 	oldval = rdtp->dynticks_nesting;
 	WARN_ON_ONCE(oldval < 0);
-	if (oldval & DYNTICK_TASK_NEST_MASK)
+	if (oldval & DYNTICK_TASK_NEST_MASK) {
 		rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
-	else
+	} else {
 		rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-	rcu_eqs_exit_common(rdtp, oldval, user);
+		rcu_eqs_exit_common(rdtp, oldval, user);
+	}
 }
 
 /**
@@ -716,7 +725,7 @@ bool rcu_lockdep_current_cpu_online(void)
 	bool ret;
 
 	if (in_nmi())
-		return 1;
+		return true;
 	preempt_disable();
 	rdp = this_cpu_ptr(&rcu_sched_data);
 	rnp = rdp->mynode;
@@ -755,6 +764,12 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
 }
 
 /*
+ * This function really isn't for public consumption, but RCU is special in
+ * that context switches can allow the state machine to make progress.
+ */
+extern void resched_cpu(int cpu);
+
+/*
  * Return true if the specified CPU has passed through a quiescent
  * state by virtue of being in or having passed through an dynticks
  * idle state since the last call to dyntick_save_progress_counter()
@@ -812,16 +827,34 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 */
 	rcu_kick_nohz_cpu(rdp->cpu);
 
+	/*
+	 * Alternatively, the CPU might be running in the kernel
+	 * for an extended period of time without a quiescent state.
+	 * Attempt to force the CPU through the scheduler to gain the
+	 * needed quiescent state, but only if the grace period has gone
+	 * on for an uncommonly long time. If there are many stuck CPUs,
+	 * we will beat on the first one until it gets unstuck, then move
+	 * to the next. Only do this for the primary flavor of RCU.
+	 */
+	if (rdp->rsp == rcu_state &&
+	    ULONG_CMP_GE(ACCESS_ONCE(jiffies), rdp->rsp->jiffies_resched)) {
+		rdp->rsp->jiffies_resched += 5;
+		resched_cpu(rdp->cpu);
+	}
+
 	return 0;
 }
 
 static void record_gp_stall_check_time(struct rcu_state *rsp)
 {
 	unsigned long j = ACCESS_ONCE(jiffies);
+	unsigned long j1;
 
 	rsp->gp_start = j;
 	smp_wmb(); /* Record start time before stall time. */
-	rsp->jiffies_stall = j + rcu_jiffies_till_stall_check();
+	j1 = rcu_jiffies_till_stall_check();
+	rsp->jiffies_stall = j + j1;
+	rsp->jiffies_resched = j + j1 / 2;
 }
 
 /*
@@ -1509,6 +1542,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 		rdp = this_cpu_ptr(rsp->rda);
 		if (rnp == rdp->mynode)
 			__note_gp_changes(rsp, rnp, rdp);
+		/* smp_mb() provided by prior unlock-lock pair. */
 		nocb += rcu_future_gp_cleanup(rsp, rnp);
 		raw_spin_unlock_irq(&rnp->lock);
 		cond_resched();
@@ -1553,6 +1587,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 			wait_event_interruptible(rsp->gp_wq,
 						 ACCESS_ONCE(rsp->gp_flags) &
 						 RCU_GP_FLAG_INIT);
+			/* Locking provides needed memory barrier. */
 			if (rcu_gp_init(rsp))
 				break;
 			cond_resched();
@@ -1582,6 +1617,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 					(!ACCESS_ONCE(rnp->qsmask) &&
 					 !rcu_preempt_blocked_readers_cgp(rnp)),
 					j);
+			/* Locking provides needed memory barriers. */
 			/* If grace period done, leave loop. */
 			if (!ACCESS_ONCE(rnp->qsmask) &&
 			    !rcu_preempt_blocked_readers_cgp(rnp))
@@ -1901,13 +1937,13 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
  * Adopt the RCU callbacks from the specified rcu_state structure's
  * orphanage. The caller must hold the ->orphan_lock.
  */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 {
 	int i;
 	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
 	/* No-CBs CPUs are handled specially. */
-	if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
+	if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
 		return;
 
 	/* Do the accounting first. */
@@ -1986,7 +2022,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 
 	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
 	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
-	rcu_adopt_orphan_cbs(rsp);
+	rcu_adopt_orphan_cbs(rsp, flags);
 
 	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
 	mask = rdp->grpmask;	/* rnp->grplo is constant. */
@@ -2303,6 +2339,9 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 	/* If there are callbacks ready, invoke them. */
 	if (cpu_has_callbacks_ready_to_invoke(rdp))
 		invoke_rcu_callbacks(rsp, rdp);
+
+	/* Do any needed deferred wakeups of rcuo kthreads. */
+	do_nocb_deferred_wakeup(rdp);
 }
 
 /*
@@ -2437,7 +2476,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 
 		if (cpu != -1)
 			rdp = per_cpu_ptr(rsp->rda, cpu);
-		offline = !__call_rcu_nocb(rdp, head, lazy);
+		offline = !__call_rcu_nocb(rdp, head, lazy, flags);
 		WARN_ON_ONCE(offline);
 		/* _call_rcu() is illegal on offline CPU; leak the callback. */
 		local_irq_restore(flags);
@@ -2757,6 +2796,10 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 	/* Check for CPU stalls, if enabled. */
 	check_cpu_stall(rsp, rdp);
 
+	/* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */
+	if (rcu_nohz_full_cpu(rsp))
+		return 0;
+
 	/* Is the RCU core waiting for a quiescent state from this CPU? */
 	if (rcu_scheduler_fully_active &&
 	    rdp->qs_pending && !rdp->passed_quiesce) {
@@ -2790,6 +2833,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 		return 1;
 	}
 
+	/* Does this CPU need a deferred NOCB wakeup? */
+	if (rcu_nocb_need_deferred_wakeup(rdp)) {
+		rdp->n_rp_nocb_defer_wakeup++;
+		return 1;
+	}
+
 	/* nothing to do */
 	rdp->n_rp_need_nothing++;
 	return 0;
@@ -3214,9 +3263,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
 	int i;
 
-	for (i = rcu_num_lvls - 1; i > 0; i--)
+	rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
+	for (i = rcu_num_lvls - 2; i >= 0; i--)
 		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-	rsp->levelspread[0] = rcu_fanout_leaf;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
@@ -3346,6 +3395,8 @@ static void __init rcu_init_geometry(void)
 	if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF &&
 	    nr_cpu_ids == NR_CPUS)
 		return;
+	pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n",
+		rcu_fanout_leaf, nr_cpu_ids);
 
 	/*
 	 * Compute number of nodes that can be handled an rcu_node tree
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 52be957c9fe2..8c19873f1ac9 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -317,6 +317,7 @@ struct rcu_data {
 	unsigned long n_rp_cpu_needs_gp;
 	unsigned long n_rp_gp_completed;
 	unsigned long n_rp_gp_started;
+	unsigned long n_rp_nocb_defer_wakeup;
 	unsigned long n_rp_need_nothing;
 
 	/* 6) _rcu_barrier() and OOM callbacks. */
@@ -335,6 +336,7 @@ struct rcu_data {
 	int nocb_p_count_lazy;		/*  (approximate). */
 	wait_queue_head_t nocb_wq;	/* For nocb kthreads to sleep on. */
 	struct task_struct *nocb_kthread;
+	bool nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 
 	/* 8) RCU CPU stall data. */
@@ -453,6 +455,8 @@ struct rcu_state {
 						/*  but in jiffies. */
 	unsigned long jiffies_stall;		/* Time at which to check */
 						/*  for CPU stalls. */
+	unsigned long jiffies_resched;		/* Time at which to resched */
+						/*  a reluctant CPU. */
 	unsigned long gp_max;			/* Maximum GP duration in */
 						/*  jiffies. */
 	const char *name;			/* Name of structure. */
@@ -548,9 +552,12 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
 static void rcu_init_one_nocb(struct rcu_node *rnp);
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-			    bool lazy);
+			    bool lazy, unsigned long flags);
 static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
-				      struct rcu_data *rdp);
+				      struct rcu_data *rdp,
+				      unsigned long flags);
+static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
+static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
 static void rcu_kick_nohz_cpu(int cpu);
@@ -564,6 +571,7 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
 				  unsigned long maxj);
 static void rcu_bind_gp_kthread(void);
 static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
+static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
 
 #endif /* #ifndef RCU_TREE_NONCORE */
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 08a765232432..3ca32736e264 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -361,10 +361,14 @@ void rcu_read_unlock_special(struct task_struct *t)
 	special = t->rcu_read_unlock_special;
 	if (special & RCU_READ_UNLOCK_NEED_QS) {
 		rcu_preempt_qs(smp_processor_id());
+		if (!t->rcu_read_unlock_special) {
+			local_irq_restore(flags);
+			return;
+		}
 	}
 
-	/* Hardware IRQ handlers cannot block. */
-	if (in_irq() || in_serving_softirq()) {
+	/* Hardware IRQ handlers cannot block, complain if they get here. */
+	if (WARN_ON_ONCE(in_irq() || in_serving_softirq())) {
 		local_irq_restore(flags);
 		return;
 	}
@@ -779,8 +783,10 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 		}
 		if (rnp->parent == NULL) {
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);
-			if (wake)
+			if (wake) {
+				smp_mb(); /* EGP done before wake_up(). */
 				wake_up(&sync_rcu_preempt_exp_wq);
+			}
 			break;
 		}
 		mask = rnp->grpmask;
@@ -1852,6 +1858,7 @@ static int rcu_oom_notify(struct notifier_block *self,
 
 	/* Wait for callbacks from earlier instance to complete. */
 	wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
+	smp_mb(); /* Ensure callback reuse happens after callback invocation. */
 
 	/*
 	 * Prevent premature wakeup: ensure that all increments happen
@@ -2101,7 +2108,8 @@ bool rcu_is_nocb_cpu(int cpu)
 static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 				    struct rcu_head *rhp,
 				    struct rcu_head **rhtp,
-				    int rhcount, int rhcount_lazy)
+				    int rhcount, int rhcount_lazy,
+				    unsigned long flags)
 {
 	int len;
 	struct rcu_head **old_rhpp;
@@ -2122,9 +2130,16 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 	}
 	len = atomic_long_read(&rdp->nocb_q_count);
 	if (old_rhpp == &rdp->nocb_head) {
-		wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
+		if (!irqs_disabled_flags(flags)) {
+			wake_up(&rdp->nocb_wq); /* ... if queue was empty ... */
+			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+					    TPS("WakeEmpty"));
+		} else {
+			rdp->nocb_defer_wakeup = true;
+			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+					    TPS("WakeEmptyIsDeferred"));
+		}
 		rdp->qlen_last_fqs_check = 0;
-		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty"));
 	} else if (len > rdp->qlen_last_fqs_check + qhimark) {
 		wake_up_process(t); /* ... or if many callbacks queued. */
 		rdp->qlen_last_fqs_check = LONG_MAX / 2;
@@ -2145,12 +2160,12 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
  * "rcuo" kthread can find it.
  */
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-			    bool lazy)
+			    bool lazy, unsigned long flags)
 {
 
 	if (!rcu_is_nocb_cpu(rdp->cpu))
 		return 0;
-	__call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
+	__call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
 	if (__is_kfree_rcu_offset((unsigned long)rhp->func))
 		trace_rcu_kfree_callback(rdp->rsp->name, rhp,
 					 (unsigned long)rhp->func,
@@ -2168,7 +2183,8 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
  * not a no-CBs CPU.
  */
 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
-						     struct rcu_data *rdp)
+						     struct rcu_data *rdp,
+						     unsigned long flags)
 {
 	long ql = rsp->qlen;
 	long qll = rsp->qlen_lazy;
@@ -2182,14 +2198,14 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 	/* First, enqueue the donelist, if any. This preserves CB ordering. */
 	if (rsp->orphan_donelist != NULL) {
 		__call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
-					rsp->orphan_donetail, ql, qll);
+					rsp->orphan_donetail, ql, qll, flags);
 		ql = qll = 0;
 		rsp->orphan_donelist = NULL;
 		rsp->orphan_donetail = &rsp->orphan_donelist;
 	}
 	if (rsp->orphan_nxtlist != NULL) {
 		__call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
-					rsp->orphan_nxttail, ql, qll);
+					rsp->orphan_nxttail, ql, qll, flags);
 		ql = qll = 0;
 		rsp->orphan_nxtlist = NULL;
 		rsp->orphan_nxttail = &rsp->orphan_nxtlist;
@@ -2250,6 +2266,7 @@ static int rcu_nocb_kthread(void *arg)
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("Sleep"));
 			wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
+			/* Memory barrier provide by xchg() below. */
 		} else if (firsttime) {
 			firsttime = 0;
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
@@ -2310,6 +2327,22 @@ static int rcu_nocb_kthread(void *arg)
 	return 0;
 }
 
+/* Is a deferred wakeup of rcu_nocb_kthread() required? */
+static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+{
+	return ACCESS_ONCE(rdp->nocb_defer_wakeup);
+}
+
+/* Do a deferred wakeup of rcu_nocb_kthread(). */
+static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+{
+	if (!rcu_nocb_need_deferred_wakeup(rdp))
+		return;
+	ACCESS_ONCE(rdp->nocb_defer_wakeup) = false;
+	wake_up(&rdp->nocb_wq);
+	trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty"));
+}
+
 /* Initialize per-rcu_data variables for no-CBs CPUs. */
 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 {
@@ -2365,13 +2398,14 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
 }
 
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-			    bool lazy)
+			    bool lazy, unsigned long flags)
 {
 	return 0;
 }
 
 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
-						     struct rcu_data *rdp)
+						     struct rcu_data *rdp,
+						     unsigned long flags)
 {
 	return 0;
 }
@@ -2380,6 +2414,15 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 {
 }
 
+static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+{
+	return false;
+}
+
+static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+{
+}
+
 static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
 {
 }
@@ -2829,3 +2872,23 @@ static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
 }
 
 #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+
+/*
+ * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
+ * grace-period kthread will do force_quiescent_state() processing?
+ * The idea is to avoid waking up RCU core processing on such a
+ * CPU unless the grace period has extended for too long.
+ *
+ * This code relies on the fact that all NO_HZ_FULL CPUs are also
+ * CONFIG_RCU_NOCB_CPUs.
+ */
+static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	if (tick_nohz_full_cpu(smp_processor_id()) &&
+	    (!rcu_gp_in_progress(rsp) ||
+	     ULONG_CMP_LT(jiffies, ACCESS_ONCE(rsp->gp_start) + HZ)))
+		return 1;
+#endif /* #ifdef CONFIG_NO_HZ_FULL */
+	return 0;
+}
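The WakeEmptyIsDeferred path in __call_rcu_nocb_enqueue() above avoids calling wake_up() while interrupts are disabled; instead it records nocb_defer_wakeup and lets a later safe context (the do_nocb_deferred_wakeup() calls added in tree.c, from __rcu_process_callbacks() and idle entry) perform the real wakeup. A rough userspace analogue of that flag-then-wake pattern is sketched below, using pthreads; all names are invented and the mapping to the kernel code is only approximate:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static atomic_bool work_ready;		/* the queued "callback" */
static atomic_bool defer_wakeup;	/* analogue of rdp->nocb_defer_wakeup */

/* Producer running where it must not block: queue work, defer the wakeup. */
static void enqueue_from_restricted_context(void)
{
	atomic_store(&work_ready, true);
	atomic_store(&defer_wakeup, true);
}

/* Later, from a context where blocking is fine: do the wakeup we skipped. */
static void do_deferred_wakeup(void)
{
	if (!atomic_exchange(&defer_wakeup, false))
		return;				/* nothing was deferred */
	pthread_mutex_lock(&lock);
	pthread_cond_signal(&cond);		/* the wakeup skipped earlier */
	pthread_mutex_unlock(&lock);
}

/* Consumer: a loose analogue of the rcuo kthread. */
static void *worker(void *arg)
{
	pthread_mutex_lock(&lock);
	while (!atomic_load(&work_ready))
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	puts("worker: processed deferred work");
	return arg;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, worker, NULL);
	enqueue_from_restricted_context();	/* "interrupts disabled" here */
	do_deferred_wakeup();			/* later, from a safe context */
	pthread_join(tid, NULL);
	return 0;
}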
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 3596797b7e46..4def475336d4 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -364,9 +364,10 @@ static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->n_rp_report_qs,
 		   rdp->n_rp_cb_ready,
 		   rdp->n_rp_cpu_needs_gp);
-	seq_printf(m, "gpc=%ld gps=%ld nn=%ld\n",
+	seq_printf(m, "gpc=%ld gps=%ld nn=%ld ndw%ld\n",
 		   rdp->n_rp_gp_completed,
 		   rdp->n_rp_gp_started,
+		   rdp->n_rp_nocb_defer_wakeup,
 		   rdp->n_rp_need_nothing);
 }
 
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 6cb3dff89e2b..802365ccd591 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -128,6 +128,11 @@ struct lockdep_map rcu_sched_lock_map =
 	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
 EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
 
+static struct lock_class_key rcu_callback_key;
+struct lockdep_map rcu_callback_map =
+	STATIC_LOCKDEP_MAP_INIT("rcu_callback", &rcu_callback_key);
+EXPORT_SYMBOL_GPL(rcu_callback_map);
+
 int notrace debug_lockdep_rcu_enabled(void)
 {
 	return rcu_scheduler_active && debug_locks &&