Diffstat (limited to 'kernel')
-rw-r--r--  kernel/rcupdate.c        |  28
-rw-r--r--  kernel/rcutiny_plugin.h  |  16
-rw-r--r--  kernel/rcutorture.c      | 257
-rw-r--r--  kernel/rcutree.c         |  35
-rw-r--r--  kernel/rcutree.h         |  11
-rw-r--r--  kernel/rcutree_plugin.h  |  30
-rw-r--r--  kernel/sched/core.c      |   1
-rw-r--r--  kernel/srcu.c            | 548
-rw-r--r--  kernel/timer.c           |   8
9 files changed, 752 insertions, 182 deletions
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a86f1741cc27..95cba41ce1e9 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -51,6 +51,34 @@
51 51
52#include "rcu.h" 52#include "rcu.h"
53 53
54#ifdef CONFIG_PREEMPT_RCU
55
56/*
57 * Check for a task exiting while in a preemptible-RCU read-side
58 * critical section, clean up if so. No need to issue warnings,
59 * as debug_check_no_locks_held() already does this if lockdep
60 * is enabled.
61 */
62void exit_rcu(void)
63{
64 struct task_struct *t = current;
65
66 if (likely(list_empty(&current->rcu_node_entry)))
67 return;
68 t->rcu_read_lock_nesting = 1;
69 barrier();
70 t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
71 __rcu_read_unlock();
72}
73
74#else /* #ifdef CONFIG_PREEMPT_RCU */
75
76void exit_rcu(void)
77{
78}
79
80#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
81
54#ifdef CONFIG_DEBUG_LOCK_ALLOC 82#ifdef CONFIG_DEBUG_LOCK_ALLOC
55static struct lock_class_key rcu_lock_key; 83static struct lock_class_key rcu_lock_key;
56struct lockdep_map rcu_lock_map = 84struct lockdep_map rcu_lock_map =
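With this move, exit_rcu() is defined once in kernel/rcupdate.c for both the preemptible and non-preemptible cases; the task-exit call site itself is not part of this diff. A rough sketch of how it is expected to be used, with do_exit() shown only as an assumed caller:

void do_exit(long code)
{
        /* ... earlier exit processing ... */
        exit_rcu();     /* clean up a still-open preemptible-RCU
                           read-side critical section, if any */
        /* ... remainder of exit processing, final schedule() ... */
}
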
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 22ecea0dfb62..fc31a2d65100 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -851,22 +851,6 @@ int rcu_preempt_needs_cpu(void)
851 return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; 851 return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
852} 852}
853 853
854/*
855 * Check for a task exiting while in a preemptible -RCU read-side
856 * critical section, clean up if so. No need to issue warnings,
857 * as debug_check_no_locks_held() already does this if lockdep
858 * is enabled.
859 */
860void exit_rcu(void)
861{
862 struct task_struct *t = current;
863
864 if (t->rcu_read_lock_nesting == 0)
865 return;
866 t->rcu_read_lock_nesting = 1;
867 __rcu_read_unlock();
868}
869
870#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ 854#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
871 855
872#ifdef CONFIG_RCU_TRACE 856#ifdef CONFIG_RCU_TRACE
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index a89b381a8c6e..e66b34ab7555 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -64,6 +64,7 @@ static int irqreader = 1; /* RCU readers from irq (timers). */
64static int fqs_duration; /* Duration of bursts (us), 0 to disable. */ 64static int fqs_duration; /* Duration of bursts (us), 0 to disable. */
65static int fqs_holdoff; /* Hold time within burst (us). */ 65static int fqs_holdoff; /* Hold time within burst (us). */
66static int fqs_stutter = 3; /* Wait time between bursts (s). */ 66static int fqs_stutter = 3; /* Wait time between bursts (s). */
67static int n_barrier_cbs; /* Number of callbacks to test RCU barriers. */
67static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */ 68static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */
68static int onoff_holdoff; /* Seconds after boot before CPU hotplugs. */ 69static int onoff_holdoff; /* Seconds after boot before CPU hotplugs. */
69static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */ 70static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */
@@ -96,6 +97,8 @@ module_param(fqs_holdoff, int, 0444);
96MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); 97MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
97module_param(fqs_stutter, int, 0444); 98module_param(fqs_stutter, int, 0444);
98MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); 99MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
100module_param(n_barrier_cbs, int, 0444);
101MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing");
99module_param(onoff_interval, int, 0444); 102module_param(onoff_interval, int, 0444);
100MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); 103MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable");
101module_param(onoff_holdoff, int, 0444); 104module_param(onoff_holdoff, int, 0444);
@@ -139,6 +142,8 @@ static struct task_struct *shutdown_task;
139static struct task_struct *onoff_task; 142static struct task_struct *onoff_task;
140#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 143#endif /* #ifdef CONFIG_HOTPLUG_CPU */
141static struct task_struct *stall_task; 144static struct task_struct *stall_task;
145static struct task_struct **barrier_cbs_tasks;
146static struct task_struct *barrier_task;
142 147
143#define RCU_TORTURE_PIPE_LEN 10 148#define RCU_TORTURE_PIPE_LEN 10
144 149
@@ -164,6 +169,7 @@ static atomic_t n_rcu_torture_alloc_fail;
164static atomic_t n_rcu_torture_free; 169static atomic_t n_rcu_torture_free;
165static atomic_t n_rcu_torture_mberror; 170static atomic_t n_rcu_torture_mberror;
166static atomic_t n_rcu_torture_error; 171static atomic_t n_rcu_torture_error;
172static long n_rcu_torture_barrier_error;
167static long n_rcu_torture_boost_ktrerror; 173static long n_rcu_torture_boost_ktrerror;
168static long n_rcu_torture_boost_rterror; 174static long n_rcu_torture_boost_rterror;
169static long n_rcu_torture_boost_failure; 175static long n_rcu_torture_boost_failure;
@@ -173,6 +179,8 @@ static long n_offline_attempts;
173static long n_offline_successes; 179static long n_offline_successes;
174static long n_online_attempts; 180static long n_online_attempts;
175static long n_online_successes; 181static long n_online_successes;
182static long n_barrier_attempts;
183static long n_barrier_successes;
176static struct list_head rcu_torture_removed; 184static struct list_head rcu_torture_removed;
177static cpumask_var_t shuffle_tmp_mask; 185static cpumask_var_t shuffle_tmp_mask;
178 186
@@ -197,6 +205,10 @@ static unsigned long shutdown_time; /* jiffies to system shutdown. */
197static unsigned long boost_starttime; /* jiffies of next boost test start. */ 205static unsigned long boost_starttime; /* jiffies of next boost test start. */
198DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ 206DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
199 /* and boost task create/destroy. */ 207 /* and boost task create/destroy. */
208static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */
209static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */
210static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
211static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
200 212
201/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ 213/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
202 214
@@ -327,6 +339,7 @@ struct rcu_torture_ops {
327 int (*completed)(void); 339 int (*completed)(void);
328 void (*deferred_free)(struct rcu_torture *p); 340 void (*deferred_free)(struct rcu_torture *p);
329 void (*sync)(void); 341 void (*sync)(void);
342 void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
330 void (*cb_barrier)(void); 343 void (*cb_barrier)(void);
331 void (*fqs)(void); 344 void (*fqs)(void);
332 int (*stats)(char *page); 345 int (*stats)(char *page);
@@ -417,6 +430,7 @@ static struct rcu_torture_ops rcu_ops = {
417 .completed = rcu_torture_completed, 430 .completed = rcu_torture_completed,
418 .deferred_free = rcu_torture_deferred_free, 431 .deferred_free = rcu_torture_deferred_free,
419 .sync = synchronize_rcu, 432 .sync = synchronize_rcu,
433 .call = call_rcu,
420 .cb_barrier = rcu_barrier, 434 .cb_barrier = rcu_barrier,
421 .fqs = rcu_force_quiescent_state, 435 .fqs = rcu_force_quiescent_state,
422 .stats = NULL, 436 .stats = NULL,
@@ -460,6 +474,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
460 .completed = rcu_torture_completed, 474 .completed = rcu_torture_completed,
461 .deferred_free = rcu_sync_torture_deferred_free, 475 .deferred_free = rcu_sync_torture_deferred_free,
462 .sync = synchronize_rcu, 476 .sync = synchronize_rcu,
477 .call = NULL,
463 .cb_barrier = NULL, 478 .cb_barrier = NULL,
464 .fqs = rcu_force_quiescent_state, 479 .fqs = rcu_force_quiescent_state,
465 .stats = NULL, 480 .stats = NULL,
@@ -477,6 +492,7 @@ static struct rcu_torture_ops rcu_expedited_ops = {
477 .completed = rcu_no_completed, 492 .completed = rcu_no_completed,
478 .deferred_free = rcu_sync_torture_deferred_free, 493 .deferred_free = rcu_sync_torture_deferred_free,
479 .sync = synchronize_rcu_expedited, 494 .sync = synchronize_rcu_expedited,
495 .call = NULL,
480 .cb_barrier = NULL, 496 .cb_barrier = NULL,
481 .fqs = rcu_force_quiescent_state, 497 .fqs = rcu_force_quiescent_state,
482 .stats = NULL, 498 .stats = NULL,
@@ -519,6 +535,7 @@ static struct rcu_torture_ops rcu_bh_ops = {
519 .completed = rcu_bh_torture_completed, 535 .completed = rcu_bh_torture_completed,
520 .deferred_free = rcu_bh_torture_deferred_free, 536 .deferred_free = rcu_bh_torture_deferred_free,
521 .sync = synchronize_rcu_bh, 537 .sync = synchronize_rcu_bh,
538 .call = call_rcu_bh,
522 .cb_barrier = rcu_barrier_bh, 539 .cb_barrier = rcu_barrier_bh,
523 .fqs = rcu_bh_force_quiescent_state, 540 .fqs = rcu_bh_force_quiescent_state,
524 .stats = NULL, 541 .stats = NULL,
@@ -535,6 +552,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
535 .completed = rcu_bh_torture_completed, 552 .completed = rcu_bh_torture_completed,
536 .deferred_free = rcu_sync_torture_deferred_free, 553 .deferred_free = rcu_sync_torture_deferred_free,
537 .sync = synchronize_rcu_bh, 554 .sync = synchronize_rcu_bh,
555 .call = NULL,
538 .cb_barrier = NULL, 556 .cb_barrier = NULL,
539 .fqs = rcu_bh_force_quiescent_state, 557 .fqs = rcu_bh_force_quiescent_state,
540 .stats = NULL, 558 .stats = NULL,
@@ -551,6 +569,7 @@ static struct rcu_torture_ops rcu_bh_expedited_ops = {
551 .completed = rcu_bh_torture_completed, 569 .completed = rcu_bh_torture_completed,
552 .deferred_free = rcu_sync_torture_deferred_free, 570 .deferred_free = rcu_sync_torture_deferred_free,
553 .sync = synchronize_rcu_bh_expedited, 571 .sync = synchronize_rcu_bh_expedited,
572 .call = NULL,
554 .cb_barrier = NULL, 573 .cb_barrier = NULL,
555 .fqs = rcu_bh_force_quiescent_state, 574 .fqs = rcu_bh_force_quiescent_state,
556 .stats = NULL, 575 .stats = NULL,
@@ -606,6 +625,11 @@ static int srcu_torture_completed(void)
606 return srcu_batches_completed(&srcu_ctl); 625 return srcu_batches_completed(&srcu_ctl);
607} 626}
608 627
628static void srcu_torture_deferred_free(struct rcu_torture *rp)
629{
630 call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb);
631}
632
609static void srcu_torture_synchronize(void) 633static void srcu_torture_synchronize(void)
610{ 634{
611 synchronize_srcu(&srcu_ctl); 635 synchronize_srcu(&srcu_ctl);
@@ -620,7 +644,7 @@ static int srcu_torture_stats(char *page)
620 cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):", 644 cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):",
621 torture_type, TORTURE_FLAG, idx); 645 torture_type, TORTURE_FLAG, idx);
622 for_each_possible_cpu(cpu) { 646 for_each_possible_cpu(cpu) {
623 cnt += sprintf(&page[cnt], " %d(%d,%d)", cpu, 647 cnt += sprintf(&page[cnt], " %d(%lu,%lu)", cpu,
624 per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], 648 per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx],
625 per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); 649 per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]);
626 } 650 }
@@ -635,13 +659,29 @@ static struct rcu_torture_ops srcu_ops = {
635 .read_delay = srcu_read_delay, 659 .read_delay = srcu_read_delay,
636 .readunlock = srcu_torture_read_unlock, 660 .readunlock = srcu_torture_read_unlock,
637 .completed = srcu_torture_completed, 661 .completed = srcu_torture_completed,
638 .deferred_free = rcu_sync_torture_deferred_free, 662 .deferred_free = srcu_torture_deferred_free,
639 .sync = srcu_torture_synchronize, 663 .sync = srcu_torture_synchronize,
664 .call = NULL,
640 .cb_barrier = NULL, 665 .cb_barrier = NULL,
641 .stats = srcu_torture_stats, 666 .stats = srcu_torture_stats,
642 .name = "srcu" 667 .name = "srcu"
643}; 668};
644 669
670static struct rcu_torture_ops srcu_sync_ops = {
671 .init = srcu_torture_init,
672 .cleanup = srcu_torture_cleanup,
673 .readlock = srcu_torture_read_lock,
674 .read_delay = srcu_read_delay,
675 .readunlock = srcu_torture_read_unlock,
676 .completed = srcu_torture_completed,
677 .deferred_free = rcu_sync_torture_deferred_free,
678 .sync = srcu_torture_synchronize,
679 .call = NULL,
680 .cb_barrier = NULL,
681 .stats = srcu_torture_stats,
682 .name = "srcu_sync"
683};
684
645static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl) 685static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl)
646{ 686{
647 return srcu_read_lock_raw(&srcu_ctl); 687 return srcu_read_lock_raw(&srcu_ctl);
@@ -659,13 +699,29 @@ static struct rcu_torture_ops srcu_raw_ops = {
659 .read_delay = srcu_read_delay, 699 .read_delay = srcu_read_delay,
660 .readunlock = srcu_torture_read_unlock_raw, 700 .readunlock = srcu_torture_read_unlock_raw,
661 .completed = srcu_torture_completed, 701 .completed = srcu_torture_completed,
662 .deferred_free = rcu_sync_torture_deferred_free, 702 .deferred_free = srcu_torture_deferred_free,
663 .sync = srcu_torture_synchronize, 703 .sync = srcu_torture_synchronize,
704 .call = NULL,
664 .cb_barrier = NULL, 705 .cb_barrier = NULL,
665 .stats = srcu_torture_stats, 706 .stats = srcu_torture_stats,
666 .name = "srcu_raw" 707 .name = "srcu_raw"
667}; 708};
668 709
710static struct rcu_torture_ops srcu_raw_sync_ops = {
711 .init = srcu_torture_init,
712 .cleanup = srcu_torture_cleanup,
713 .readlock = srcu_torture_read_lock_raw,
714 .read_delay = srcu_read_delay,
715 .readunlock = srcu_torture_read_unlock_raw,
716 .completed = srcu_torture_completed,
717 .deferred_free = rcu_sync_torture_deferred_free,
718 .sync = srcu_torture_synchronize,
719 .call = NULL,
720 .cb_barrier = NULL,
721 .stats = srcu_torture_stats,
722 .name = "srcu_raw_sync"
723};
724
669static void srcu_torture_synchronize_expedited(void) 725static void srcu_torture_synchronize_expedited(void)
670{ 726{
671 synchronize_srcu_expedited(&srcu_ctl); 727 synchronize_srcu_expedited(&srcu_ctl);
@@ -680,6 +736,7 @@ static struct rcu_torture_ops srcu_expedited_ops = {
680 .completed = srcu_torture_completed, 736 .completed = srcu_torture_completed,
681 .deferred_free = rcu_sync_torture_deferred_free, 737 .deferred_free = rcu_sync_torture_deferred_free,
682 .sync = srcu_torture_synchronize_expedited, 738 .sync = srcu_torture_synchronize_expedited,
739 .call = NULL,
683 .cb_barrier = NULL, 740 .cb_barrier = NULL,
684 .stats = srcu_torture_stats, 741 .stats = srcu_torture_stats,
685 .name = "srcu_expedited" 742 .name = "srcu_expedited"
@@ -1129,7 +1186,8 @@ rcu_torture_printk(char *page)
1129 "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " 1186 "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
1130 "rtmbe: %d rtbke: %ld rtbre: %ld " 1187 "rtmbe: %d rtbke: %ld rtbre: %ld "
1131 "rtbf: %ld rtb: %ld nt: %ld " 1188 "rtbf: %ld rtb: %ld nt: %ld "
1132 "onoff: %ld/%ld:%ld/%ld", 1189 "onoff: %ld/%ld:%ld/%ld "
1190 "barrier: %ld/%ld:%ld",
1133 rcu_torture_current, 1191 rcu_torture_current,
1134 rcu_torture_current_version, 1192 rcu_torture_current_version,
1135 list_empty(&rcu_torture_freelist), 1193 list_empty(&rcu_torture_freelist),
@@ -1145,14 +1203,17 @@ rcu_torture_printk(char *page)
1145 n_online_successes, 1203 n_online_successes,
1146 n_online_attempts, 1204 n_online_attempts,
1147 n_offline_successes, 1205 n_offline_successes,
1148 n_offline_attempts); 1206 n_offline_attempts,
1207 n_barrier_successes,
1208 n_barrier_attempts,
1209 n_rcu_torture_barrier_error);
1210 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
1149 if (atomic_read(&n_rcu_torture_mberror) != 0 || 1211 if (atomic_read(&n_rcu_torture_mberror) != 0 ||
1212 n_rcu_torture_barrier_error != 0 ||
1150 n_rcu_torture_boost_ktrerror != 0 || 1213 n_rcu_torture_boost_ktrerror != 0 ||
1151 n_rcu_torture_boost_rterror != 0 || 1214 n_rcu_torture_boost_rterror != 0 ||
1152 n_rcu_torture_boost_failure != 0) 1215 n_rcu_torture_boost_failure != 0 ||
1153 cnt += sprintf(&page[cnt], " !!!"); 1216 i > 1) {
1154 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
1155 if (i > 1) {
1156 cnt += sprintf(&page[cnt], "!!! "); 1217 cnt += sprintf(&page[cnt], "!!! ");
1157 atomic_inc(&n_rcu_torture_error); 1218 atomic_inc(&n_rcu_torture_error);
1158 WARN_ON_ONCE(1); 1219 WARN_ON_ONCE(1);
@@ -1337,6 +1398,7 @@ static void rcutorture_booster_cleanup(int cpu)
1337 1398
1338 /* This must be outside of the mutex, otherwise deadlock! */ 1399 /* This must be outside of the mutex, otherwise deadlock! */
1339 kthread_stop(t); 1400 kthread_stop(t);
1401 boost_tasks[cpu] = NULL;
1340} 1402}
1341 1403
1342static int rcutorture_booster_init(int cpu) 1404static int rcutorture_booster_init(int cpu)
@@ -1484,13 +1546,15 @@ static void rcu_torture_onoff_cleanup(void)
1484 return; 1546 return;
1485 VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task"); 1547 VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task");
1486 kthread_stop(onoff_task); 1548 kthread_stop(onoff_task);
1549 onoff_task = NULL;
1487} 1550}
1488 1551
1489#else /* #ifdef CONFIG_HOTPLUG_CPU */ 1552#else /* #ifdef CONFIG_HOTPLUG_CPU */
1490 1553
1491static void 1554static int
1492rcu_torture_onoff_init(void) 1555rcu_torture_onoff_init(void)
1493{ 1556{
1557 return 0;
1494} 1558}
1495 1559
1496static void rcu_torture_onoff_cleanup(void) 1560static void rcu_torture_onoff_cleanup(void)
@@ -1554,6 +1618,152 @@ static void rcu_torture_stall_cleanup(void)
1554 return; 1618 return;
1555 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task."); 1619 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task.");
1556 kthread_stop(stall_task); 1620 kthread_stop(stall_task);
1621 stall_task = NULL;
1622}
1623
1624/* Callback function for RCU barrier testing. */
1625void rcu_torture_barrier_cbf(struct rcu_head *rcu)
1626{
1627 atomic_inc(&barrier_cbs_invoked);
1628}
1629
1630/* kthread function to register callbacks used to test RCU barriers. */
1631static int rcu_torture_barrier_cbs(void *arg)
1632{
1633 long myid = (long)arg;
1634 struct rcu_head rcu;
1635
1636 init_rcu_head_on_stack(&rcu);
1637 VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started");
1638 set_user_nice(current, 19);
1639 do {
1640 wait_event(barrier_cbs_wq[myid],
1641 atomic_read(&barrier_cbs_count) == n_barrier_cbs ||
1642 kthread_should_stop() ||
1643 fullstop != FULLSTOP_DONTSTOP);
1644 if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
1645 break;
1646 cur_ops->call(&rcu, rcu_torture_barrier_cbf);
1647 if (atomic_dec_and_test(&barrier_cbs_count))
1648 wake_up(&barrier_wq);
1649 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
1650 VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task stopping");
1651 rcutorture_shutdown_absorb("rcu_torture_barrier_cbs");
1652 while (!kthread_should_stop())
1653 schedule_timeout_interruptible(1);
1654 cur_ops->cb_barrier();
1655 destroy_rcu_head_on_stack(&rcu);
1656 return 0;
1657}
1658
1659/* kthread function to drive and coordinate RCU barrier testing. */
1660static int rcu_torture_barrier(void *arg)
1661{
1662 int i;
1663
1664 VERBOSE_PRINTK_STRING("rcu_torture_barrier task starting");
1665 do {
1666 atomic_set(&barrier_cbs_invoked, 0);
1667 atomic_set(&barrier_cbs_count, n_barrier_cbs);
1668 /* wake_up() path contains the required barriers. */
1669 for (i = 0; i < n_barrier_cbs; i++)
1670 wake_up(&barrier_cbs_wq[i]);
1671 wait_event(barrier_wq,
1672 atomic_read(&barrier_cbs_count) == 0 ||
1673 kthread_should_stop() ||
1674 fullstop != FULLSTOP_DONTSTOP);
1675 if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
1676 break;
1677 n_barrier_attempts++;
1678 cur_ops->cb_barrier();
1679 if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) {
1680 n_rcu_torture_barrier_error++;
1681 WARN_ON_ONCE(1);
1682 }
1683 n_barrier_successes++;
1684 schedule_timeout_interruptible(HZ / 10);
1685 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
1686 VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping");
1687 rcutorture_shutdown_absorb("rcu_torture_barrier_cbs");
1688 while (!kthread_should_stop())
1689 schedule_timeout_interruptible(1);
1690 return 0;
1691}
1692
1693/* Initialize RCU barrier testing. */
1694static int rcu_torture_barrier_init(void)
1695{
1696 int i;
1697 int ret;
1698
1699 if (n_barrier_cbs == 0)
1700 return 0;
1701 if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) {
1702 printk(KERN_ALERT "%s" TORTURE_FLAG
1703 " Call or barrier ops missing for %s,\n",
1704 torture_type, cur_ops->name);
1705 printk(KERN_ALERT "%s" TORTURE_FLAG
1706 " RCU barrier testing omitted from run.\n",
1707 torture_type);
1708 return 0;
1709 }
1710 atomic_set(&barrier_cbs_count, 0);
1711 atomic_set(&barrier_cbs_invoked, 0);
1712 barrier_cbs_tasks =
1713 kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]),
1714 GFP_KERNEL);
1715 barrier_cbs_wq =
1716 kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
1717 GFP_KERNEL);
1718 if (barrier_cbs_tasks == NULL || barrier_cbs_wq == 0)
1719 return -ENOMEM;
1720 for (i = 0; i < n_barrier_cbs; i++) {
1721 init_waitqueue_head(&barrier_cbs_wq[i]);
1722 barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs,
1723 (void *)(long)i,
1724 "rcu_torture_barrier_cbs");
1725 if (IS_ERR(barrier_cbs_tasks[i])) {
1726 ret = PTR_ERR(barrier_cbs_tasks[i]);
1727 VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier_cbs");
1728 barrier_cbs_tasks[i] = NULL;
1729 return ret;
1730 }
1731 }
1732 barrier_task = kthread_run(rcu_torture_barrier, NULL,
1733 "rcu_torture_barrier");
1734 if (IS_ERR(barrier_task)) {
1735 ret = PTR_ERR(barrier_task);
1736 VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier");
1737 barrier_task = NULL;
1738 }
1739 return 0;
1740}
1741
1742/* Clean up after RCU barrier testing. */
1743static void rcu_torture_barrier_cleanup(void)
1744{
1745 int i;
1746
1747 if (barrier_task != NULL) {
1748 VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier task");
1749 kthread_stop(barrier_task);
1750 barrier_task = NULL;
1751 }
1752 if (barrier_cbs_tasks != NULL) {
1753 for (i = 0; i < n_barrier_cbs; i++) {
1754 if (barrier_cbs_tasks[i] != NULL) {
1755 VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier_cbs task");
1756 kthread_stop(barrier_cbs_tasks[i]);
1757 barrier_cbs_tasks[i] = NULL;
1758 }
1759 }
1760 kfree(barrier_cbs_tasks);
1761 barrier_cbs_tasks = NULL;
1762 }
1763 if (barrier_cbs_wq != NULL) {
1764 kfree(barrier_cbs_wq);
1765 barrier_cbs_wq = NULL;
1766 }
1557} 1767}
1558 1768
1559static int rcutorture_cpu_notify(struct notifier_block *self, 1769static int rcutorture_cpu_notify(struct notifier_block *self,
@@ -1598,6 +1808,7 @@ rcu_torture_cleanup(void)
1598 fullstop = FULLSTOP_RMMOD; 1808 fullstop = FULLSTOP_RMMOD;
1599 mutex_unlock(&fullstop_mutex); 1809 mutex_unlock(&fullstop_mutex);
1600 unregister_reboot_notifier(&rcutorture_shutdown_nb); 1810 unregister_reboot_notifier(&rcutorture_shutdown_nb);
1811 rcu_torture_barrier_cleanup();
1601 rcu_torture_stall_cleanup(); 1812 rcu_torture_stall_cleanup();
1602 if (stutter_task) { 1813 if (stutter_task) {
1603 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); 1814 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
@@ -1665,6 +1876,7 @@ rcu_torture_cleanup(void)
1665 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task"); 1876 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task");
1666 kthread_stop(shutdown_task); 1877 kthread_stop(shutdown_task);
1667 } 1878 }
1879 shutdown_task = NULL;
1668 rcu_torture_onoff_cleanup(); 1880 rcu_torture_onoff_cleanup();
1669 1881
1670 /* Wait for all RCU callbacks to fire. */ 1882 /* Wait for all RCU callbacks to fire. */
@@ -1676,7 +1888,7 @@ rcu_torture_cleanup(void)
1676 1888
1677 if (cur_ops->cleanup) 1889 if (cur_ops->cleanup)
1678 cur_ops->cleanup(); 1890 cur_ops->cleanup();
1679 if (atomic_read(&n_rcu_torture_error)) 1891 if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error)
1680 rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); 1892 rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
1681 else if (n_online_successes != n_online_attempts || 1893 else if (n_online_successes != n_online_attempts ||
1682 n_offline_successes != n_offline_attempts) 1894 n_offline_successes != n_offline_attempts)
@@ -1692,10 +1904,12 @@ rcu_torture_init(void)
1692 int i; 1904 int i;
1693 int cpu; 1905 int cpu;
1694 int firsterr = 0; 1906 int firsterr = 0;
1907 int retval;
1695 static struct rcu_torture_ops *torture_ops[] = 1908 static struct rcu_torture_ops *torture_ops[] =
1696 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, 1909 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
1697 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, 1910 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
1698 &srcu_ops, &srcu_raw_ops, &srcu_expedited_ops, 1911 &srcu_ops, &srcu_sync_ops, &srcu_raw_ops,
1912 &srcu_raw_sync_ops, &srcu_expedited_ops,
1699 &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; 1913 &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
1700 1914
1701 mutex_lock(&fullstop_mutex); 1915 mutex_lock(&fullstop_mutex);
@@ -1749,6 +1963,7 @@ rcu_torture_init(void)
1749 atomic_set(&n_rcu_torture_free, 0); 1963 atomic_set(&n_rcu_torture_free, 0);
1750 atomic_set(&n_rcu_torture_mberror, 0); 1964 atomic_set(&n_rcu_torture_mberror, 0);
1751 atomic_set(&n_rcu_torture_error, 0); 1965 atomic_set(&n_rcu_torture_error, 0);
1966 n_rcu_torture_barrier_error = 0;
1752 n_rcu_torture_boost_ktrerror = 0; 1967 n_rcu_torture_boost_ktrerror = 0;
1753 n_rcu_torture_boost_rterror = 0; 1968 n_rcu_torture_boost_rterror = 0;
1754 n_rcu_torture_boost_failure = 0; 1969 n_rcu_torture_boost_failure = 0;
@@ -1872,7 +2087,6 @@ rcu_torture_init(void)
1872 test_boost_duration = 2; 2087 test_boost_duration = 2;
1873 if ((test_boost == 1 && cur_ops->can_boost) || 2088 if ((test_boost == 1 && cur_ops->can_boost) ||
1874 test_boost == 2) { 2089 test_boost == 2) {
1875 int retval;
1876 2090
1877 boost_starttime = jiffies + test_boost_interval * HZ; 2091 boost_starttime = jiffies + test_boost_interval * HZ;
1878 register_cpu_notifier(&rcutorture_cpu_nb); 2092 register_cpu_notifier(&rcutorture_cpu_nb);
@@ -1897,9 +2111,22 @@ rcu_torture_init(void)
1897 goto unwind; 2111 goto unwind;
1898 } 2112 }
1899 } 2113 }
1900 rcu_torture_onoff_init(); 2114 i = rcu_torture_onoff_init();
2115 if (i != 0) {
2116 firsterr = i;
2117 goto unwind;
2118 }
1901 register_reboot_notifier(&rcutorture_shutdown_nb); 2119 register_reboot_notifier(&rcutorture_shutdown_nb);
1902 rcu_torture_stall_init(); 2120 i = rcu_torture_stall_init();
2121 if (i != 0) {
2122 firsterr = i;
2123 goto unwind;
2124 }
2125 retval = rcu_torture_barrier_init();
2126 if (retval != 0) {
2127 firsterr = retval;
2128 goto unwind;
2129 }
1903 rcutorture_record_test_transition(); 2130 rcutorture_record_test_transition();
1904 mutex_unlock(&fullstop_mutex); 2131 mutex_unlock(&fullstop_mutex);
1905 return 0; 2132 return 0;
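The new barrier test above exercises the guarantee that the selected flavor's cb_barrier() (for example rcu_barrier()) waits for all previously posted callbacks. A minimal sketch of the client-side pattern this protects; my_obj, my_cb, and my_module_exit are hypothetical names, and the usual callback-frees-object idiom is assumed:

#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_obj {
        struct rcu_head rh;
        /* payload ... */
};

/* Callback posted via call_rcu(); frees the enclosing object. */
static void my_cb(struct rcu_head *rhp)
{
        kfree(container_of(rhp, struct my_obj, rh));
}

static void __exit my_module_exit(void)
{
        /* Stop posting new callbacks first, then wait for the ones
         * already posted; without this, my_cb() could still run after
         * the module text has been unloaded. */
        rcu_barrier();
}
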
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e578dd327c64..b3ea3ac3a2b5 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -201,7 +201,6 @@ void rcu_note_context_switch(int cpu)
201{ 201{
202 trace_rcu_utilization("Start context switch"); 202 trace_rcu_utilization("Start context switch");
203 rcu_sched_qs(cpu); 203 rcu_sched_qs(cpu);
204 rcu_preempt_note_context_switch(cpu);
205 trace_rcu_utilization("End context switch"); 204 trace_rcu_utilization("End context switch");
206} 205}
207EXPORT_SYMBOL_GPL(rcu_note_context_switch); 206EXPORT_SYMBOL_GPL(rcu_note_context_switch);
@@ -1953,6 +1952,38 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1953} 1952}
1954EXPORT_SYMBOL_GPL(call_rcu_bh); 1953EXPORT_SYMBOL_GPL(call_rcu_bh);
1955 1954
1955/*
1956 * Because a context switch is a grace period for RCU-sched and RCU-bh,
1957 * any blocking grace-period wait automatically implies a grace period
1958 * if there is only one CPU online at any point time during execution
1959 * of either synchronize_sched() or synchronize_rcu_bh(). It is OK to
1960 * occasionally incorrectly indicate that there are multiple CPUs online
1961 * when there was in fact only one the whole time, as this just adds
1962 * some overhead: RCU still operates correctly.
1963 *
1964 * Of course, sampling num_online_cpus() with preemption enabled can
1965 * give erroneous results if there are concurrent CPU-hotplug operations.
1966 * For example, given a demonic sequence of preemptions in num_online_cpus()
1967 * and CPU-hotplug operations, there could be two or more CPUs online at
1968 * all times, but num_online_cpus() might well return one (or even zero).
1969 *
1970 * However, all such demonic sequences require at least one CPU-offline
1971 * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer
1972 * is only a problem if there is an RCU read-side critical section executing
1973 * throughout. But RCU-sched and RCU-bh read-side critical sections
1974 * disable either preemption or bh, which prevents a CPU from going offline.
1975 * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return
1976 * that there is only one CPU when in fact there was more than one throughout
1977 * is when there were no RCU readers in the system. If there are no
1978 * RCU readers, the grace period by definition can be of zero length,
1979 * regardless of the number of online CPUs.
1980 */
1981static inline int rcu_blocking_is_gp(void)
1982{
1983 might_sleep(); /* Check for RCU read-side critical section. */
1984 return num_online_cpus() <= 1;
1985}
1986
1956/** 1987/**
1957 * synchronize_sched - wait until an rcu-sched grace period has elapsed. 1988 * synchronize_sched - wait until an rcu-sched grace period has elapsed.
1958 * 1989 *
@@ -2543,7 +2574,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
2543 2574
2544 for (i = NUM_RCU_LVLS - 1; i > 0; i--) 2575 for (i = NUM_RCU_LVLS - 1; i > 0; i--)
2545 rsp->levelspread[i] = CONFIG_RCU_FANOUT; 2576 rsp->levelspread[i] = CONFIG_RCU_FANOUT;
2546 rsp->levelspread[0] = RCU_FANOUT_LEAF; 2577 rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF;
2547} 2578}
2548#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ 2579#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
2549static void __init rcu_init_levelspread(struct rcu_state *rsp) 2580static void __init rcu_init_levelspread(struct rcu_state *rsp)
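rcu_blocking_is_gp() added above lets the blocking grace-period primitives short-circuit when at most one CPU is online. Its caller is outside the hunks shown here; a hedged sketch of the intended use in synchronize_sched() looks roughly like this:

void synchronize_sched(void)
{
        /* Sketch only: with at most one CPU online, the context switch
         * implied by might_sleep() inside rcu_blocking_is_gp() is
         * itself a grace period, so return at once. */
        if (rcu_blocking_is_gp())
                return;
        /* ... otherwise block until a full RCU-sched grace period
         * elapses (e.g. by posting a callback and waiting for it) ... */
}
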
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 1e49c5685960..7f5d138dedf5 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -29,18 +29,14 @@
29#include <linux/seqlock.h> 29#include <linux/seqlock.h>
30 30
31/* 31/*
32 * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. 32 * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
33 * CONFIG_RCU_FANOUT_LEAF.
33 * In theory, it should be possible to add more levels straightforwardly. 34 * In theory, it should be possible to add more levels straightforwardly.
34 * In practice, this did work well going from three levels to four. 35 * In practice, this did work well going from three levels to four.
35 * Of course, your mileage may vary. 36 * Of course, your mileage may vary.
36 */ 37 */
37#define MAX_RCU_LVLS 4 38#define MAX_RCU_LVLS 4
38#if CONFIG_RCU_FANOUT > 16 39#define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF)
39#define RCU_FANOUT_LEAF 16
40#else /* #if CONFIG_RCU_FANOUT > 16 */
41#define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT)
42#endif /* #else #if CONFIG_RCU_FANOUT > 16 */
43#define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
44#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) 40#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
45#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) 41#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
46#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) 42#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
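With RCU_FANOUT_LEAF removed, RCU_FANOUT_1 now comes straight from the new CONFIG_RCU_FANOUT_LEAF Kconfig option. The per-level node counts that consume these macros are outside this hunk; assuming they keep the usual DIV_ROUND_UP shape, they would read:

/* Assumed shape of the per-level sizing (not part of this hunk). */
#define NUM_RCU_LVL_0   1
#define NUM_RCU_LVL_1   DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
#define NUM_RCU_LVL_2   DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
#define NUM_RCU_LVL_3   DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
#define NUM_RCU_LVL_4   DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_4)
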
@@ -434,7 +430,6 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
434/* Forward declarations for rcutree_plugin.h */ 430/* Forward declarations for rcutree_plugin.h */
435static void rcu_bootup_announce(void); 431static void rcu_bootup_announce(void);
436long rcu_batches_completed(void); 432long rcu_batches_completed(void);
437static void rcu_preempt_note_context_switch(int cpu);
438static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); 433static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
439#ifdef CONFIG_HOTPLUG_CPU 434#ifdef CONFIG_HOTPLUG_CPU
440static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, 435static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 7082ea93566f..2411000d9869 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -153,7 +153,7 @@ static void rcu_preempt_qs(int cpu)
153 * 153 *
154 * Caller must disable preemption. 154 * Caller must disable preemption.
155 */ 155 */
156static void rcu_preempt_note_context_switch(int cpu) 156void rcu_preempt_note_context_switch(void)
157{ 157{
158 struct task_struct *t = current; 158 struct task_struct *t = current;
159 unsigned long flags; 159 unsigned long flags;
@@ -164,7 +164,7 @@ static void rcu_preempt_note_context_switch(int cpu)
164 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 164 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
165 165
166 /* Possibly blocking in an RCU read-side critical section. */ 166 /* Possibly blocking in an RCU read-side critical section. */
167 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); 167 rdp = __this_cpu_ptr(rcu_preempt_state.rda);
168 rnp = rdp->mynode; 168 rnp = rdp->mynode;
169 raw_spin_lock_irqsave(&rnp->lock, flags); 169 raw_spin_lock_irqsave(&rnp->lock, flags);
170 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 170 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
@@ -228,7 +228,7 @@ static void rcu_preempt_note_context_switch(int cpu)
228 * means that we continue to block the current grace period. 228 * means that we continue to block the current grace period.
229 */ 229 */
230 local_irq_save(flags); 230 local_irq_save(flags);
231 rcu_preempt_qs(cpu); 231 rcu_preempt_qs(smp_processor_id());
232 local_irq_restore(flags); 232 local_irq_restore(flags);
233} 233}
234 234
@@ -969,22 +969,6 @@ static void __init __rcu_init_preempt(void)
969 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); 969 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
970} 970}
971 971
972/*
973 * Check for a task exiting while in a preemptible-RCU read-side
974 * critical section, clean up if so. No need to issue warnings,
975 * as debug_check_no_locks_held() already does this if lockdep
976 * is enabled.
977 */
978void exit_rcu(void)
979{
980 struct task_struct *t = current;
981
982 if (t->rcu_read_lock_nesting == 0)
983 return;
984 t->rcu_read_lock_nesting = 1;
985 __rcu_read_unlock();
986}
987
988#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 972#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
989 973
990static struct rcu_state *rcu_state = &rcu_sched_state; 974static struct rcu_state *rcu_state = &rcu_sched_state;
@@ -1018,14 +1002,6 @@ void rcu_force_quiescent_state(void)
1018EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 1002EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
1019 1003
1020/* 1004/*
1021 * Because preemptible RCU does not exist, we never have to check for
1022 * CPUs being in quiescent states.
1023 */
1024static void rcu_preempt_note_context_switch(int cpu)
1025{
1026}
1027
1028/*
1029 * Because preemptible RCU does not exist, there are never any preempted 1005 * Because preemptible RCU does not exist, there are never any preempted
1030 * RCU readers. 1006 * RCU readers.
1031 */ 1007 */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4603b9d8f30a..5d89eb93f7e4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2083,6 +2083,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
2083#endif 2083#endif
2084 2084
2085 /* Here we just switch the register state and the stack. */ 2085 /* Here we just switch the register state and the stack. */
2086 rcu_switch_from(prev);
2086 switch_to(prev, next, prev); 2087 switch_to(prev, next, prev);
2087 2088
2088 barrier(); 2089 barrier();
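context_switch() now calls rcu_switch_from() for the outgoing task just before switch_to(). Its definition is not in these hunks; given the rcu_preempt_note_context_switch() signature change in rcutree_plugin.h below, a plausible sketch is:

/* Plausible sketch only -- the real definition is outside this diff.
 * Report the context switch for the outgoing task if it might be
 * inside a preemptible-RCU read-side critical section. */
static inline void rcu_switch_from(struct task_struct *prev)
{
        if (prev->rcu_read_lock_nesting != 0)
                rcu_preempt_note_context_switch();
}
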
diff --git a/kernel/srcu.c b/kernel/srcu.c
index ba35f3a4a1f4..2095be3318d5 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -34,10 +34,77 @@
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/srcu.h> 35#include <linux/srcu.h>
36 36
37/*
38 * Initialize an rcu_batch structure to empty.
39 */
40static inline void rcu_batch_init(struct rcu_batch *b)
41{
42 b->head = NULL;
43 b->tail = &b->head;
44}
45
46/*
47 * Enqueue a callback onto the tail of the specified rcu_batch structure.
48 */
49static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
50{
51 *b->tail = head;
52 b->tail = &head->next;
53}
54
55/*
56 * Is the specified rcu_batch structure empty?
57 */
58static inline bool rcu_batch_empty(struct rcu_batch *b)
59{
60 return b->tail == &b->head;
61}
62
63/*
64 * Remove the callback at the head of the specified rcu_batch structure
65 * and return a pointer to it, or return NULL if the structure is empty.
66 */
67static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
68{
69 struct rcu_head *head;
70
71 if (rcu_batch_empty(b))
72 return NULL;
73
74 head = b->head;
75 b->head = head->next;
76 if (b->tail == &head->next)
77 rcu_batch_init(b);
78
79 return head;
80}
81
82/*
83 * Move all callbacks from the rcu_batch structure specified by "from" to
84 * the structure specified by "to".
85 */
86static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
87{
88 if (!rcu_batch_empty(from)) {
89 *to->tail = from->head;
90 to->tail = from->tail;
91 rcu_batch_init(from);
92 }
93}
94
95/* single-thread state-machine */
96static void process_srcu(struct work_struct *work);
97
37static int init_srcu_struct_fields(struct srcu_struct *sp) 98static int init_srcu_struct_fields(struct srcu_struct *sp)
38{ 99{
39 sp->completed = 0; 100 sp->completed = 0;
40 mutex_init(&sp->mutex); 101 spin_lock_init(&sp->queue_lock);
102 sp->running = false;
103 rcu_batch_init(&sp->batch_queue);
104 rcu_batch_init(&sp->batch_check0);
105 rcu_batch_init(&sp->batch_check1);
106 rcu_batch_init(&sp->batch_done);
107 INIT_DELAYED_WORK(&sp->work, process_srcu);
41 sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); 108 sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
42 return sp->per_cpu_ref ? 0 : -ENOMEM; 109 return sp->per_cpu_ref ? 0 : -ENOMEM;
43} 110}
@@ -73,21 +140,116 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
73#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 140#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
74 141
75/* 142/*
76 * srcu_readers_active_idx -- returns approximate number of readers 143 * Returns approximate total of the readers' ->seq[] values for the
77 * active on the specified rank of per-CPU counters. 144 * rank of per-CPU counters specified by idx.
78 */ 145 */
146static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx)
147{
148 int cpu;
149 unsigned long sum = 0;
150 unsigned long t;
79 151
80static int srcu_readers_active_idx(struct srcu_struct *sp, int idx) 152 for_each_possible_cpu(cpu) {
153 t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]);
154 sum += t;
155 }
156 return sum;
157}
158
159/*
160 * Returns approximate number of readers active on the specified rank
161 * of the per-CPU ->c[] counters.
162 */
163static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx)
81{ 164{
82 int cpu; 165 int cpu;
83 int sum; 166 unsigned long sum = 0;
167 unsigned long t;
84 168
85 sum = 0; 169 for_each_possible_cpu(cpu) {
86 for_each_possible_cpu(cpu) 170 t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]);
87 sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]; 171 sum += t;
172 }
88 return sum; 173 return sum;
89} 174}
90 175
176/*
177 * Return true if the number of pre-existing readers is determined to
178 * be stably zero. An example unstable zero can occur if the call
179 * to srcu_readers_active_idx() misses an __srcu_read_lock() increment,
180 * but due to task migration, sees the corresponding __srcu_read_unlock()
181 * decrement. This can happen because srcu_readers_active_idx() takes
182 * time to sum the array, and might in fact be interrupted or preempted
183 * partway through the summation.
184 */
185static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
186{
187 unsigned long seq;
188
189 seq = srcu_readers_seq_idx(sp, idx);
190
191 /*
192 * The following smp_mb() A pairs with the smp_mb() B located in
193 * __srcu_read_lock(). This pairing ensures that if an
194 * __srcu_read_lock() increments its counter after the summation
195 * in srcu_readers_active_idx(), then the corresponding SRCU read-side
196 * critical section will see any changes made prior to the start
197 * of the current SRCU grace period.
198 *
199 * Also, if the above call to srcu_readers_seq_idx() saw the
200 * increment of ->seq[], then the call to srcu_readers_active_idx()
201 * must see the increment of ->c[].
202 */
203 smp_mb(); /* A */
204
205 /*
206 * Note that srcu_readers_active_idx() can incorrectly return
207 * zero even though there is a pre-existing reader throughout.
208 * To see this, suppose that task A is in a very long SRCU
209 * read-side critical section that started on CPU 0, and that
210 * no other reader exists, so that the sum of the counters
211 * is equal to one. Then suppose that task B starts executing
212 * srcu_readers_active_idx(), summing up to CPU 1, and then that
213 * task C starts reading on CPU 0, so that its increment is not
214 * summed, but finishes reading on CPU 2, so that its decrement
215 * -is- summed. Then when task B completes its sum, it will
216 * incorrectly get zero, despite the fact that task A has been
217 * in its SRCU read-side critical section the whole time.
218 *
219 * We therefore do a validation step should srcu_readers_active_idx()
220 * return zero.
221 */
222 if (srcu_readers_active_idx(sp, idx) != 0)
223 return false;
224
225 /*
226 * The remainder of this function is the validation step.
227 * The following smp_mb() D pairs with the smp_mb() C in
228 * __srcu_read_unlock(). If the __srcu_read_unlock() was seen
229 * by srcu_readers_active_idx() above, then any destructive
230 * operation performed after the grace period will happen after
231 * the corresponding SRCU read-side critical section.
232 *
233 * Note that there can be at most NR_CPUS worth of readers using
234 * the old index, which is not enough to overflow even a 32-bit
235 * integer. (Yes, this does mean that systems having more than
236 * a billion or so CPUs need to be 64-bit systems.) Therefore,
237 * the sum of the ->seq[] counters cannot possibly overflow.
238 * Therefore, the only way that the return values of the two
239 * calls to srcu_readers_seq_idx() can be equal is if there were
240 * no increments of the corresponding rank of ->seq[] counts
241 * in the interim. But the missed-increment scenario laid out
242 * above includes an increment of the ->seq[] counter by
243 * the corresponding __srcu_read_lock(). Therefore, if this
244 * scenario occurs, the return values from the two calls to
245 * srcu_readers_seq_idx() will differ, and thus the validation
246 * step below suffices.
247 */
248 smp_mb(); /* D */
249
250 return srcu_readers_seq_idx(sp, idx) == seq;
251}
252
91/** 253/**
92 * srcu_readers_active - returns approximate number of readers. 254 * srcu_readers_active - returns approximate number of readers.
93 * @sp: which srcu_struct to count active readers (holding srcu_read_lock). 255 * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
@@ -98,7 +260,14 @@ static int srcu_readers_active_idx(struct srcu_struct *sp, int idx)
98 */ 260 */
99static int srcu_readers_active(struct srcu_struct *sp) 261static int srcu_readers_active(struct srcu_struct *sp)
100{ 262{
101 return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1); 263 int cpu;
264 unsigned long sum = 0;
265
266 for_each_possible_cpu(cpu) {
267 sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]);
268 sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]);
269 }
270 return sum;
102} 271}
103 272
104/** 273/**
@@ -131,10 +300,11 @@ int __srcu_read_lock(struct srcu_struct *sp)
131 int idx; 300 int idx;
132 301
133 preempt_disable(); 302 preempt_disable();
134 idx = sp->completed & 0x1; 303 idx = rcu_dereference_index_check(sp->completed,
135 barrier(); /* ensure compiler looks -once- at sp->completed. */ 304 rcu_read_lock_sched_held()) & 0x1;
136 per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++; 305 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1;
137 srcu_barrier(); /* ensure compiler won't misorder critical section. */ 306 smp_mb(); /* B */ /* Avoid leaking the critical section. */
307 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1;
138 preempt_enable(); 308 preempt_enable();
139 return idx; 309 return idx;
140} 310}
@@ -149,8 +319,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
149void __srcu_read_unlock(struct srcu_struct *sp, int idx) 319void __srcu_read_unlock(struct srcu_struct *sp, int idx)
150{ 320{
151 preempt_disable(); 321 preempt_disable();
152 srcu_barrier(); /* ensure compiler won't misorder critical section. */ 322 smp_mb(); /* C */ /* Avoid leaking the critical section. */
153 per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; 323 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1;
154 preempt_enable(); 324 preempt_enable();
155} 325}
156EXPORT_SYMBOL_GPL(__srcu_read_unlock); 326EXPORT_SYMBOL_GPL(__srcu_read_unlock);
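The read-side fast paths above now bump per-CPU ->c[] and ->seq[] counters and pair smp_mb() B/C with the grace-period side's A/D. The reader-facing API is unchanged; a minimal usage sketch, where my_srcu and my_reader are hypothetical names:

static void my_reader(void)
{
        int idx;

        idx = srcu_read_lock(&my_srcu);         /* lands in __srcu_read_lock() */
        /* ... dereference pointers published to my_srcu readers ... */
        srcu_read_unlock(&my_srcu, idx);        /* lands in __srcu_read_unlock() */
}
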
@@ -163,106 +333,119 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
163 * we repeatedly block for 1-millisecond time periods. This approach 333 * we repeatedly block for 1-millisecond time periods. This approach
164 * has done well in testing, so there is no need for a config parameter. 334 * has done well in testing, so there is no need for a config parameter.
165 */ 335 */
166#define SYNCHRONIZE_SRCU_READER_DELAY 10 336#define SRCU_RETRY_CHECK_DELAY 5
337#define SYNCHRONIZE_SRCU_TRYCOUNT 2
338#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 12
167 339
168/* 340/*
169 * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). 341 * @@@ Wait until all pre-existing readers complete. Such readers
342 * will have used the index specified by "idx".
343 * the caller should ensures the ->completed is not changed while checking
344 * and idx = (->completed & 1) ^ 1
170 */ 345 */
171static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) 346static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
172{ 347{
173 int idx; 348 for (;;) {
174 349 if (srcu_readers_active_idx_check(sp, idx))
175 rcu_lockdep_assert(!lock_is_held(&sp->dep_map) && 350 return true;
176 !lock_is_held(&rcu_bh_lock_map) && 351 if (--trycount <= 0)
177 !lock_is_held(&rcu_lock_map) && 352 return false;
178 !lock_is_held(&rcu_sched_lock_map), 353 udelay(SRCU_RETRY_CHECK_DELAY);
179 "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); 354 }
180 355}
181 idx = sp->completed;
182 mutex_lock(&sp->mutex);
183 356
184 /* 357/*
185 * Check to see if someone else did the work for us while we were 358 * Increment the ->completed counter so that future SRCU readers will
186 * waiting to acquire the lock. We need -two- advances of 359 * use the other rank of the ->c[] and ->seq[] arrays. This allows
187 * the counter, not just one. If there was but one, we might have 360 * us to wait for pre-existing readers in a starvation-free manner.
188 * shown up -after- our helper's first synchronize_sched(), thus 361 */
189 * having failed to prevent CPU-reordering races with concurrent 362static void srcu_flip(struct srcu_struct *sp)
190 * srcu_read_unlock()s on other CPUs (see comment below). So we 363{
191 * either (1) wait for two or (2) supply the second ourselves. 364 sp->completed++;
192 */ 365}
193 366
194 if ((sp->completed - idx) >= 2) { 367/*
195 mutex_unlock(&sp->mutex); 368 * Enqueue an SRCU callback on the specified srcu_struct structure,
196 return; 369 * initiating grace-period processing if it is not already running.
370 */
371void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
372 void (*func)(struct rcu_head *head))
373{
374 unsigned long flags;
375
376 head->next = NULL;
377 head->func = func;
378 spin_lock_irqsave(&sp->queue_lock, flags);
379 rcu_batch_queue(&sp->batch_queue, head);
380 if (!sp->running) {
381 sp->running = true;
382 queue_delayed_work(system_nrt_wq, &sp->work, 0);
197 } 383 }
384 spin_unlock_irqrestore(&sp->queue_lock, flags);
385}
386EXPORT_SYMBOL_GPL(call_srcu);
198 387
199 sync_func(); /* Force memory barrier on all CPUs. */ 388struct rcu_synchronize {
389 struct rcu_head head;
390 struct completion completion;
391};
200 392
201 /* 393/*
202 * The preceding synchronize_sched() ensures that any CPU that 394 * Awaken the corresponding synchronize_srcu() instance now that a
203 * sees the new value of sp->completed will also see any preceding 395 * grace period has elapsed.
204 * changes to data structures made by this CPU. This prevents 396 */
205 * some other CPU from reordering the accesses in its SRCU 397static void wakeme_after_rcu(struct rcu_head *head)
206 * read-side critical section to precede the corresponding 398{
207 * srcu_read_lock() -- ensuring that such references will in 399 struct rcu_synchronize *rcu;
208 * fact be protected.
209 *
210 * So it is now safe to do the flip.
211 */
212 400
213 idx = sp->completed & 0x1; 401 rcu = container_of(head, struct rcu_synchronize, head);
214 sp->completed++; 402 complete(&rcu->completion);
403}
215 404
216 sync_func(); /* Force memory barrier on all CPUs. */ 405static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
406static void srcu_reschedule(struct srcu_struct *sp);
217 407
218 /* 408/*
219 * At this point, because of the preceding synchronize_sched(), 409 * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
220 * all srcu_read_lock() calls using the old counters have completed. 410 */
221 * Their corresponding critical sections might well be still 411static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
222 * executing, but the srcu_read_lock() primitives themselves 412{
223 * will have finished executing. We initially give readers 413 struct rcu_synchronize rcu;
224 * an arbitrarily chosen 10 microseconds to get out of their 414 struct rcu_head *head = &rcu.head;
225 * SRCU read-side critical sections, then loop waiting 1/HZ 415 bool done = false;
226 * seconds per iteration. The 10-microsecond value has done
227 * very well in testing.
228 */
229
230 if (srcu_readers_active_idx(sp, idx))
231 udelay(SYNCHRONIZE_SRCU_READER_DELAY);
232 while (srcu_readers_active_idx(sp, idx))
233 schedule_timeout_interruptible(1);
234 416
235 sync_func(); /* Force memory barrier on all CPUs. */ 417 rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
418 !lock_is_held(&rcu_bh_lock_map) &&
419 !lock_is_held(&rcu_lock_map) &&
420 !lock_is_held(&rcu_sched_lock_map),
421 "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
236 422
237 /* 423 init_completion(&rcu.completion);
238 * The preceding synchronize_sched() forces all srcu_read_unlock() 424
239 * primitives that were executing concurrently with the preceding 425 head->next = NULL;
240 * for_each_possible_cpu() loop to have completed by this point. 426 head->func = wakeme_after_rcu;
241 * More importantly, it also forces the corresponding SRCU read-side 427 spin_lock_irq(&sp->queue_lock);
242 * critical sections to have also completed, and the corresponding 428 if (!sp->running) {
243 * references to SRCU-protected data items to be dropped. 429 /* steal the processing owner */
244 * 430 sp->running = true;
245 * Note: 431 rcu_batch_queue(&sp->batch_check0, head);
246 * 432 spin_unlock_irq(&sp->queue_lock);
247 * Despite what you might think at first glance, the 433
248 * preceding synchronize_sched() -must- be within the 434 srcu_advance_batches(sp, trycount);
249 * critical section ended by the following mutex_unlock(). 435 if (!rcu_batch_empty(&sp->batch_done)) {
250 * Otherwise, a task taking the early exit can race 436 BUG_ON(sp->batch_done.head != head);
251 * with a srcu_read_unlock(), which might have executed 437 rcu_batch_dequeue(&sp->batch_done);
252 * just before the preceding srcu_readers_active() check, 438 done = true;
253 * and whose CPU might have reordered the srcu_read_unlock() 439 }
254 * with the preceding critical section. In this case, there 440 /* give the processing owner to work_struct */
255 * is nothing preventing the synchronize_sched() task that is 441 srcu_reschedule(sp);
256 * taking the early exit from freeing a data structure that 442 } else {
257 * is still being referenced (out of order) by the task 443 rcu_batch_queue(&sp->batch_queue, head);
258 * doing the srcu_read_unlock(). 444 spin_unlock_irq(&sp->queue_lock);
259 * 445 }
260 * Alternatively, the comparison with "2" on the early exit
261 * could be changed to "3", but this increases synchronize_srcu()
262 * latency for bulk loads. So the current code is preferred.
263 */
264 446
265 mutex_unlock(&sp->mutex); 447 if (!done)
448 wait_for_completion(&rcu.completion);
266} 449}
267 450
268/** 451/**
@@ -281,7 +464,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
281 */ 464 */
282void synchronize_srcu(struct srcu_struct *sp) 465void synchronize_srcu(struct srcu_struct *sp)
283{ 466{
284 __synchronize_srcu(sp, synchronize_sched); 467 __synchronize_srcu(sp, SYNCHRONIZE_SRCU_TRYCOUNT);
285} 468}
286EXPORT_SYMBOL_GPL(synchronize_srcu); 469EXPORT_SYMBOL_GPL(synchronize_srcu);
287 470
@@ -289,18 +472,11 @@ EXPORT_SYMBOL_GPL(synchronize_srcu);
289 * synchronize_srcu_expedited - Brute-force SRCU grace period 472 * synchronize_srcu_expedited - Brute-force SRCU grace period
290 * @sp: srcu_struct with which to synchronize. 473 * @sp: srcu_struct with which to synchronize.
291 * 474 *
292 * Wait for an SRCU grace period to elapse, but use a "big hammer" 475 * Wait for an SRCU grace period to elapse, but be more aggressive about
293 * approach to force the grace period to end quickly. This consumes 476 * spinning rather than blocking when waiting.
294 * significant time on all CPUs and is unfriendly to real-time workloads,
295 * so is thus not recommended for any sort of common-case code. In fact,
296 * if you are using synchronize_srcu_expedited() in a loop, please
297 * restructure your code to batch your updates, and then use a single
298 * synchronize_srcu() instead.
299 * 477 *
300 * Note that it is illegal to call this function while holding any lock 478 * Note that it is illegal to call this function while holding any lock
301 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal 479 * that is acquired by a CPU-hotplug notifier. It is also illegal to call
302 * to call this function from a CPU-hotplug notifier. Failing to observe
303 * these restriction will result in deadlock. It is also illegal to call
304 * synchronize_srcu_expedited() from the corresponding SRCU read-side 480 * synchronize_srcu_expedited() from the corresponding SRCU read-side
305 * critical section; doing so will result in deadlock. However, it is 481 * critical section; doing so will result in deadlock. However, it is
306 * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct 482 * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct
@@ -309,20 +485,166 @@ EXPORT_SYMBOL_GPL(synchronize_srcu);
309 */ 485 */
310void synchronize_srcu_expedited(struct srcu_struct *sp) 486void synchronize_srcu_expedited(struct srcu_struct *sp)
311{ 487{
312 __synchronize_srcu(sp, synchronize_sched_expedited); 488 __synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT);
313} 489}
314EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); 490EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
315 491
316/** 492/**
493 * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
494 */
495void srcu_barrier(struct srcu_struct *sp)
496{
497 synchronize_srcu(sp);
498}
499EXPORT_SYMBOL_GPL(srcu_barrier);
500
501/**
317 * srcu_batches_completed - return batches completed. 502 * srcu_batches_completed - return batches completed.
318 * @sp: srcu_struct on which to report batch completion. 503 * @sp: srcu_struct on which to report batch completion.
319 * 504 *
320 * Report the number of batches, correlated with, but not necessarily 505 * Report the number of batches, correlated with, but not necessarily
321 * precisely the same as, the number of grace periods that have elapsed. 506 * precisely the same as, the number of grace periods that have elapsed.
322 */ 507 */
323
324long srcu_batches_completed(struct srcu_struct *sp) 508long srcu_batches_completed(struct srcu_struct *sp)
325{ 509{
326 return sp->completed; 510 return sp->completed;
327} 511}
328EXPORT_SYMBOL_GPL(srcu_batches_completed); 512EXPORT_SYMBOL_GPL(srcu_batches_completed);
513
514#define SRCU_CALLBACK_BATCH 10
515#define SRCU_INTERVAL 1
516
517/*
518 * Move any new SRCU callbacks to the first stage of the SRCU grace
519 * period pipeline.
520 */
521static void srcu_collect_new(struct srcu_struct *sp)
522{
523 if (!rcu_batch_empty(&sp->batch_queue)) {
524 spin_lock_irq(&sp->queue_lock);
525 rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
526 spin_unlock_irq(&sp->queue_lock);
527 }
528}
529
530/*
531 * Core SRCU state machine. Advance callbacks from ->batch_check0 to
532 * ->batch_check1 and then to ->batch_done as readers drain.
533 */
534static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
535{
536 int idx = 1 ^ (sp->completed & 1);
537
538 /*
539 * Because readers might be delayed for an extended period after
540 * fetching ->completed for their index, at any point in time there
541 * might well be readers using both idx=0 and idx=1. We therefore
542 * need to wait for readers to clear from both index values before
543 * invoking a callback.
544 */
545
546 if (rcu_batch_empty(&sp->batch_check0) &&
547 rcu_batch_empty(&sp->batch_check1))
548 return; /* no callbacks need to be advanced */
549
550 if (!try_check_zero(sp, idx, trycount))
551 return; /* failed to advance, will try after SRCU_INTERVAL */
552
553 /*
554 * The callbacks in ->batch_check1 have already done with their
555 * first zero check and flip back when they were enqueued on
556 * ->batch_check0 in a previous invocation of srcu_advance_batches().
557 * (Presumably try_check_zero() returned false during that
558 * invocation, leaving the callbacks stranded on ->batch_check1.)
559 * They are therefore ready to invoke, so move them to ->batch_done.
560 */
561 rcu_batch_move(&sp->batch_done, &sp->batch_check1);
562
563 if (rcu_batch_empty(&sp->batch_check0))
564 return; /* no callbacks need to be advanced */
565 srcu_flip(sp);
566
567 /*
568 * The callbacks in ->batch_check0 just finished their
569 * first check zero and flip, so move them to ->batch_check1
570 * for future checking on the other idx.
571 */
572 rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
573
574 /*
575 * SRCU read-side critical sections are normally short, so check
576 * at least twice in quick succession after a flip.
577 */
578 trycount = trycount < 2 ? 2 : trycount;
579 if (!try_check_zero(sp, idx^1, trycount))
580 return; /* failed to advance, will try after SRCU_INTERVAL */
581
582 /*
583 * The callbacks in ->batch_check1 have now waited for all
584 * pre-existing readers using both idx values. They are therefore
585 * ready to invoke, so move them to ->batch_done.
586 */
587 rcu_batch_move(&sp->batch_done, &sp->batch_check1);
588}
589
590/*
591 * Invoke a limited number of SRCU callbacks that have passed through
592 * their grace period. If there are more to do, SRCU will reschedule
593 * the workqueue.
594 */
595static void srcu_invoke_callbacks(struct srcu_struct *sp)
596{
597 int i;
598 struct rcu_head *head;
599
600 for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
601 head = rcu_batch_dequeue(&sp->batch_done);
602 if (!head)
603 break;
604 local_bh_disable();
605 head->func(head);
606 local_bh_enable();
607 }
608}
609
610/*
611 * Finished one round of SRCU grace period. Start another if there are
612 * more SRCU callbacks queued, otherwise put SRCU into not-running state.
613 */
614static void srcu_reschedule(struct srcu_struct *sp)
615{
616 bool pending = true;
617
618 if (rcu_batch_empty(&sp->batch_done) &&
619 rcu_batch_empty(&sp->batch_check1) &&
620 rcu_batch_empty(&sp->batch_check0) &&
621 rcu_batch_empty(&sp->batch_queue)) {
622 spin_lock_irq(&sp->queue_lock);
623 if (rcu_batch_empty(&sp->batch_done) &&
624 rcu_batch_empty(&sp->batch_check1) &&
625 rcu_batch_empty(&sp->batch_check0) &&
626 rcu_batch_empty(&sp->batch_queue)) {
627 sp->running = false;
628 pending = false;
629 }
630 spin_unlock_irq(&sp->queue_lock);
631 }
632
633 if (pending)
634 queue_delayed_work(system_nrt_wq, &sp->work, SRCU_INTERVAL);
635}
636
637/*
638 * This is the work-queue function that handles SRCU grace periods.
639 */
640static void process_srcu(struct work_struct *work)
641{
642 struct srcu_struct *sp;
643
644 sp = container_of(work, struct srcu_struct, work.work);
645
646 srcu_collect_new(sp);
647 srcu_advance_batches(sp, 1);
648 srcu_invoke_callbacks(sp);
649 srcu_reschedule(sp);
650}
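Taken together, call_srcu() and srcu_barrier() above give SRCU the same asynchronous pattern as call_rcu()/rcu_barrier(). A minimal usage sketch from a client's point of view; my_srcu, my_node, my_free_cb, my_remove, and my_teardown are hypothetical names:

#include <linux/srcu.h>
#include <linux/slab.h>

static struct srcu_struct my_srcu;      /* init_srcu_struct(&my_srcu) at setup */

struct my_node {
        struct rcu_head rh;
        /* payload ... */
};

static void my_free_cb(struct rcu_head *rhp)
{
        kfree(container_of(rhp, struct my_node, rh));
}

/* Removal: unlink the node, then let SRCU free it once all pre-existing
 * readers of my_srcu have finished. */
static void my_remove(struct my_node *p)
{
        /* ... unlink p from the SRCU-protected structure ... */
        call_srcu(&my_srcu, &p->rh, my_free_cb);
}

/* Teardown: wait for every callback posted above to be invoked before
 * tearing down the srcu_struct. */
static void my_teardown(void)
{
        srcu_barrier(&my_srcu);
        cleanup_srcu_struct(&my_srcu);
}
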
diff --git a/kernel/timer.c b/kernel/timer.c
index a297ffcf888e..837c552fe838 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -861,7 +861,13 @@ EXPORT_SYMBOL(mod_timer);
861 * 861 *
862 * mod_timer_pinned() is a way to update the expire field of an 862 * mod_timer_pinned() is a way to update the expire field of an
863 * active timer (if the timer is inactive it will be activated) 863 * active timer (if the timer is inactive it will be activated)
864 * and not allow the timer to be migrated to a different CPU. 864 * and to ensure that the timer is scheduled on the current CPU.
865 *
866 * Note that this does not prevent the timer from being migrated
867 * when the current CPU goes offline. If this is a problem for
868 * you, use CPU-hotplug notifiers to handle it correctly, for
869 * example, cancelling the timer when the corresponding CPU goes
870 * offline.
865 * 871 *
866 * mod_timer_pinned(timer, expires) is equivalent to: 872 * mod_timer_pinned(timer, expires) is equivalent to:
867 * 873 *