author		Linus Torvalds <torvalds@linux-foundation.org>	2016-10-03 13:29:53 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-10-03 13:29:53 -0400
commit		4b978934a440c1aafce986353001b03289eaa040 (patch)
tree		e8e0b54d128e16c35ccf77b724d1640df12d82ba
parent		72a9cdd083005900f15934e8568f1ac43a6bb755 (diff)
parent		2d8fbcd13ea1d0be3a7ea5f20c3a5b44b592e79c (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU updates from Ingo Molnar:
 "The main changes in this cycle were:

   - Expedited grace-period changes, most notably avoiding having user
     threads drive expedited grace periods, using a workqueue instead.

   - Miscellaneous fixes, including a performance fix for lists that
     was sent with the lists modifications.

   - CPU hotplug updates, most notably providing exact CPU-online
     tracking for RCU.  This will in turn allow removal of the checks
     supporting RCU's prior heuristic that was based on the assumption
     that CPUs would take no longer than one jiffy to come online.

   - Torture-test updates.

   - Documentation updates"

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (22 commits)
  list: Expand list_first_entry_or_null()
  torture: TOROUT_STRING(): Insert a space between flag and message
  rcuperf: Consistently insert space between flag and message
  rcutorture: Print out barrier error as document says
  torture: Add task state to writer-task stall printk()s
  torture: Convert torture_shutdown() to hrtimer
  rcutorture: Convert to hotplug state machine
  cpu/hotplug: Get rid of CPU_STARTING reference
  rcu: Provide exact CPU-online tracking for RCU
  rcu: Avoid redundant quiescent-state chasing
  rcu: Don't use modular infrastructure in non-modular code
  sched: Make wake_up_nohz_cpu() handle CPUs going offline
  rcu: Use rcu_gp_kthread_wake() to wake up grace period kthreads
  rcu: Use RCU's online-CPU state for expedited IPI retry
  rcu: Exclude RCU-offline CPUs from expedited grace periods
  rcu: Make expedited RCU CPU stall warnings respond to controls
  rcu: Stop disabling expedited RCU CPU stall warnings
  rcu: Drive expedited grace periods from workqueue
  rcu: Consolidate expedited grace period machinery
  documentation: Record reason for rcu_head two-byte alignment
  ...
-rw-r--r--  Documentation/RCU/Design/Requirements/Requirements.html |  22
-rw-r--r--  Documentation/RCU/torture.txt                           |  15
-rw-r--r--  include/linux/list.h                                    |   7
-rw-r--r--  include/linux/rcupdate.h                                |   1
-rw-r--r--  include/linux/torture.h                                 |   2
-rw-r--r--  kernel/cpu.c                                            |   1
-rw-r--r--  kernel/rcu/rcuperf.c                                    |   7
-rw-r--r--  kernel/rcu/rcutorture.c                                 |  62
-rw-r--r--  kernel/rcu/tree.c                                       |  44
-rw-r--r--  kernel/rcu/tree.h                                       |   1
-rw-r--r--  kernel/rcu/tree_exp.h                                   | 124
-rw-r--r--  kernel/rcu/tree_plugin.h                                |   1
-rw-r--r--  kernel/rcu/tree_trace.c                                 |   7
-rw-r--r--  kernel/rcu/update.c                                     |   3
-rw-r--r--  kernel/sched/core.c                                     |   7
-rw-r--r--  kernel/torture.c                                        |  27
16 files changed, 193 insertions(+), 138 deletions(-)
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index ece410f40436..a4d3838130e4 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -2493,6 +2493,28 @@ or some future &ldquo;lazy&rdquo;
 variant of <tt>call_rcu()</tt> that might one day be created for
 energy-efficiency purposes.
 
+<p>
+That said, there are limits.
+RCU requires that the <tt>rcu_head</tt> structure be aligned to a
+two-byte boundary, and passing a misaligned <tt>rcu_head</tt>
+structure to one of the <tt>call_rcu()</tt> family of functions
+will result in a splat.
+It is therefore necessary to exercise caution when packing
+structures containing fields of type <tt>rcu_head</tt>.
+Why not a four-byte or even eight-byte alignment requirement?
+Because the m68k architecture provides only two-byte alignment,
+and thus acts as alignment's least common denominator.
+
+<p>
+The reason for reserving the bottom bit of pointers to
+<tt>rcu_head</tt> structures is to leave the door open to
+&ldquo;lazy&rdquo; callbacks whose invocations can safely be deferred.
+Deferring invocation could potentially have energy-efficiency
+benefits, but only if the rate of non-lazy callbacks decreases
+significantly for some important workload.
+In the meantime, reserving the bottom bit keeps this option open
+in case it one day becomes useful.
+
 <h3><a name="Performance, Scalability, Response Time, and Reliability">
 Performance, Scalability, Response Time, and Reliability</a></h3>
 
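The alignment requirement documented above is easiest to see with a concrete use of the call_rcu() API. The following is a minimal editorial sketch, not part of this patch (struct foo, foo_reclaim(), and foo_release() are hypothetical names): an rcu_head embedded in a structure and freed after a grace period. Even if the structure is packed, the embedded rcu_head must keep at least two-byte alignment, since call_rcu() reserves the bottom bit of its pointers.

	/* Illustrative sketch only; not part of this patch. */
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		int data;
		struct rcu_head rh;	/* Must stay at least two-byte aligned. */
	};

	/* RCU callback: runs after a grace period and frees the enclosing struct. */
	static void foo_reclaim(struct rcu_head *rhp)
	{
		struct foo *fp = container_of(rhp, struct foo, rh);

		kfree(fp);
	}

	/* Defer freeing of a removed element until pre-existing readers finish. */
	static void foo_release(struct foo *fp)
	{
		call_rcu(&fp->rh, foo_reclaim);
	}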
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 118e7c176ce7..278f6a9383b6 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -10,21 +10,6 @@ status messages via printk(), which can be examined via the dmesg
 command (perhaps grepping for "torture").  The test is started
 when the module is loaded, and stops when the module is unloaded.
 
-CONFIG_RCU_TORTURE_TEST_RUNNABLE
-
-It is also possible to specify CONFIG_RCU_TORTURE_TEST=y, which will
-result in the tests being loaded into the base kernel.  In this case,
-the CONFIG_RCU_TORTURE_TEST_RUNNABLE config option is used to specify
-whether the RCU torture tests are to be started immediately during
-boot or whether the /proc/sys/kernel/rcutorture_runnable file is used
-to enable them.  This /proc file can be used to repeatedly pause and
-restart the tests, regardless of the initial state specified by the
-CONFIG_RCU_TORTURE_TEST_RUNNABLE config option.
-
-You will normally -not- want to start the RCU torture tests during boot
-(and thus the default is CONFIG_RCU_TORTURE_TEST_RUNNABLE=n), but doing
-this can sometimes be useful in finding boot-time bugs.
-
 
 MODULE PARAMETERS
 
diff --git a/include/linux/list.h b/include/linux/list.h
index 5183138aa932..5809e9a2de5b 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -381,8 +381,11 @@ static inline void list_splice_tail_init(struct list_head *list,
  *
  * Note that if the list is empty, it returns NULL.
  */
-#define list_first_entry_or_null(ptr, type, member) \
-	(!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
+#define list_first_entry_or_null(ptr, type, member) ({ \
+	struct list_head *head__ = (ptr); \
+	struct list_head *pos__ = READ_ONCE(head__->next); \
+	pos__ != head__ ? list_entry(pos__, type, member) : NULL; \
+})
 
 /**
  * list_next_entry - get the next element in list
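The expanded macro reads ->next only once via READ_ONCE(), so a lockless caller cannot observe the list changing between the emptiness check and the entry computation. A brief usage sketch follows; struct item, item_list, and item_peek() are illustrative names, not part of this patch:

	/* Illustrative sketch only; not part of this patch. */
	#include <linux/list.h>

	struct item {
		struct list_head node;
		int val;
	};

	static LIST_HEAD(item_list);

	/* Return the first item, or NULL if the list is (momentarily) empty. */
	static struct item *item_peek(void)
	{
		return list_first_entry_or_null(&item_list, struct item, node);
	}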
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 1aa62e1a761b..321f9ed552a9 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -334,6 +334,7 @@ void rcu_sched_qs(void);
 void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
 void rcu_report_dead(unsigned int cpu);
+void rcu_cpu_starting(unsigned int cpu);
 
 #ifndef CONFIG_TINY_RCU
 void rcu_end_inkernel_boot(void);
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 6685a73736a2..a45702eb3e7b 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -43,7 +43,7 @@
 
 #define TORTURE_FLAG "-torture:"
 #define TOROUT_STRING(s) \
-	pr_alert("%s" TORTURE_FLAG s "\n", torture_type)
+	pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s)
 #define VERBOSE_TOROUT_STRING(s) \
 	do { if (verbose) pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s); } while (0)
 #define VERBOSE_TOROUT_ERRSTRING(s) \
diff --git a/kernel/cpu.c b/kernel/cpu.c
index ebbf027dd4a1..92c2451db415 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -889,6 +889,7 @@ void notify_cpu_starting(unsigned int cpu)
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
 
+	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
 	while (st->state < target) {
 		struct cpuhp_step *step;
 
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index d38ab08a3fe7..123ccbd22449 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -52,7 +52,7 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
 
 #define PERF_FLAG "-perf:"
 #define PERFOUT_STRING(s) \
-	pr_alert("%s" PERF_FLAG s "\n", perf_type)
+	pr_alert("%s" PERF_FLAG " %s\n", perf_type, s)
 #define VERBOSE_PERFOUT_STRING(s) \
 	do { if (verbose) pr_alert("%s" PERF_FLAG " %s\n", perf_type, s); } while (0)
 #define VERBOSE_PERFOUT_ERRSTRING(s) \
@@ -400,9 +400,8 @@ rcu_perf_writer(void *arg)
 			sp.sched_priority = 0;
 			sched_setscheduler_nocheck(current,
 						   SCHED_NORMAL, &sp);
-			pr_alert("%s" PERF_FLAG
-				 "rcu_perf_writer %ld has %d measurements\n",
-				 perf_type, me, MIN_MEAS);
+			pr_alert("%s%s rcu_perf_writer %ld has %d measurements\n",
+				 perf_type, PERF_FLAG, me, MIN_MEAS);
 			if (atomic_inc_return(&n_rcu_perf_writer_finished) >=
 			    nrealwriters) {
 				schedule_timeout_interruptible(10);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 971e2b138063..bf08fee53dc7 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1238,6 +1238,7 @@ rcu_torture_stats_print(void)
 	long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
 	long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
 	static unsigned long rtcv_snap = ULONG_MAX;
+	struct task_struct *wtp;
 
 	for_each_possible_cpu(cpu) {
 		for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
@@ -1258,8 +1259,9 @@ rcu_torture_stats_print(void)
 		atomic_read(&n_rcu_torture_alloc),
 		atomic_read(&n_rcu_torture_alloc_fail),
 		atomic_read(&n_rcu_torture_free));
-	pr_cont("rtmbe: %d rtbke: %ld rtbre: %ld ",
+	pr_cont("rtmbe: %d rtbe: %ld rtbke: %ld rtbre: %ld ",
 		atomic_read(&n_rcu_torture_mberror),
+		n_rcu_torture_barrier_error,
 		n_rcu_torture_boost_ktrerror,
 		n_rcu_torture_boost_rterror);
 	pr_cont("rtbf: %ld rtb: %ld nt: %ld ",
@@ -1312,10 +1314,12 @@ rcu_torture_stats_print(void)
 
 		rcutorture_get_gp_data(cur_ops->ttype,
 				       &flags, &gpnum, &completed);
-		pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x\n",
+		wtp = READ_ONCE(writer_task);
+		pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx\n",
 			 rcu_torture_writer_state_getname(),
 			 rcu_torture_writer_state,
-			 gpnum, completed, flags);
+			 gpnum, completed, flags,
+			 wtp == NULL ? ~0UL : wtp->state);
 		show_rcu_gp_kthreads();
 		rcu_ftrace_dump(DUMP_ALL);
 	}
@@ -1362,12 +1366,12 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
 		 onoff_interval, onoff_holdoff);
 }
 
-static void rcutorture_booster_cleanup(int cpu)
+static int rcutorture_booster_cleanup(unsigned int cpu)
 {
 	struct task_struct *t;
 
 	if (boost_tasks[cpu] == NULL)
-		return;
+		return 0;
 	mutex_lock(&boost_mutex);
 	t = boost_tasks[cpu];
 	boost_tasks[cpu] = NULL;
@@ -1375,9 +1379,10 @@ static void rcutorture_booster_cleanup(int cpu)
 
 	/* This must be outside of the mutex, otherwise deadlock! */
 	torture_stop_kthread(rcu_torture_boost, t);
+	return 0;
 }
 
-static int rcutorture_booster_init(int cpu)
+static int rcutorture_booster_init(unsigned int cpu)
 {
 	int retval;
 
@@ -1577,28 +1582,7 @@ static void rcu_torture_barrier_cleanup(void)
 	}
 }
 
-static int rcutorture_cpu_notify(struct notifier_block *self,
-				 unsigned long action, void *hcpu)
-{
-	long cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
-		(void)rcutorture_booster_init(cpu);
-		break;
-	case CPU_DOWN_PREPARE:
-		rcutorture_booster_cleanup(cpu);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block rcutorture_cpu_nb = {
-	.notifier_call = rcutorture_cpu_notify,
-};
+static enum cpuhp_state rcutor_hp;
 
 static void
 rcu_torture_cleanup(void)
@@ -1638,11 +1622,8 @@ rcu_torture_cleanup(void)
 	for (i = 0; i < ncbflooders; i++)
 		torture_stop_kthread(rcu_torture_cbflood, cbflood_task[i]);
 	if ((test_boost == 1 && cur_ops->can_boost) ||
-	    test_boost == 2) {
-		unregister_cpu_notifier(&rcutorture_cpu_nb);
-		for_each_possible_cpu(i)
-			rcutorture_booster_cleanup(i);
-	}
+	    test_boost == 2)
+		cpuhp_remove_state(rcutor_hp);
 
 	/*
 	 * Wait for all RCU callbacks to fire, then do flavor-specific
@@ -1869,14 +1850,13 @@ rcu_torture_init(void)
 	    test_boost == 2) {
 
 		boost_starttime = jiffies + test_boost_interval * HZ;
-		register_cpu_notifier(&rcutorture_cpu_nb);
-		for_each_possible_cpu(i) {
-			if (cpu_is_offline(i))
-				continue;  /* Heuristic: CPU can go offline. */
-			firsterr = rcutorture_booster_init(i);
-			if (firsterr)
-				goto unwind;
-		}
+
+		firsterr = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "RCU_TORTURE",
+					     rcutorture_booster_init,
+					     rcutorture_booster_cleanup);
+		if (firsterr < 0)
+			goto unwind;
+		rcutor_hp = firsterr;
 	}
 	firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);
 	if (firsterr)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5d80925e7fc8..7e2e03879c2e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -41,7 +41,6 @@
 #include <linux/export.h>
 #include <linux/completion.h>
 #include <linux/moduleparam.h>
-#include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
@@ -60,7 +59,6 @@
 #include "tree.h"
 #include "rcu.h"
 
-MODULE_ALIAS("rcutree");
 #ifdef MODULE_PARAM_PREFIX
 #undef MODULE_PARAM_PREFIX
 #endif
@@ -1848,6 +1846,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 			      struct rcu_data *rdp)
 {
 	bool ret;
+	bool need_gp;
 
 	/* Handle the ends of any preceding grace periods first. */
 	if (rdp->completed == rnp->completed &&
@@ -1874,9 +1873,10 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 		 */
 		rdp->gpnum = rnp->gpnum;
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
-		rdp->cpu_no_qs.b.norm = true;
+		need_gp = !!(rnp->qsmask & rdp->grpmask);
+		rdp->cpu_no_qs.b.norm = need_gp;
 		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
-		rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask);
+		rdp->core_needs_qs = need_gp;
 		zero_cpu_stall_ticks(rdp);
 		WRITE_ONCE(rdp->gpwrap, false);
 	}
@@ -2344,7 +2344,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
 	WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
 	raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
-	swake_up(&rsp->gp_wq);  /* Memory barrier implied by swake_up() path. */
+	rcu_gp_kthread_wake(rsp);
 }
 
 /*
@@ -2970,7 +2970,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
 	}
 	WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
 	raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
-	swake_up(&rsp->gp_wq);  /* Memory barrier implied by swake_up() path. */
+	rcu_gp_kthread_wake(rsp);
 }
 
 /*
@@ -3792,8 +3792,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rnp = rdp->mynode;
 	mask = rdp->grpmask;
 	raw_spin_lock_rcu_node(rnp);		/* irqs already disabled. */
-	rnp->qsmaskinitnext |= mask;
-	rnp->expmaskinitnext |= mask;
 	if (!rdp->beenonline)
 		WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
 	rdp->beenonline = true;	 /* We have now been online. */
@@ -3860,6 +3858,32 @@ int rcutree_dead_cpu(unsigned int cpu)
 	return 0;
 }
 
+/*
+ * Mark the specified CPU as being online so that subsequent grace periods
+ * (both expedited and normal) will wait on it.  Note that this means that
+ * incoming CPUs are not allowed to use RCU read-side critical sections
+ * until this function is called.  Failing to observe this restriction
+ * will result in lockdep splats.
+ */
+void rcu_cpu_starting(unsigned int cpu)
+{
+	unsigned long flags;
+	unsigned long mask;
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp) {
+		rdp = this_cpu_ptr(rsp->rda);
+		rnp = rdp->mynode;
+		mask = rdp->grpmask;
+		raw_spin_lock_irqsave_rcu_node(rnp, flags);
+		rnp->qsmaskinitnext |= mask;
+		rnp->expmaskinitnext |= mask;
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	}
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 /*
  * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
@@ -4209,8 +4233,10 @@ void __init rcu_init(void)
 	 * or the scheduler are operational.
 	 */
 	pm_notifier(rcu_pm_notify, 0);
-	for_each_online_cpu(cpu)
+	for_each_online_cpu(cpu) {
 		rcutree_prepare_cpu(cpu);
+		rcu_cpu_starting(cpu);
+	}
 }
 
 #include "tree_exp.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index f714f873bf9d..e99a5234d9ed 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -400,6 +400,7 @@ struct rcu_data {
 #ifdef CONFIG_RCU_FAST_NO_HZ
 	struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+	atomic_long_t exp_workdone0;	/* # done by workqueue. */
 	atomic_long_t exp_workdone1;	/* # done by others #1. */
 	atomic_long_t exp_workdone2;	/* # done by others #2. */
 	atomic_long_t exp_workdone3;	/* # done by others #3. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6d86ab6ec2c9..24343eb87b58 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -359,7 +359,8 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
 			struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 
 			if (raw_smp_processor_id() == cpu ||
-			    !(atomic_add_return(0, &rdtp->dynticks) & 0x1))
+			    !(atomic_add_return(0, &rdtp->dynticks) & 0x1) ||
+			    !(rnp->qsmaskinitnext & rdp->grpmask))
 				mask_ofl_test |= rdp->grpmask;
 		}
 		mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
@@ -384,17 +385,16 @@ retry_ipi:
 				mask_ofl_ipi &= ~mask;
 				continue;
 			}
-			/* Failed, raced with offline. */
+			/* Failed, raced with CPU hotplug operation. */
 			raw_spin_lock_irqsave_rcu_node(rnp, flags);
-			if (cpu_online(cpu) &&
+			if ((rnp->qsmaskinitnext & mask) &&
 			    (rnp->expmask & mask)) {
+				/* Online, so delay for a bit and try again. */
 				raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 				schedule_timeout_uninterruptible(1);
-				if (cpu_online(cpu) &&
-				    (rnp->expmask & mask))
-					goto retry_ipi;
-				raw_spin_lock_irqsave_rcu_node(rnp, flags);
+				goto retry_ipi;
 			}
+			/* CPU really is offline, so we can ignore it. */
 			if (!(rnp->expmask & mask))
 				mask_ofl_ipi &= ~mask;
 			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -427,12 +427,10 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
 					jiffies_stall);
 		if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
 			return;
-		if (ret < 0) {
-			/* Hit a signal, disable CPU stall warnings. */
-			swait_event(rsp->expedited_wq,
-				    sync_rcu_preempt_exp_done(rnp_root));
-			return;
-		}
+		WARN_ON(ret < 0);  /* workqueues should not be signaled. */
+		if (rcu_cpu_stall_suppress)
+			continue;
+		panic_on_rcu_stall();
 		pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
 		       rsp->name);
 		ndetected = 0;
@@ -500,7 +498,6 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
 	 * next GP, to proceed.
 	 */
 	mutex_lock(&rsp->exp_wake_mutex);
-	mutex_unlock(&rsp->exp_mutex);
 
 	rcu_for_each_node_breadth_first(rsp, rnp) {
 		if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
@@ -516,6 +513,70 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
 	mutex_unlock(&rsp->exp_wake_mutex);
 }
 
+/* Let the workqueue handler know what it is supposed to do. */
+struct rcu_exp_work {
+	smp_call_func_t rew_func;
+	struct rcu_state *rew_rsp;
+	unsigned long rew_s;
+	struct work_struct rew_work;
+};
+
+/*
+ * Work-queue handler to drive an expedited grace period forward.
+ */
+static void wait_rcu_exp_gp(struct work_struct *wp)
+{
+	struct rcu_exp_work *rewp;
+
+	/* Initialize the rcu_node tree in preparation for the wait. */
+	rewp = container_of(wp, struct rcu_exp_work, rew_work);
+	sync_rcu_exp_select_cpus(rewp->rew_rsp, rewp->rew_func);
+
+	/* Wait and clean up, including waking everyone. */
+	rcu_exp_wait_wake(rewp->rew_rsp, rewp->rew_s);
+}
+
+/*
+ * Given an rcu_state pointer and a smp_call_function() handler, kick
+ * off the specified flavor of expedited grace period.
+ */
+static void _synchronize_rcu_expedited(struct rcu_state *rsp,
+				       smp_call_func_t func)
+{
+	struct rcu_data *rdp;
+	struct rcu_exp_work rew;
+	struct rcu_node *rnp;
+	unsigned long s;
+
+	/* If expedited grace periods are prohibited, fall back to normal. */
+	if (rcu_gp_is_normal()) {
+		wait_rcu_gp(rsp->call);
+		return;
+	}
+
+	/* Take a snapshot of the sequence number. */
+	s = rcu_exp_gp_seq_snap(rsp);
+	if (exp_funnel_lock(rsp, s))
+		return;  /* Someone else did our work for us. */
+
+	/* Marshall arguments and schedule the expedited grace period. */
+	rew.rew_func = func;
+	rew.rew_rsp = rsp;
+	rew.rew_s = s;
+	INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
+	schedule_work(&rew.rew_work);
+
+	/* Wait for expedited grace period to complete. */
+	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
+	rnp = rcu_get_root(rsp);
+	wait_event(rnp->exp_wq[(s >> 1) & 0x3],
+		   sync_exp_work_done(rsp,
+				      &rdp->exp_workdone0, s));
+
+	/* Let the next expedited grace period start. */
+	mutex_unlock(&rsp->exp_mutex);
+}
+
 /**
  * synchronize_sched_expedited - Brute-force RCU-sched grace period
  *
@@ -534,29 +595,13 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
  */
 void synchronize_sched_expedited(void)
 {
-	unsigned long s;
 	struct rcu_state *rsp = &rcu_sched_state;
 
 	/* If only one CPU, this is automatically a grace period. */
 	if (rcu_blocking_is_gp())
 		return;
 
-	/* If expedited grace periods are prohibited, fall back to normal. */
-	if (rcu_gp_is_normal()) {
-		wait_rcu_gp(call_rcu_sched);
-		return;
-	}
-
-	/* Take a snapshot of the sequence number. */
-	s = rcu_exp_gp_seq_snap(rsp);
-	if (exp_funnel_lock(rsp, s))
-		return;  /* Someone else did our work for us. */
-
-	/* Initialize the rcu_node tree in preparation for the wait. */
-	sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler);
-
-	/* Wait and clean up, including waking everyone. */
-	rcu_exp_wait_wake(rsp, s);
+	_synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
 }
 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
 
@@ -620,23 +665,8 @@ static void sync_rcu_exp_handler(void *info)
 void synchronize_rcu_expedited(void)
 {
 	struct rcu_state *rsp = rcu_state_p;
-	unsigned long s;
-
-	/* If expedited grace periods are prohibited, fall back to normal. */
-	if (rcu_gp_is_normal()) {
-		wait_rcu_gp(call_rcu);
-		return;
-	}
-
-	s = rcu_exp_gp_seq_snap(rsp);
-	if (exp_funnel_lock(rsp, s))
-		return;  /* Someone else did our work for us. */
-
-	/* Initialize the rcu_node tree in preparation for the wait. */
-	sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler);
 
-	/* Wait for ->blkd_tasks lists to drain, then wake everyone up. */
-	rcu_exp_wait_wake(rsp, s);
+	_synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0082fce402a0..85c5a883c6e3 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2173,6 +2173,7 @@ static int rcu_nocb_kthread(void *arg)
 			cl++;
 			c++;
 			local_bh_enable();
+			cond_resched_rcu_qs();
 			list = next;
 		}
 		trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 86782f9a4604..b1f28972872c 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,16 +185,17 @@ static int show_rcuexp(struct seq_file *m, void *v)
 	int cpu;
 	struct rcu_state *rsp = (struct rcu_state *)m->private;
 	struct rcu_data *rdp;
-	unsigned long s1 = 0, s2 = 0, s3 = 0;
+	unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
 
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(rsp->rda, cpu);
+		s0 += atomic_long_read(&rdp->exp_workdone0);
 		s1 += atomic_long_read(&rdp->exp_workdone1);
 		s2 += atomic_long_read(&rdp->exp_workdone2);
 		s3 += atomic_long_read(&rdp->exp_workdone3);
 	}
-	seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
-		   rsp->expedited_sequence, s1, s2, s3,
+	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+		   rsp->expedited_sequence, s0, s1, s2, s3,
 		   atomic_long_read(&rsp->expedited_normal),
 		   atomic_read(&rsp->expedited_need_qs),
 		   rsp->expedited_sequence / 2);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index f0d8322bc3ec..f19271dce0a9 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -46,7 +46,7 @@
 #include <linux/export.h>
 #include <linux/hardirq.h>
 #include <linux/delay.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/kthread.h>
 #include <linux/tick.h>
 
@@ -54,7 +54,6 @@
 
 #include "rcu.h"
 
-MODULE_ALIAS("rcupdate");
 #ifdef MODULE_PARAM_PREFIX
 #undef MODULE_PARAM_PREFIX
 #endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44817c640e99..0a6a13c21c5a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -581,6 +581,8 @@ static bool wake_up_full_nohz_cpu(int cpu)
 	 * If needed we can still optimize that later with an
 	 * empty IRQ.
 	 */
+	if (cpu_is_offline(cpu))
+		return true;  /* Don't try to wake offline CPUs. */
 	if (tick_nohz_full_cpu(cpu)) {
 		if (cpu != smp_processor_id() ||
 		    tick_nohz_tick_stopped())
@@ -591,6 +593,11 @@ static bool wake_up_full_nohz_cpu(int cpu)
 	return false;
 }
 
+/*
+ * Wake up the specified CPU.  If the CPU is going offline, it is the
+ * caller's responsibility to deal with the lost wakeup, for example,
+ * by hooking into the CPU_DEAD notifier like timers and hrtimers do.
+ */
 void wake_up_nohz_cpu(int cpu)
 {
 	if (!wake_up_full_nohz_cpu(cpu))
diff --git a/kernel/torture.c b/kernel/torture.c
index 75961b3decfe..0d887eb62856 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -43,6 +43,7 @@
 #include <linux/stat.h>
 #include <linux/slab.h>
 #include <linux/trace_clock.h>
+#include <linux/ktime.h>
 #include <asm/byteorder.h>
 #include <linux/torture.h>
 
@@ -446,9 +447,8 @@ EXPORT_SYMBOL_GPL(torture_shuffle_cleanup);
  * Variables for auto-shutdown.  This allows "lights out" torture runs
  * to be fully scripted.
  */
-static int shutdown_secs;		/* desired test duration in seconds. */
 static struct task_struct *shutdown_task;
-static unsigned long shutdown_time;	/* jiffies to system shutdown. */
+static ktime_t shutdown_time;		/* time to system shutdown. */
 static void (*torture_shutdown_hook)(void);
 
 /*
@@ -471,20 +471,20 @@ EXPORT_SYMBOL_GPL(torture_shutdown_absorb);
  */
 static int torture_shutdown(void *arg)
 {
-	long delta;
-	unsigned long jiffies_snap;
+	ktime_t ktime_snap;
 
 	VERBOSE_TOROUT_STRING("torture_shutdown task started");
-	jiffies_snap = jiffies;
-	while (ULONG_CMP_LT(jiffies_snap, shutdown_time) &&
+	ktime_snap = ktime_get();
+	while (ktime_before(ktime_snap, shutdown_time) &&
 	       !torture_must_stop()) {
-		delta = shutdown_time - jiffies_snap;
 		if (verbose)
 			pr_alert("%s" TORTURE_FLAG
-				 "torture_shutdown task: %lu jiffies remaining\n",
-				 torture_type, delta);
-		schedule_timeout_interruptible(delta);
-		jiffies_snap = jiffies;
+				 "torture_shutdown task: %llu ms remaining\n",
+				 torture_type,
+				 ktime_ms_delta(shutdown_time, ktime_snap));
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_hrtimeout(&shutdown_time, HRTIMER_MODE_ABS);
+		ktime_snap = ktime_get();
 	}
 	if (torture_must_stop()) {
 		torture_kthread_stopping("torture_shutdown");
@@ -511,10 +511,9 @@ int torture_shutdown_init(int ssecs, void (*cleanup)(void))
 {
 	int ret = 0;
 
-	shutdown_secs = ssecs;
 	torture_shutdown_hook = cleanup;
-	if (shutdown_secs > 0) {
-		shutdown_time = jiffies + shutdown_secs * HZ;
+	if (ssecs > 0) {
+		shutdown_time = ktime_add(ktime_get(), ktime_set(ssecs, 0));
 		ret = torture_create_kthread(torture_shutdown, NULL,
 					     shutdown_task);
 	}