Diffstat (limited to 'kernel/rcu')
-rw-r--r--  kernel/rcu/rcu.h            |  21
-rw-r--r--  kernel/rcu/rcu_segcblist.c  |   1
-rw-r--r--  kernel/rcu/rcutorture.c     |  24
-rw-r--r--  kernel/rcu/tree.c           | 159
-rw-r--r--  kernel/rcu/tree.h           |   5
-rw-r--r--  kernel/rcu/tree_plugin.h    |  14
-rw-r--r--  kernel/rcu/update.c         |  25
7 files changed, 187 insertions, 62 deletions
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index e4b43fef89f5..59c471de342a 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -203,6 +203,21 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
 extern int rcu_cpu_stall_suppress;
 int rcu_jiffies_till_stall_check(void);
 
+#define rcu_ftrace_dump_stall_suppress() \
+do { \
+	if (!rcu_cpu_stall_suppress) \
+		rcu_cpu_stall_suppress = 3; \
+} while (0)
+
+#define rcu_ftrace_dump_stall_unsuppress() \
+do { \
+	if (rcu_cpu_stall_suppress == 3) \
+		rcu_cpu_stall_suppress = 0; \
+} while (0)
+
+#else /* #endif #ifdef CONFIG_RCU_STALL_COMMON */
+#define rcu_ftrace_dump_stall_suppress()
+#define rcu_ftrace_dump_stall_unsuppress()
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
 
 /*
@@ -220,8 +235,12 @@ do { \
 	static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
 	\
 	if (!atomic_read(&___rfd_beenhere) && \
-	    !atomic_xchg(&___rfd_beenhere, 1)) \
+	    !atomic_xchg(&___rfd_beenhere, 1)) { \
+		tracing_off(); \
+		rcu_ftrace_dump_stall_suppress(); \
 		ftrace_dump(oops_dump_mode); \
+		rcu_ftrace_dump_stall_unsuppress(); \
+	} \
 } while (0)
 
 void rcu_early_boot_tests(void);
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 7649fcd2c4c7..88cba7c2956c 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/rcupdate.h>
 
 #include "rcu_segcblist.h"
 
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 45f2ffbc1e78..362eb2f78b3c 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -51,6 +51,7 @@
 #include <asm/byteorder.h>
 #include <linux/torture.h>
 #include <linux/vmalloc.h>
+#include <linux/sched/debug.h>
 
 #include "rcu.h"
 
@@ -89,6 +90,7 @@ torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable.");
 torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
 torture_param(int, stall_cpu_holdoff, 10,
 	      "Time to wait before starting stall (s).");
+torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling.");
 torture_param(int, stat_interval, 60,
 	      "Number of seconds between stats printk()s");
 torture_param(int, stutter, 5, "Number of seconds to run/halt test");
@@ -1239,6 +1241,7 @@ rcu_torture_stats_print(void)
 	long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
 	long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
 	static unsigned long rtcv_snap = ULONG_MAX;
+	static bool splatted;
 	struct task_struct *wtp;
 
 	for_each_possible_cpu(cpu) {
@@ -1324,6 +1327,10 @@ rcu_torture_stats_print(void)
 			 gpnum, completed, flags,
 			 wtp == NULL ? ~0UL : wtp->state,
 			 wtp == NULL ? -1 : (int)task_cpu(wtp));
+		if (!splatted && wtp) {
+			sched_show_task(wtp);
+			splatted = true;
+		}
 		show_rcu_gp_kthreads();
 		rcu_ftrace_dump(DUMP_ALL);
 	}
@@ -1357,7 +1364,7 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
1357 "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " 1364 "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
1358 "test_boost=%d/%d test_boost_interval=%d " 1365 "test_boost=%d/%d test_boost_interval=%d "
1359 "test_boost_duration=%d shutdown_secs=%d " 1366 "test_boost_duration=%d shutdown_secs=%d "
1360 "stall_cpu=%d stall_cpu_holdoff=%d " 1367 "stall_cpu=%d stall_cpu_holdoff=%d stall_cpu_irqsoff=%d "
1361 "n_barrier_cbs=%d " 1368 "n_barrier_cbs=%d "
1362 "onoff_interval=%d onoff_holdoff=%d\n", 1369 "onoff_interval=%d onoff_holdoff=%d\n",
1363 torture_type, tag, nrealreaders, nfakewriters, 1370 torture_type, tag, nrealreaders, nfakewriters,
@@ -1365,7 +1372,7 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
 		 stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
 		 test_boost, cur_ops->can_boost,
 		 test_boost_interval, test_boost_duration, shutdown_secs,
-		 stall_cpu, stall_cpu_holdoff,
+		 stall_cpu, stall_cpu_holdoff, stall_cpu_irqsoff,
 		 n_barrier_cbs,
 		 onoff_interval, onoff_holdoff);
 }
@@ -1430,12 +1437,19 @@ static int rcu_torture_stall(void *args)
 	if (!kthread_should_stop()) {
 		stop_at = get_seconds() + stall_cpu;
 		/* RCU CPU stall is expected behavior in following code. */
-		pr_alert("rcu_torture_stall start.\n");
 		rcu_read_lock();
-		preempt_disable();
+		if (stall_cpu_irqsoff)
+			local_irq_disable();
+		else
+			preempt_disable();
+		pr_alert("rcu_torture_stall start on CPU %d.\n",
+			 smp_processor_id());
 		while (ULONG_CMP_LT(get_seconds(), stop_at))
 			continue; /* Induce RCU CPU stall warning. */
-		preempt_enable();
+		if (stall_cpu_irqsoff)
+			local_irq_enable();
+		else
+			preempt_enable();
 		rcu_read_unlock();
 		pr_alert("rcu_torture_stall end.\n");
 	}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3e3650e94ae6..e4fe06d42385 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -534,8 +534,8 @@ module_param(rcu_kick_kthreads, bool, 0644);
  * How long the grace period must be before we start recruiting
  * quiescent-state help from rcu_note_context_switch().
  */
-static ulong jiffies_till_sched_qs = HZ / 20;
-module_param(jiffies_till_sched_qs, ulong, 0644);
+static ulong jiffies_till_sched_qs = HZ / 10;
+module_param(jiffies_till_sched_qs, ulong, 0444);
 
 static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 				  struct rcu_data *rdp);
@@ -837,6 +837,9 @@ static void rcu_eqs_enter(bool user)
  * We crowbar the ->dynticks_nesting field to zero to allow for
  * the possibility of usermode upcalls having messed up our count
  * of interrupt nesting level during the prior busy period.
+ *
+ * If you add or remove a call to rcu_idle_enter(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
  */
 void rcu_idle_enter(void)
 {
@@ -852,6 +855,9 @@ void rcu_idle_enter(void)
  * is permitted between this call and rcu_user_exit(). This way the
  * CPU doesn't need to maintain the tick for RCU maintenance purposes
  * when the CPU runs in userspace.
+ *
+ * If you add or remove a call to rcu_user_enter(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
  */
 void rcu_user_enter(void)
 {
@@ -875,6 +881,9 @@ void rcu_user_enter(void)
  * Use things like work queues to work around this limitation.
  *
  * You have been warned.
+ *
+ * If you add or remove a call to rcu_irq_exit(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
  */
 void rcu_irq_exit(void)
 {
@@ -899,6 +908,9 @@ void rcu_irq_exit(void)
 
 /*
  * Wrapper for rcu_irq_exit() where interrupts are enabled.
+ *
+ * If you add or remove a call to rcu_irq_exit_irqson(), be sure to test
+ * with CONFIG_RCU_EQS_DEBUG=y.
  */
 void rcu_irq_exit_irqson(void)
 {
@@ -971,6 +983,9 @@ static void rcu_eqs_exit(bool user)
  * allow for the possibility of usermode upcalls messing up our count
  * of interrupt nesting level during the busy period that is just
  * now starting.
+ *
+ * If you add or remove a call to rcu_idle_exit(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
  */
 void rcu_idle_exit(void)
 {
@@ -987,6 +1002,9 @@ void rcu_idle_exit(void)
  *
  * Exit RCU idle mode while entering the kernel because it can
  * run a RCU read side critical section anytime.
+ *
+ * If you add or remove a call to rcu_user_exit(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
  */
 void rcu_user_exit(void)
 {
@@ -1012,6 +1030,9 @@ void rcu_user_exit(void)
  * Use things like work queues to work around this limitation.
  *
  * You have been warned.
+ *
+ * If you add or remove a call to rcu_irq_enter(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
  */
 void rcu_irq_enter(void)
 {
@@ -1037,6 +1058,9 @@ void rcu_irq_enter(void)
 
 /*
  * Wrapper for rcu_irq_enter() where interrupts are enabled.
+ *
+ * If you add or remove a call to rcu_irq_enter_irqson(), be sure to test
+ * with CONFIG_RCU_EQS_DEBUG=y.
  */
 void rcu_irq_enter_irqson(void)
 {
@@ -1055,6 +1079,9 @@ void rcu_irq_enter_irqson(void)
  * that the CPU is active. This implementation permits nested NMIs, as
  * long as the nesting level does not overflow an int. (You will probably
  * run out of stack space first.)
+ *
+ * If you add or remove a call to rcu_nmi_enter(), be sure to test
+ * with CONFIG_RCU_EQS_DEBUG=y.
  */
 void rcu_nmi_enter(void)
 {
@@ -1087,6 +1114,9 @@ void rcu_nmi_enter(void)
  * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting
  * to let the RCU grace-period handling know that the CPU is back to
  * being RCU-idle.
+ *
+ * If you add or remove a call to rcu_nmi_exit(), be sure to test
+ * with CONFIG_RCU_EQS_DEBUG=y.
  */
 void rcu_nmi_exit(void)
 {
@@ -1207,6 +1237,22 @@ static int rcu_is_cpu_rrupt_from_idle(void)
 }
 
 /*
+ * We are reporting a quiescent state on behalf of some other CPU, so
+ * it is our responsibility to check for and handle potential overflow
+ * of the rcu_node ->gpnum counter with respect to the rcu_data counters.
+ * After all, the CPU might be in deep idle state, and thus executing no
+ * code whatsoever.
+ */
+static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
+{
+	lockdep_assert_held(&rnp->lock);
+	if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, rnp->gpnum))
+		WRITE_ONCE(rdp->gpwrap, true);
+	if (ULONG_CMP_LT(rdp->rcu_iw_gpnum + ULONG_MAX / 4, rnp->gpnum))
+		rdp->rcu_iw_gpnum = rnp->gpnum + ULONG_MAX / 4;
+}
+
+/*
  * Snapshot the specified CPU's dynticks counter so that we can later
  * credit them with an implicit quiescent state. Return 1 if this CPU
  * is in dynticks idle mode, which is an extended quiescent state.
@@ -1216,15 +1262,34 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
 	rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks);
 	if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
-		if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
-				 rdp->mynode->gpnum))
-			WRITE_ONCE(rdp->gpwrap, true);
+		rcu_gpnum_ovf(rdp->mynode, rdp);
 		return 1;
 	}
 	return 0;
 }
 
 /*
+ * Handler for the irq_work request posted when a grace period has
+ * gone on for too long, but not yet long enough for an RCU CPU
+ * stall warning. Set state appropriately, but just complain if
+ * there is unexpected state on entry.
+ */
+static void rcu_iw_handler(struct irq_work *iwp)
+{
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+
+	rdp = container_of(iwp, struct rcu_data, rcu_iw);
+	rnp = rdp->mynode;
+	raw_spin_lock_rcu_node(rnp);
+	if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) {
+		rdp->rcu_iw_gpnum = rnp->gpnum;
+		rdp->rcu_iw_pending = false;
+	}
+	raw_spin_unlock_rcu_node(rnp);
+}
+
+/*
  * Return true if the specified CPU has passed through a quiescent
  * state by virtue of being in or having passed through an dynticks
  * idle state since the last call to dyntick_save_progress_counter()
@@ -1235,8 +1300,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	unsigned long jtsq;
 	bool *rnhqp;
 	bool *ruqp;
-	unsigned long rjtsc;
-	struct rcu_node *rnp;
+	struct rcu_node *rnp = rdp->mynode;
 
 	/*
 	 * If the CPU passed through or entered a dynticks idle phase with
@@ -1249,34 +1313,25 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	if (rcu_dynticks_in_eqs_since(rdp->dynticks, rdp->dynticks_snap)) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
 		rdp->dynticks_fqs++;
+		rcu_gpnum_ovf(rnp, rdp);
 		return 1;
 	}
 
-	/* Compute and saturate jiffies_till_sched_qs. */
-	jtsq = jiffies_till_sched_qs;
-	rjtsc = rcu_jiffies_till_stall_check();
-	if (jtsq > rjtsc / 2) {
-		WRITE_ONCE(jiffies_till_sched_qs, rjtsc);
-		jtsq = rjtsc / 2;
-	} else if (jtsq < 1) {
-		WRITE_ONCE(jiffies_till_sched_qs, 1);
-		jtsq = 1;
-	}
-
 	/*
 	 * Has this CPU encountered a cond_resched_rcu_qs() since the
 	 * beginning of the grace period? For this to be the case,
 	 * the CPU has to have noticed the current grace period. This
 	 * might not be the case for nohz_full CPUs looping in the kernel.
 	 */
-	rnp = rdp->mynode;
+	jtsq = jiffies_till_sched_qs;
 	ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
 	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
 	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&
 	    READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc"));
+		rcu_gpnum_ovf(rnp, rdp);
 		return 1;
-	} else {
+	} else if (time_after(jiffies, rdp->rsp->gp_start + jtsq)) {
 		/* Load rcu_qs_ctr before store to rcu_urgent_qs. */
 		smp_store_release(ruqp, true);
 	}
@@ -1285,6 +1340,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp))) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
 		rdp->offline_fqs++;
+		rcu_gpnum_ovf(rnp, rdp);
 		return 1;
 	}
 
@@ -1304,10 +1360,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	 * updates are only once every few jiffies, the probability of
 	 * lossage (and thus of slight grace-period extension) is
 	 * quite low.
-	 *
-	 * Note that if the jiffies_till_sched_qs boot/sysfs parameter
-	 * is set too high, we override with half of the RCU CPU stall
-	 * warning delay.
 	 */
 	rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu);
 	if (!READ_ONCE(*rnhqp) &&
1312 rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu); 1364 rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu);
1313 if (!READ_ONCE(*rnhqp) && 1365 if (!READ_ONCE(*rnhqp) &&
@@ -1316,15 +1368,26 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 		WRITE_ONCE(*rnhqp, true);
 		/* Store rcu_need_heavy_qs before rcu_urgent_qs. */
 		smp_store_release(ruqp, true);
-		rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
+		rdp->rsp->jiffies_resched += jtsq; /* Re-enable beating. */
 	}
 
 	/*
-	 * If more than halfway to RCU CPU stall-warning time, do
-	 * a resched_cpu() to try to loosen things up a bit.
+	 * If more than halfway to RCU CPU stall-warning time, do a
+	 * resched_cpu() to try to loosen things up a bit. Also check to
+	 * see if the CPU is getting hammered with interrupts, but only
+	 * once per grace period, just to keep the IPIs down to a dull roar.
 	 */
-	if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2)
+	if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2) {
 		resched_cpu(rdp->cpu);
+		if (IS_ENABLED(CONFIG_IRQ_WORK) &&
+		    !rdp->rcu_iw_pending && rdp->rcu_iw_gpnum != rnp->gpnum &&
+		    (rnp->ffmask & rdp->grpmask)) {
+			init_irq_work(&rdp->rcu_iw, rcu_iw_handler);
+			rdp->rcu_iw_pending = true;
+			rdp->rcu_iw_gpnum = rnp->gpnum;
+			irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
+		}
+	}
 
 	return 0;
 }
@@ -1513,6 +1576,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
 {
 	int cpu;
 	unsigned long flags;
+	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 	struct rcu_node *rnp = rcu_get_root(rsp);
 	long totqlen = 0;
 
@@ -1528,7 +1592,9 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	 */
 	pr_err("INFO: %s self-detected stall on CPU", rsp->name);
 	print_cpu_stall_info_begin();
+	raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags);
 	print_cpu_stall_info(rsp, smp_processor_id());
+	raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
 	print_cpu_stall_info_end();
 	for_each_possible_cpu(cpu)
 		totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,
@@ -1922,6 +1988,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 		rdp->core_needs_qs = need_gp;
 		zero_cpu_stall_ticks(rdp);
 		WRITE_ONCE(rdp->gpwrap, false);
+		rcu_gpnum_ovf(rnp, rdp);
 	}
 	return ret;
 }
@@ -3702,6 +3769,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->cpu_no_qs.b.norm = true;
 	rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu);
 	rdp->core_needs_qs = false;
+	rdp->rcu_iw_pending = false;
+	rdp->rcu_iw_gpnum = rnp->gpnum - 1;
 	trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
@@ -3739,10 +3808,24 @@ static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
  */
 int rcutree_online_cpu(unsigned int cpu)
 {
-	sync_sched_exp_online_cleanup(cpu);
-	rcutree_affinity_setting(cpu, -1);
+	unsigned long flags;
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp) {
+		rdp = per_cpu_ptr(rsp->rda, cpu);
+		rnp = rdp->mynode;
+		raw_spin_lock_irqsave_rcu_node(rnp, flags);
+		rnp->ffmask |= rdp->grpmask;
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	}
 	if (IS_ENABLED(CONFIG_TREE_SRCU))
 		srcu_online_cpu(cpu);
+	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
+		return 0; /* Too early in boot for scheduler work. */
+	sync_sched_exp_online_cleanup(cpu);
+	rcutree_affinity_setting(cpu, -1);
 	return 0;
 }
 
@@ -3752,6 +3835,19 @@ int rcutree_online_cpu(unsigned int cpu)
  */
 int rcutree_offline_cpu(unsigned int cpu)
 {
+	unsigned long flags;
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp) {
+		rdp = per_cpu_ptr(rsp->rda, cpu);
+		rnp = rdp->mynode;
+		raw_spin_lock_irqsave_rcu_node(rnp, flags);
+		rnp->ffmask &= ~rdp->grpmask;
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	}
+
 	rcutree_affinity_setting(cpu, cpu);
 	if (IS_ENABLED(CONFIG_TREE_SRCU))
 		srcu_offline_cpu(cpu);
@@ -4200,8 +4296,7 @@ void __init rcu_init(void)
 	for_each_online_cpu(cpu) {
 		rcutree_prepare_cpu(cpu);
 		rcu_cpu_starting(cpu);
-		if (IS_ENABLED(CONFIG_TREE_SRCU))
-			srcu_online_cpu(cpu);
+		rcutree_online_cpu(cpu);
 	}
 }
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 8e1f285f0a70..46a5d1991450 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -103,6 +103,7 @@ struct rcu_node {
 				/* Online CPUs for next expedited GP. */
 				/*  Any CPU that has ever been online will */
 				/*  have its bit set. */
+	unsigned long ffmask;	/* Fully functional CPUs. */
 	unsigned long grpmask;	/* Mask to apply to parent qsmask. */
 				/*  Only one bit will be set in this mask. */
 	int	grplo;		/* lowest-numbered CPU or group here. */
@@ -285,6 +286,10 @@ struct rcu_data {
 
 	/* 8) RCU CPU stall data. */
 	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
+	/* ->rcu_iw* fields protected by leaf rcu_node ->lock. */
+	struct irq_work rcu_iw;		/* Check for non-irq activity. */
+	bool rcu_iw_pending;		/* Is ->rcu_iw pending? */
+	unsigned long rcu_iw_gpnum;	/* ->gpnum associated with ->rcu_iw. */
 
 	int cpu;
 	struct rcu_state *rsp;
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index e012b9be777e..4c857e583802 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -54,6 +54,7 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work);
  * This probably needs to be excluded from -rt builds.
  */
 #define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
+#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
 
 #endif /* #else #ifdef CONFIG_RCU_BOOST */
 
@@ -530,7 +531,7 @@ void rcu_read_unlock_special(struct task_struct *t)
 
 	/* Unboost if we were boosted. */
 	if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
-		rt_mutex_unlock(&rnp->boost_mtx);
+		rt_mutex_futex_unlock(&rnp->boost_mtx);
 
 	/*
 	 * If this was the last task on the expedited lists,
@@ -911,8 +912,6 @@ void exit_rcu(void)
 
 #ifdef CONFIG_RCU_BOOST
 
-#include "../locking/rtmutex_common.h"
-
 static void rcu_wake_cond(struct task_struct *t, int status)
 {
 	/*
@@ -1507,7 +1506,7 @@ static void rcu_prepare_for_idle(void)
 	rdtp->last_accelerate = jiffies;
 	for_each_rcu_flavor(rsp) {
 		rdp = this_cpu_ptr(rsp->rda);
-		if (rcu_segcblist_pend_cbs(&rdp->cblist))
+		if (!rcu_segcblist_pend_cbs(&rdp->cblist))
 			continue;
 		rnp = rdp->mynode;
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
@@ -1671,6 +1670,7 @@ static void print_cpu_stall_info_begin(void)
  */
 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
 {
+	unsigned long delta;
 	char fast_no_hz[72];
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_dynticks *rdtp = rdp->dynticks;
@@ -1685,11 +1685,15 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
 		ticks_value = rsp->gpnum - rdp->gpnum;
 	}
 	print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
-	pr_err("\t%d-%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
+	delta = rdp->mynode->gpnum - rdp->rcu_iw_gpnum;
+	pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
 	       cpu,
 	       "O."[!!cpu_online(cpu)],
 	       "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
 	       "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)],
+	       !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' :
+			rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' :
+			"!."[!delta],
 	       ticks_value, ticks_title,
 	       rcu_dynticks_snap(rdtp) & 0xfff,
 	       rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 5033b66d2753..27694561f769 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -494,6 +494,7 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
 #endif
 
 int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
+EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
 static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
 
 module_param(rcu_cpu_stall_suppress, int, 0644);
@@ -575,7 +576,6 @@ DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
 static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
 module_param(rcu_task_stall_timeout, int, 0644);
 
-static void rcu_spawn_tasks_kthread(void);
 static struct task_struct *rcu_tasks_kthread_ptr;
 
 /**
@@ -600,7 +600,6 @@ void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
 {
 	unsigned long flags;
 	bool needwake;
-	bool havetask = READ_ONCE(rcu_tasks_kthread_ptr);
 
 	rhp->next = NULL;
 	rhp->func = func;
@@ -610,11 +609,8 @@ void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
 	rcu_tasks_cbs_tail = &rhp->next;
 	raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
 	/* We can't create the thread unless interrupts are enabled. */
-	if ((needwake && havetask) ||
-	    (!havetask && !irqs_disabled_flags(flags))) {
-		rcu_spawn_tasks_kthread();
+	if (needwake && READ_ONCE(rcu_tasks_kthread_ptr))
 		wake_up(&rcu_tasks_cbs_wq);
-	}
 }
 EXPORT_SYMBOL_GPL(call_rcu_tasks);
 
@@ -853,27 +849,18 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 	}
 }
 
-/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
-static void rcu_spawn_tasks_kthread(void)
+/* Spawn rcu_tasks_kthread() at core_initcall() time. */
+static int __init rcu_spawn_tasks_kthread(void)
 {
-	static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
 	struct task_struct *t;
 
-	if (READ_ONCE(rcu_tasks_kthread_ptr)) {
-		smp_mb(); /* Ensure caller sees full kthread. */
-		return;
-	}
-	mutex_lock(&rcu_tasks_kthread_mutex);
-	if (rcu_tasks_kthread_ptr) {
-		mutex_unlock(&rcu_tasks_kthread_mutex);
-		return;
-	}
 	t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
 	BUG_ON(IS_ERR(t));
 	smp_mb(); /* Ensure others see full kthread. */
 	WRITE_ONCE(rcu_tasks_kthread_ptr, t);
-	mutex_unlock(&rcu_tasks_kthread_mutex);
+	return 0;
 }
+core_initcall(rcu_spawn_tasks_kthread);
 
 /* Do the srcu_read_lock() for the above synchronize_srcu(). */
 void exit_tasks_rcu_start(void)