about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/rcu/tree.c
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2014-05-22 05:36:10 -0400
committer Ingo Molnar <mingo@kernel.org> 2014-05-22 05:36:10 -0400
commit e14505a8d50882ff3bdd4b791b14d90a0881fa4d (patch)
tree d2c3e9846b82b02187d33ebafb44fd6934bcd81f /kernel/rcu/tree.c
parent 4b660a7f5c8099d88d1a43d8ae138965112592c7 (diff)
parent 61f38db3e3c0e4c3be0858750e2cabeadaecac0c (diff)
Merge branch 'rcu/next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu
Pull RCU updates from Paul E. McKenney:

 " 1. Update RCU documentation. These were posted to LKML at
      https://lkml.org/lkml/2014/4/28/634.

   2. Miscellaneous fixes. These were posted to LKML at
      https://lkml.org/lkml/2014/4/28/645.

   3. Torture-test changes. These were posted to LKML at
      https://lkml.org/lkml/2014/4/28/667.

   4. Variable-name renaming cleanup, sent separately due to conflicts.
      This was posted to LKML at https://lkml.org/lkml/2014/5/13/854.

   5. Patch to suppress RCU stall warnings while sysrq requests are
      being processed. This patch is the RCU portions of the patch that
      Rik posted to LKML at https://lkml.org/lkml/2014/4/29/457. The
      reason for pushing this patch ahead instead of waiting until 3.17
      is that the NMI-based stack traces are messing up sysrq output,
      and in some cases also messing up the system as well."

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r-- kernel/rcu/tree.c 309
1 files changed, 217 insertions, 92 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0c47e300210a..3e3f13e8b429 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -101,7 +101,7 @@ DEFINE_PER_CPU(struct rcu_data, sname##_data)
101RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); 101RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
102RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); 102RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
103 103
104static struct rcu_state *rcu_state; 104static struct rcu_state *rcu_state_p;
105LIST_HEAD(rcu_struct_flavors); 105LIST_HEAD(rcu_struct_flavors);
106 106
107/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ 107/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
@@ -243,7 +243,7 @@ static ulong jiffies_till_next_fqs = ULONG_MAX;
243module_param(jiffies_till_first_fqs, ulong, 0644); 243module_param(jiffies_till_first_fqs, ulong, 0644);
244module_param(jiffies_till_next_fqs, ulong, 0644); 244module_param(jiffies_till_next_fqs, ulong, 0644);
245 245
246static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, 246static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
247 struct rcu_data *rdp); 247 struct rcu_data *rdp);
248static void force_qs_rnp(struct rcu_state *rsp, 248static void force_qs_rnp(struct rcu_state *rsp,
249 int (*f)(struct rcu_data *rsp, bool *isidle, 249 int (*f)(struct rcu_data *rsp, bool *isidle,
@@ -271,6 +271,15 @@ long rcu_batches_completed_bh(void)
271EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); 271EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
272 272
273/* 273/*
274 * Force a quiescent state.
275 */
276void rcu_force_quiescent_state(void)
277{
278 force_quiescent_state(rcu_state_p);
279}
280EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
281
282/*
274 * Force a quiescent state for RCU BH. 283 * Force a quiescent state for RCU BH.
275 */ 284 */
276void rcu_bh_force_quiescent_state(void) 285void rcu_bh_force_quiescent_state(void)
@@ -280,6 +289,21 @@ void rcu_bh_force_quiescent_state(void)
280EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); 289EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
281 290
282/* 291/*
292 * Show the state of the grace-period kthreads.
293 */
294void show_rcu_gp_kthreads(void)
295{
296 struct rcu_state *rsp;
297
298 for_each_rcu_flavor(rsp) {
299 pr_info("%s: wait state: %d ->state: %#lx\n",
300 rsp->name, rsp->gp_state, rsp->gp_kthread->state);
301 /* sched_show_task(rsp->gp_kthread); */
302 }
303}
304EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
305
306/*
283 * Record the number of times rcutorture tests have been initiated and 307 * Record the number of times rcutorture tests have been initiated and
284 * terminated. This information allows the debugfs tracing stats to be 308 * terminated. This information allows the debugfs tracing stats to be
285 * correlated to the rcutorture messages, even when the rcutorture module 309 * correlated to the rcutorture messages, even when the rcutorture module
@@ -294,6 +318,39 @@ void rcutorture_record_test_transition(void)
294EXPORT_SYMBOL_GPL(rcutorture_record_test_transition); 318EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
295 319
296/* 320/*
321 * Send along grace-period-related data for rcutorture diagnostics.
322 */
323void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
324 unsigned long *gpnum, unsigned long *completed)
325{
326 struct rcu_state *rsp = NULL;
327
328 switch (test_type) {
329 case RCU_FLAVOR:
330 rsp = rcu_state_p;
331 break;
332 case RCU_BH_FLAVOR:
333 rsp = &rcu_bh_state;
334 break;
335 case RCU_SCHED_FLAVOR:
336 rsp = &rcu_sched_state;
337 break;
338 default:
339 break;
340 }
341 if (rsp != NULL) {
342 *flags = ACCESS_ONCE(rsp->gp_flags);
343 *gpnum = ACCESS_ONCE(rsp->gpnum);
344 *completed = ACCESS_ONCE(rsp->completed);
345 return;
346 }
347 *flags = 0;
348 *gpnum = 0;
349 *completed = 0;
350}
351EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
352
353/*
297 * Record the number of writer passes through the current rcutorture test. 354 * Record the number of writer passes through the current rcutorture test.
298 * This is also used to correlate debugfs tracing stats with the rcutorture 355 * This is also used to correlate debugfs tracing stats with the rcutorture
299 * messages. 356 * messages.
@@ -324,6 +381,28 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
324} 381}
325 382
326/* 383/*
384 * Return the root node of the specified rcu_state structure.
385 */
386static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
387{
388 return &rsp->node[0];
389}
390
391/*
392 * Is there any need for future grace periods?
393 * Interrupts must be disabled. If the caller does not hold the root
394 * rcu_node structure's ->lock, the results are advisory only.
395 */
396static int rcu_future_needs_gp(struct rcu_state *rsp)
397{
398 struct rcu_node *rnp = rcu_get_root(rsp);
399 int idx = (ACCESS_ONCE(rnp->completed) + 1) & 0x1;
400 int *fp = &rnp->need_future_gp[idx];
401
402 return ACCESS_ONCE(*fp);
403}
404
405/*
327 * Does the current CPU require a not-yet-started grace period? 406 * Does the current CPU require a not-yet-started grace period?
328 * The caller must have disabled interrupts to prevent races with 407 * The caller must have disabled interrupts to prevent races with
329 * normal callback registry. 408 * normal callback registry.
@@ -335,7 +414,7 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
335 414
336 if (rcu_gp_in_progress(rsp)) 415 if (rcu_gp_in_progress(rsp))
337 return 0; /* No, a grace period is already in progress. */ 416 return 0; /* No, a grace period is already in progress. */
338 if (rcu_nocb_needs_gp(rsp)) 417 if (rcu_future_needs_gp(rsp))
339 return 1; /* Yes, a no-CBs CPU needs one. */ 418 return 1; /* Yes, a no-CBs CPU needs one. */
340 if (!rdp->nxttail[RCU_NEXT_TAIL]) 419 if (!rdp->nxttail[RCU_NEXT_TAIL])
341 return 0; /* No, this is a no-CBs (or offline) CPU. */ 420 return 0; /* No, this is a no-CBs (or offline) CPU. */
@@ -350,14 +429,6 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
350} 429}
351 430
352/* 431/*
353 * Return the root node of the specified rcu_state structure.
354 */
355static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
356{
357 return &rsp->node[0];
358}
359
360/*
361 * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state 432 * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
362 * 433 *
363 * If the new value of the ->dynticks_nesting counter now is zero, 434 * If the new value of the ->dynticks_nesting counter now is zero,
@@ -758,7 +829,12 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
758{ 829{
759 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); 830 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
760 rcu_sysidle_check_cpu(rdp, isidle, maxj); 831 rcu_sysidle_check_cpu(rdp, isidle, maxj);
761 return (rdp->dynticks_snap & 0x1) == 0; 832 if ((rdp->dynticks_snap & 0x1) == 0) {
833 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
834 return 1;
835 } else {
836 return 0;
837 }
762} 838}
763 839
764/* 840/*
@@ -834,7 +910,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
834 * we will beat on the first one until it gets unstuck, then move 910 * we will beat on the first one until it gets unstuck, then move
835 * to the next. Only do this for the primary flavor of RCU. 911 * to the next. Only do this for the primary flavor of RCU.
836 */ 912 */
837 if (rdp->rsp == rcu_state && 913 if (rdp->rsp == rcu_state_p &&
838 ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { 914 ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
839 rdp->rsp->jiffies_resched += 5; 915 rdp->rsp->jiffies_resched += 5;
840 resched_cpu(rdp->cpu); 916 resched_cpu(rdp->cpu);
@@ -851,7 +927,7 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
851 rsp->gp_start = j; 927 rsp->gp_start = j;
852 smp_wmb(); /* Record start time before stall time. */ 928 smp_wmb(); /* Record start time before stall time. */
853 j1 = rcu_jiffies_till_stall_check(); 929 j1 = rcu_jiffies_till_stall_check();
854 rsp->jiffies_stall = j + j1; 930 ACCESS_ONCE(rsp->jiffies_stall) = j + j1;
855 rsp->jiffies_resched = j + j1 / 2; 931 rsp->jiffies_resched = j + j1 / 2;
856} 932}
857 933
@@ -890,12 +966,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
890 /* Only let one CPU complain about others per time interval. */ 966 /* Only let one CPU complain about others per time interval. */
891 967
892 raw_spin_lock_irqsave(&rnp->lock, flags); 968 raw_spin_lock_irqsave(&rnp->lock, flags);
893 delta = jiffies - rsp->jiffies_stall; 969 delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall);
894 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { 970 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
895 raw_spin_unlock_irqrestore(&rnp->lock, flags); 971 raw_spin_unlock_irqrestore(&rnp->lock, flags);
896 return; 972 return;
897 } 973 }
898 rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; 974 ACCESS_ONCE(rsp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
899 raw_spin_unlock_irqrestore(&rnp->lock, flags); 975 raw_spin_unlock_irqrestore(&rnp->lock, flags);
900 976
901 /* 977 /*
@@ -932,9 +1008,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
932 print_cpu_stall_info_end(); 1008 print_cpu_stall_info_end();
933 for_each_possible_cpu(cpu) 1009 for_each_possible_cpu(cpu)
934 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; 1010 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
935 pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n", 1011 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n",
936 smp_processor_id(), (long)(jiffies - rsp->gp_start), 1012 smp_processor_id(), (long)(jiffies - rsp->gp_start),
937 rsp->gpnum, rsp->completed, totqlen); 1013 (long)rsp->gpnum, (long)rsp->completed, totqlen);
938 if (ndetected == 0) 1014 if (ndetected == 0)
939 pr_err("INFO: Stall ended before state dump start\n"); 1015 pr_err("INFO: Stall ended before state dump start\n");
940 else if (!trigger_all_cpu_backtrace()) 1016 else if (!trigger_all_cpu_backtrace())
@@ -947,12 +1023,6 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
947 force_quiescent_state(rsp); /* Kick them all. */ 1023 force_quiescent_state(rsp); /* Kick them all. */
948} 1024}
949 1025
950/*
951 * This function really isn't for public consumption, but RCU is special in
952 * that context switches can allow the state machine to make progress.
953 */
954extern void resched_cpu(int cpu);
955
956static void print_cpu_stall(struct rcu_state *rsp) 1026static void print_cpu_stall(struct rcu_state *rsp)
957{ 1027{
958 int cpu; 1028 int cpu;
@@ -971,14 +1041,15 @@ static void print_cpu_stall(struct rcu_state *rsp)
971 print_cpu_stall_info_end(); 1041 print_cpu_stall_info_end();
972 for_each_possible_cpu(cpu) 1042 for_each_possible_cpu(cpu)
973 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; 1043 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
974 pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n", 1044 pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
975 jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen); 1045 jiffies - rsp->gp_start,
1046 (long)rsp->gpnum, (long)rsp->completed, totqlen);
976 if (!trigger_all_cpu_backtrace()) 1047 if (!trigger_all_cpu_backtrace())
977 dump_stack(); 1048 dump_stack();
978 1049
979 raw_spin_lock_irqsave(&rnp->lock, flags); 1050 raw_spin_lock_irqsave(&rnp->lock, flags);
980 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) 1051 if (ULONG_CMP_GE(jiffies, ACCESS_ONCE(rsp->jiffies_stall)))
981 rsp->jiffies_stall = jiffies + 1052 ACCESS_ONCE(rsp->jiffies_stall) = jiffies +
982 3 * rcu_jiffies_till_stall_check() + 3; 1053 3 * rcu_jiffies_till_stall_check() + 3;
983 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1054 raw_spin_unlock_irqrestore(&rnp->lock, flags);
984 1055
@@ -1062,7 +1133,7 @@ void rcu_cpu_stall_reset(void)
1062 struct rcu_state *rsp; 1133 struct rcu_state *rsp;
1063 1134
1064 for_each_rcu_flavor(rsp) 1135 for_each_rcu_flavor(rsp)
1065 rsp->jiffies_stall = jiffies + ULONG_MAX / 2; 1136 ACCESS_ONCE(rsp->jiffies_stall) = jiffies + ULONG_MAX / 2;
1066} 1137}
1067 1138
1068/* 1139/*
@@ -1123,15 +1194,18 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1123/* 1194/*
1124 * Start some future grace period, as needed to handle newly arrived 1195 * Start some future grace period, as needed to handle newly arrived
1125 * callbacks. The required future grace periods are recorded in each 1196 * callbacks. The required future grace periods are recorded in each
1126 * rcu_node structure's ->need_future_gp field. 1197 * rcu_node structure's ->need_future_gp field. Returns true if there
1198 * is reason to awaken the grace-period kthread.
1127 * 1199 *
1128 * The caller must hold the specified rcu_node structure's ->lock. 1200 * The caller must hold the specified rcu_node structure's ->lock.
1129 */ 1201 */
1130static unsigned long __maybe_unused 1202static bool __maybe_unused
1131rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) 1203rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1204 unsigned long *c_out)
1132{ 1205{
1133 unsigned long c; 1206 unsigned long c;
1134 int i; 1207 int i;
1208 bool ret = false;
1135 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); 1209 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
1136 1210
1137 /* 1211 /*
@@ -1142,7 +1216,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
1142 trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf")); 1216 trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
1143 if (rnp->need_future_gp[c & 0x1]) { 1217 if (rnp->need_future_gp[c & 0x1]) {
1144 trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf")); 1218 trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
1145 return c; 1219 goto out;
1146 } 1220 }
1147 1221
1148 /* 1222 /*
@@ -1156,7 +1230,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
1156 ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) { 1230 ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
1157 rnp->need_future_gp[c & 0x1]++; 1231 rnp->need_future_gp[c & 0x1]++;
1158 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf")); 1232 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
1159 return c; 1233 goto out;
1160 } 1234 }
1161 1235
1162 /* 1236 /*
@@ -1197,12 +1271,15 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
1197 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot")); 1271 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
1198 } else { 1272 } else {
1199 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot")); 1273 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
1200 rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp); 1274 ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
1201 } 1275 }
1202unlock_out: 1276unlock_out:
1203 if (rnp != rnp_root) 1277 if (rnp != rnp_root)
1204 raw_spin_unlock(&rnp_root->lock); 1278 raw_spin_unlock(&rnp_root->lock);
1205 return c; 1279out:
1280 if (c_out != NULL)
1281 *c_out = c;
1282 return ret;
1206} 1283}
1207 1284
1208/* 1285/*
@@ -1226,25 +1303,43 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
1226} 1303}
1227 1304
1228/* 1305/*
1306 * Awaken the grace-period kthread for the specified flavor of RCU.
1307 * Don't do a self-awaken, and don't bother awakening when there is
1308 * nothing for the grace-period kthread to do (as in several CPUs
1309 * raced to awaken, and we lost), and finally don't try to awaken
1310 * a kthread that has not yet been created.
1311 */
1312static void rcu_gp_kthread_wake(struct rcu_state *rsp)
1313{
1314 if (current == rsp->gp_kthread ||
1315 !ACCESS_ONCE(rsp->gp_flags) ||
1316 !rsp->gp_kthread)
1317 return;
1318 wake_up(&rsp->gp_wq);
1319}
1320
1321/*
1229 * If there is room, assign a ->completed number to any callbacks on 1322 * If there is room, assign a ->completed number to any callbacks on
1230 * this CPU that have not already been assigned. Also accelerate any 1323 * this CPU that have not already been assigned. Also accelerate any
1231 * callbacks that were previously assigned a ->completed number that has 1324 * callbacks that were previously assigned a ->completed number that has
1232 * since proven to be too conservative, which can happen if callbacks get 1325 * since proven to be too conservative, which can happen if callbacks get
1233 * assigned a ->completed number while RCU is idle, but with reference to 1326 * assigned a ->completed number while RCU is idle, but with reference to
1234 * a non-root rcu_node structure. This function is idempotent, so it does 1327 * a non-root rcu_node structure. This function is idempotent, so it does
1235 * not hurt to call it repeatedly. 1328 * not hurt to call it repeatedly. Returns a flag saying that we should
1329 * awaken the RCU grace-period kthread.
1236 * 1330 *
1237 * The caller must hold rnp->lock with interrupts disabled. 1331 * The caller must hold rnp->lock with interrupts disabled.
1238 */ 1332 */
1239static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1333static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1240 struct rcu_data *rdp) 1334 struct rcu_data *rdp)
1241{ 1335{
1242 unsigned long c; 1336 unsigned long c;
1243 int i; 1337 int i;
1338 bool ret;
1244 1339
1245 /* If the CPU has no callbacks, nothing to do. */ 1340 /* If the CPU has no callbacks, nothing to do. */
1246 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) 1341 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1247 return; 1342 return false;
1248 1343
1249 /* 1344 /*
1250 * Starting from the sublist containing the callbacks most 1345 * Starting from the sublist containing the callbacks most
@@ -1273,7 +1368,7 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1273 * be grouped into. 1368 * be grouped into.
1274 */ 1369 */
1275 if (++i >= RCU_NEXT_TAIL) 1370 if (++i >= RCU_NEXT_TAIL)
1276 return; 1371 return false;
1277 1372
1278 /* 1373 /*
1279 * Assign all subsequent callbacks' ->completed number to the next 1374 * Assign all subsequent callbacks' ->completed number to the next
@@ -1285,13 +1380,14 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1285 rdp->nxtcompleted[i] = c; 1380 rdp->nxtcompleted[i] = c;
1286 } 1381 }
1287 /* Record any needed additional grace periods. */ 1382 /* Record any needed additional grace periods. */
1288 rcu_start_future_gp(rnp, rdp); 1383 ret = rcu_start_future_gp(rnp, rdp, NULL);
1289 1384
1290 /* Trace depending on how much we were able to accelerate. */ 1385 /* Trace depending on how much we were able to accelerate. */
1291 if (!*rdp->nxttail[RCU_WAIT_TAIL]) 1386 if (!*rdp->nxttail[RCU_WAIT_TAIL])
1292 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB")); 1387 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
1293 else 1388 else
1294 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB")); 1389 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
1390 return ret;
1295} 1391}
1296 1392
1297/* 1393/*
@@ -1300,17 +1396,18 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1300 * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL 1396 * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
1301 * sublist. This function is idempotent, so it does not hurt to 1397 * sublist. This function is idempotent, so it does not hurt to
1302 * invoke it repeatedly. As long as it is not invoked -too- often... 1398 * invoke it repeatedly. As long as it is not invoked -too- often...
1399 * Returns true if the RCU grace-period kthread needs to be awakened.
1303 * 1400 *
1304 * The caller must hold rnp->lock with interrupts disabled. 1401 * The caller must hold rnp->lock with interrupts disabled.
1305 */ 1402 */
1306static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1403static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1307 struct rcu_data *rdp) 1404 struct rcu_data *rdp)
1308{ 1405{
1309 int i, j; 1406 int i, j;
1310 1407
1311 /* If the CPU has no callbacks, nothing to do. */ 1408 /* If the CPU has no callbacks, nothing to do. */
1312 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) 1409 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1313 return; 1410 return false;
1314 1411
1315 /* 1412 /*
1316 * Find all callbacks whose ->completed numbers indicate that they 1413 * Find all callbacks whose ->completed numbers indicate that they
@@ -1334,26 +1431,30 @@ static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1334 } 1431 }
1335 1432
1336 /* Classify any remaining callbacks. */ 1433 /* Classify any remaining callbacks. */
1337 rcu_accelerate_cbs(rsp, rnp, rdp); 1434 return rcu_accelerate_cbs(rsp, rnp, rdp);
1338} 1435}
1339 1436
1340/* 1437/*
1341 * Update CPU-local rcu_data state to record the beginnings and ends of 1438 * Update CPU-local rcu_data state to record the beginnings and ends of
1342 * grace periods. The caller must hold the ->lock of the leaf rcu_node 1439 * grace periods. The caller must hold the ->lock of the leaf rcu_node
1343 * structure corresponding to the current CPU, and must have irqs disabled. 1440 * structure corresponding to the current CPU, and must have irqs disabled.
1441 * Returns true if the grace-period kthread needs to be awakened.
1344 */ 1442 */
1345static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 1443static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1444 struct rcu_data *rdp)
1346{ 1445{
1446 bool ret;
1447
1347 /* Handle the ends of any preceding grace periods first. */ 1448 /* Handle the ends of any preceding grace periods first. */
1348 if (rdp->completed == rnp->completed) { 1449 if (rdp->completed == rnp->completed) {
1349 1450
1350 /* No grace period end, so just accelerate recent callbacks. */ 1451 /* No grace period end, so just accelerate recent callbacks. */
1351 rcu_accelerate_cbs(rsp, rnp, rdp); 1452 ret = rcu_accelerate_cbs(rsp, rnp, rdp);
1352 1453
1353 } else { 1454 } else {
1354 1455
1355 /* Advance callbacks. */ 1456 /* Advance callbacks. */
1356 rcu_advance_cbs(rsp, rnp, rdp); 1457 ret = rcu_advance_cbs(rsp, rnp, rdp);
1357 1458
1358 /* Remember that we saw this grace-period completion. */ 1459 /* Remember that we saw this grace-period completion. */
1359 rdp->completed = rnp->completed; 1460 rdp->completed = rnp->completed;
@@ -1372,11 +1473,13 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc
1372 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); 1473 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
1373 zero_cpu_stall_ticks(rdp); 1474 zero_cpu_stall_ticks(rdp);
1374 } 1475 }
1476 return ret;
1375} 1477}
1376 1478
1377static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) 1479static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1378{ 1480{
1379 unsigned long flags; 1481 unsigned long flags;
1482 bool needwake;
1380 struct rcu_node *rnp; 1483 struct rcu_node *rnp;
1381 1484
1382 local_irq_save(flags); 1485 local_irq_save(flags);
@@ -1388,8 +1491,10 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1388 return; 1491 return;
1389 } 1492 }
1390 smp_mb__after_unlock_lock(); 1493 smp_mb__after_unlock_lock();
1391 __note_gp_changes(rsp, rnp, rdp); 1494 needwake = __note_gp_changes(rsp, rnp, rdp);
1392 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1495 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1496 if (needwake)
1497 rcu_gp_kthread_wake(rsp);
1393} 1498}
1394 1499
1395/* 1500/*
@@ -1403,12 +1508,12 @@ static int rcu_gp_init(struct rcu_state *rsp)
1403 rcu_bind_gp_kthread(); 1508 rcu_bind_gp_kthread();
1404 raw_spin_lock_irq(&rnp->lock); 1509 raw_spin_lock_irq(&rnp->lock);
1405 smp_mb__after_unlock_lock(); 1510 smp_mb__after_unlock_lock();
1406 if (rsp->gp_flags == 0) { 1511 if (!ACCESS_ONCE(rsp->gp_flags)) {
1407 /* Spurious wakeup, tell caller to go back to sleep. */ 1512 /* Spurious wakeup, tell caller to go back to sleep. */
1408 raw_spin_unlock_irq(&rnp->lock); 1513 raw_spin_unlock_irq(&rnp->lock);
1409 return 0; 1514 return 0;
1410 } 1515 }
1411 rsp->gp_flags = 0; /* Clear all flags: New grace period. */ 1516 ACCESS_ONCE(rsp->gp_flags) = 0; /* Clear all flags: New grace period. */
1412 1517
1413 if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) { 1518 if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) {
1414 /* 1519 /*
@@ -1453,7 +1558,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1453 WARN_ON_ONCE(rnp->completed != rsp->completed); 1558 WARN_ON_ONCE(rnp->completed != rsp->completed);
1454 ACCESS_ONCE(rnp->completed) = rsp->completed; 1559 ACCESS_ONCE(rnp->completed) = rsp->completed;
1455 if (rnp == rdp->mynode) 1560 if (rnp == rdp->mynode)
1456 __note_gp_changes(rsp, rnp, rdp); 1561 (void)__note_gp_changes(rsp, rnp, rdp);
1457 rcu_preempt_boost_start_gp(rnp); 1562 rcu_preempt_boost_start_gp(rnp);
1458 trace_rcu_grace_period_init(rsp->name, rnp->gpnum, 1563 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
1459 rnp->level, rnp->grplo, 1564 rnp->level, rnp->grplo,
@@ -1501,7 +1606,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1501 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 1606 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
1502 raw_spin_lock_irq(&rnp->lock); 1607 raw_spin_lock_irq(&rnp->lock);
1503 smp_mb__after_unlock_lock(); 1608 smp_mb__after_unlock_lock();
1504 rsp->gp_flags &= ~RCU_GP_FLAG_FQS; 1609 ACCESS_ONCE(rsp->gp_flags) &= ~RCU_GP_FLAG_FQS;
1505 raw_spin_unlock_irq(&rnp->lock); 1610 raw_spin_unlock_irq(&rnp->lock);
1506 } 1611 }
1507 return fqs_state; 1612 return fqs_state;
@@ -1513,6 +1618,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1513static void rcu_gp_cleanup(struct rcu_state *rsp) 1618static void rcu_gp_cleanup(struct rcu_state *rsp)
1514{ 1619{
1515 unsigned long gp_duration; 1620 unsigned long gp_duration;
1621 bool needgp = false;
1516 int nocb = 0; 1622 int nocb = 0;
1517 struct rcu_data *rdp; 1623 struct rcu_data *rdp;
1518 struct rcu_node *rnp = rcu_get_root(rsp); 1624 struct rcu_node *rnp = rcu_get_root(rsp);
@@ -1548,7 +1654,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1548 ACCESS_ONCE(rnp->completed) = rsp->gpnum; 1654 ACCESS_ONCE(rnp->completed) = rsp->gpnum;
1549 rdp = this_cpu_ptr(rsp->rda); 1655 rdp = this_cpu_ptr(rsp->rda);
1550 if (rnp == rdp->mynode) 1656 if (rnp == rdp->mynode)
1551 __note_gp_changes(rsp, rnp, rdp); 1657 needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
1552 /* smp_mb() provided by prior unlock-lock pair. */ 1658 /* smp_mb() provided by prior unlock-lock pair. */
1553 nocb += rcu_future_gp_cleanup(rsp, rnp); 1659 nocb += rcu_future_gp_cleanup(rsp, rnp);
1554 raw_spin_unlock_irq(&rnp->lock); 1660 raw_spin_unlock_irq(&rnp->lock);
@@ -1564,9 +1670,10 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1564 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); 1670 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
1565 rsp->fqs_state = RCU_GP_IDLE; 1671 rsp->fqs_state = RCU_GP_IDLE;
1566 rdp = this_cpu_ptr(rsp->rda); 1672 rdp = this_cpu_ptr(rsp->rda);
1567 rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */ 1673 /* Advance CBs to reduce false positives below. */
1568 if (cpu_needs_another_gp(rsp, rdp)) { 1674 needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp;
1569 rsp->gp_flags = RCU_GP_FLAG_INIT; 1675 if (needgp || cpu_needs_another_gp(rsp, rdp)) {
1676 ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT;
1570 trace_rcu_grace_period(rsp->name, 1677 trace_rcu_grace_period(rsp->name,
1571 ACCESS_ONCE(rsp->gpnum), 1678 ACCESS_ONCE(rsp->gpnum),
1572 TPS("newreq")); 1679 TPS("newreq"));
@@ -1593,6 +1700,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
1593 trace_rcu_grace_period(rsp->name, 1700 trace_rcu_grace_period(rsp->name,
1594 ACCESS_ONCE(rsp->gpnum), 1701 ACCESS_ONCE(rsp->gpnum),
1595 TPS("reqwait")); 1702 TPS("reqwait"));
1703 rsp->gp_state = RCU_GP_WAIT_GPS;
1596 wait_event_interruptible(rsp->gp_wq, 1704 wait_event_interruptible(rsp->gp_wq,
1597 ACCESS_ONCE(rsp->gp_flags) & 1705 ACCESS_ONCE(rsp->gp_flags) &
1598 RCU_GP_FLAG_INIT); 1706 RCU_GP_FLAG_INIT);
@@ -1620,6 +1728,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
1620 trace_rcu_grace_period(rsp->name, 1728 trace_rcu_grace_period(rsp->name,
1621 ACCESS_ONCE(rsp->gpnum), 1729 ACCESS_ONCE(rsp->gpnum),
1622 TPS("fqswait")); 1730 TPS("fqswait"));
1731 rsp->gp_state = RCU_GP_WAIT_FQS;
1623 ret = wait_event_interruptible_timeout(rsp->gp_wq, 1732 ret = wait_event_interruptible_timeout(rsp->gp_wq,
1624 ((gf = ACCESS_ONCE(rsp->gp_flags)) & 1733 ((gf = ACCESS_ONCE(rsp->gp_flags)) &
1625 RCU_GP_FLAG_FQS) || 1734 RCU_GP_FLAG_FQS) ||
@@ -1665,14 +1774,6 @@ static int __noreturn rcu_gp_kthread(void *arg)
1665 } 1774 }
1666} 1775}
1667 1776
1668static void rsp_wakeup(struct irq_work *work)
1669{
1670 struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
1671
1672 /* Wake up rcu_gp_kthread() to start the grace period. */
1673 wake_up(&rsp->gp_wq);
1674}
1675
1676/* 1777/*
1677 * Start a new RCU grace period if warranted, re-initializing the hierarchy 1778 * Start a new RCU grace period if warranted, re-initializing the hierarchy
1678 * in preparation for detecting the next grace period. The caller must hold 1779 * in preparation for detecting the next grace period. The caller must hold
@@ -1681,8 +1782,10 @@ static void rsp_wakeup(struct irq_work *work)
1681 * Note that it is legal for a dying CPU (which is marked as offline) to 1782 * Note that it is legal for a dying CPU (which is marked as offline) to
1682 * invoke this function. This can happen when the dying CPU reports its 1783 * invoke this function. This can happen when the dying CPU reports its
1683 * quiescent state. 1784 * quiescent state.
1785 *
1786 * Returns true if the grace-period kthread must be awakened.
1684 */ 1787 */
1685static void 1788static bool
1686rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, 1789rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
1687 struct rcu_data *rdp) 1790 struct rcu_data *rdp)
1688{ 1791{
@@ -1693,20 +1796,18 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
1693 * or a grace period is already in progress. 1796 * or a grace period is already in progress.
1694 * Either way, don't start a new grace period. 1797 * Either way, don't start a new grace period.
1695 */ 1798 */
1696 return; 1799 return false;
1697 } 1800 }
1698 rsp->gp_flags = RCU_GP_FLAG_INIT; 1801 ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT;
1699 trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), 1802 trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum),
1700 TPS("newreq")); 1803 TPS("newreq"));
1701 1804
1702 /* 1805 /*
1703 * We can't do wakeups while holding the rnp->lock, as that 1806 * We can't do wakeups while holding the rnp->lock, as that
1704 * could cause possible deadlocks with the rq->lock. Defer 1807 * could cause possible deadlocks with the rq->lock. Defer
1705 * the wakeup to interrupt context. And don't bother waking 1808 * the wakeup to our caller.
1706 * up the running kthread.
1707 */ 1809 */
1708 if (current != rsp->gp_kthread) 1810 return true;
1709 irq_work_queue(&rsp->wakeup_work);
1710} 1811}
1711 1812
1712/* 1813/*
@@ -1715,12 +1816,14 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
1715 * is invoked indirectly from rcu_advance_cbs(), which would result in 1816 * is invoked indirectly from rcu_advance_cbs(), which would result in
1716 * endless recursion -- or would do so if it wasn't for the self-deadlock 1817 * endless recursion -- or would do so if it wasn't for the self-deadlock
1717 * that is encountered beforehand. 1818 * that is encountered beforehand.
1819 *
1820 * Returns true if the grace-period kthread needs to be awakened.
1718 */ 1821 */
1719static void 1822static bool rcu_start_gp(struct rcu_state *rsp)
1720rcu_start_gp(struct rcu_state *rsp)
1721{ 1823{
1722 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1824 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1723 struct rcu_node *rnp = rcu_get_root(rsp); 1825 struct rcu_node *rnp = rcu_get_root(rsp);
1826 bool ret = false;
1724 1827
1725 /* 1828 /*
1726 * If there is no grace period in progress right now, any 1829 * If there is no grace period in progress right now, any
@@ -1730,8 +1833,9 @@ rcu_start_gp(struct rcu_state *rsp)
1730 * resulting in pointless grace periods. So, advance callbacks 1833 * resulting in pointless grace periods. So, advance callbacks
1731 * then start the grace period! 1834 * then start the grace period!
1732 */ 1835 */
1733 rcu_advance_cbs(rsp, rnp, rdp); 1836 ret = rcu_advance_cbs(rsp, rnp, rdp) || ret;
1734 rcu_start_gp_advanced(rsp, rnp, rdp); 1837 ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret;
1838 return ret;
1735} 1839}
1736 1840
1737/* 1841/*
@@ -1820,6 +1924,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1820{ 1924{
1821 unsigned long flags; 1925 unsigned long flags;
1822 unsigned long mask; 1926 unsigned long mask;
1927 bool needwake;
1823 struct rcu_node *rnp; 1928 struct rcu_node *rnp;
1824 1929
1825 rnp = rdp->mynode; 1930 rnp = rdp->mynode;
@@ -1848,9 +1953,11 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1848 * This GP can't end until cpu checks in, so all of our 1953 * This GP can't end until cpu checks in, so all of our
1849 * callbacks can be processed during the next GP. 1954 * callbacks can be processed during the next GP.
1850 */ 1955 */
1851 rcu_accelerate_cbs(rsp, rnp, rdp); 1956 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
1852 1957
1853 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ 1958 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
1959 if (needwake)
1960 rcu_gp_kthread_wake(rsp);
1854 } 1961 }
1855} 1962}
1856 1963
@@ -1951,7 +2058,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
1951static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) 2058static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
1952{ 2059{
1953 int i; 2060 int i;
1954 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); 2061 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
1955 2062
1956 /* No-CBs CPUs are handled specially. */ 2063 /* No-CBs CPUs are handled specially. */
1957 if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) 2064 if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
@@ -2320,7 +2427,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
2320 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2427 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2321 return; /* Someone beat us to it. */ 2428 return; /* Someone beat us to it. */
2322 } 2429 }
2323 rsp->gp_flags |= RCU_GP_FLAG_FQS; 2430 ACCESS_ONCE(rsp->gp_flags) |= RCU_GP_FLAG_FQS;
2324 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2431 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2325 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ 2432 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
2326} 2433}
@@ -2334,7 +2441,8 @@ static void
2334__rcu_process_callbacks(struct rcu_state *rsp) 2441__rcu_process_callbacks(struct rcu_state *rsp)
2335{ 2442{
2336 unsigned long flags; 2443 unsigned long flags;
2337 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); 2444 bool needwake;
2445 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
2338 2446
2339 WARN_ON_ONCE(rdp->beenonline == 0); 2447 WARN_ON_ONCE(rdp->beenonline == 0);
2340 2448
@@ -2345,8 +2453,10 @@ __rcu_process_callbacks(struct rcu_state *rsp)
2345 local_irq_save(flags); 2453 local_irq_save(flags);
2346 if (cpu_needs_another_gp(rsp, rdp)) { 2454 if (cpu_needs_another_gp(rsp, rdp)) {
2347 raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */ 2455 raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
2348 rcu_start_gp(rsp); 2456 needwake = rcu_start_gp(rsp);
2349 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); 2457 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
2458 if (needwake)
2459 rcu_gp_kthread_wake(rsp);
2350 } else { 2460 } else {
2351 local_irq_restore(flags); 2461 local_irq_restore(flags);
2352 } 2462 }
@@ -2404,6 +2514,8 @@ static void invoke_rcu_core(void)
2404static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, 2514static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2405 struct rcu_head *head, unsigned long flags) 2515 struct rcu_head *head, unsigned long flags)
2406{ 2516{
2517 bool needwake;
2518
2407 /* 2519 /*
2408 * If called from an extended quiescent state, invoke the RCU 2520 * If called from an extended quiescent state, invoke the RCU
2409 * core in order to force a re-evaluation of RCU's idleness. 2521 * core in order to force a re-evaluation of RCU's idleness.
@@ -2433,8 +2545,10 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2433 2545
2434 raw_spin_lock(&rnp_root->lock); 2546 raw_spin_lock(&rnp_root->lock);
2435 smp_mb__after_unlock_lock(); 2547 smp_mb__after_unlock_lock();
2436 rcu_start_gp(rsp); 2548 needwake = rcu_start_gp(rsp);
2437 raw_spin_unlock(&rnp_root->lock); 2549 raw_spin_unlock(&rnp_root->lock);
2550 if (needwake)
2551 rcu_gp_kthread_wake(rsp);
2438 } else { 2552 } else {
2439 /* Give the grace period a kick. */ 2553 /* Give the grace period a kick. */
2440 rdp->blimit = LONG_MAX; 2554 rdp->blimit = LONG_MAX;
@@ -2537,6 +2651,20 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
2537EXPORT_SYMBOL_GPL(call_rcu_bh); 2651EXPORT_SYMBOL_GPL(call_rcu_bh);
2538 2652
2539/* 2653/*
2654 * Queue an RCU callback for lazy invocation after a grace period.
2655 * This will likely be later named something like "call_rcu_lazy()",
2656 * but this change will require some way of tagging the lazy RCU
2657 * callbacks in the list of pending callbacks. Until then, this
2658 * function may only be called from __kfree_rcu().
2659 */
2660void kfree_call_rcu(struct rcu_head *head,
2661 void (*func)(struct rcu_head *rcu))
2662{
2663 __call_rcu(head, func, rcu_state_p, -1, 1);
2664}
2665EXPORT_SYMBOL_GPL(kfree_call_rcu);
2666
2667/*
2540 * Because a context switch is a grace period for RCU-sched and RCU-bh, 2668 * Because a context switch is a grace period for RCU-sched and RCU-bh,
2541 * any blocking grace-period wait automatically implies a grace period 2669 * any blocking grace-period wait automatically implies a grace period
2542 * if there is only one CPU online at any point time during execution 2670 * if there is only one CPU online at any point time during execution
@@ -2659,7 +2787,7 @@ unsigned long get_state_synchronize_rcu(void)
2659 * time-consuming work between get_state_synchronize_rcu() 2787 * time-consuming work between get_state_synchronize_rcu()
2660 * and cond_synchronize_rcu(). 2788 * and cond_synchronize_rcu().
2661 */ 2789 */
2662 return smp_load_acquire(&rcu_state->gpnum); 2790 return smp_load_acquire(&rcu_state_p->gpnum);
2663} 2791}
2664EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); 2792EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
2665 2793
@@ -2685,7 +2813,7 @@ void cond_synchronize_rcu(unsigned long oldstate)
2685 * Ensure that this load happens before any RCU-destructive 2813 * Ensure that this load happens before any RCU-destructive
2686 * actions the caller might carry out after we return. 2814 * actions the caller might carry out after we return.
2687 */ 2815 */
2688 newstate = smp_load_acquire(&rcu_state->completed); 2816 newstate = smp_load_acquire(&rcu_state_p->completed);
2689 if (ULONG_CMP_GE(oldstate, newstate)) 2817 if (ULONG_CMP_GE(oldstate, newstate))
2690 synchronize_rcu(); 2818 synchronize_rcu();
2691} 2819}
@@ -2988,7 +3116,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
2988static void rcu_barrier_func(void *type) 3116static void rcu_barrier_func(void *type)
2989{ 3117{
2990 struct rcu_state *rsp = type; 3118 struct rcu_state *rsp = type;
2991 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); 3119 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
2992 3120
2993 _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done); 3121 _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
2994 atomic_inc(&rsp->barrier_cpu_count); 3122 atomic_inc(&rsp->barrier_cpu_count);
@@ -3160,7 +3288,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
3160 * that this CPU cannot possibly have any RCU callbacks in flight yet. 3288 * that this CPU cannot possibly have any RCU callbacks in flight yet.
3161 */ 3289 */
3162static void 3290static void
3163rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) 3291rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
3164{ 3292{
3165 unsigned long flags; 3293 unsigned long flags;
3166 unsigned long mask; 3294 unsigned long mask;
@@ -3173,7 +3301,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
3173 /* Set up local state, ensuring consistent view of global state. */ 3301 /* Set up local state, ensuring consistent view of global state. */
3174 raw_spin_lock_irqsave(&rnp->lock, flags); 3302 raw_spin_lock_irqsave(&rnp->lock, flags);
3175 rdp->beenonline = 1; /* We have now been online. */ 3303 rdp->beenonline = 1; /* We have now been online. */
3176 rdp->preemptible = preemptible;
3177 rdp->qlen_last_fqs_check = 0; 3304 rdp->qlen_last_fqs_check = 0;
3178 rdp->n_force_qs_snap = rsp->n_force_qs; 3305 rdp->n_force_qs_snap = rsp->n_force_qs;
3179 rdp->blimit = blimit; 3306 rdp->blimit = blimit;
@@ -3217,8 +3344,7 @@ static void rcu_prepare_cpu(int cpu)
3217 struct rcu_state *rsp; 3344 struct rcu_state *rsp;
3218 3345
3219 for_each_rcu_flavor(rsp) 3346 for_each_rcu_flavor(rsp)
3220 rcu_init_percpu_data(cpu, rsp, 3347 rcu_init_percpu_data(cpu, rsp);
3221 strcmp(rsp->name, "rcu_preempt") == 0);
3222} 3348}
3223 3349
3224/* 3350/*
@@ -3228,7 +3354,7 @@ static int rcu_cpu_notify(struct notifier_block *self,
3228 unsigned long action, void *hcpu) 3354 unsigned long action, void *hcpu)
3229{ 3355{
3230 long cpu = (long)hcpu; 3356 long cpu = (long)hcpu;
3231 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 3357 struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
3232 struct rcu_node *rnp = rdp->mynode; 3358 struct rcu_node *rnp = rdp->mynode;
3233 struct rcu_state *rsp; 3359 struct rcu_state *rsp;
3234 3360
@@ -3402,8 +3528,8 @@ static void __init rcu_init_one(struct rcu_state *rsp,
3402 rnp->qsmaskinit = 0; 3528 rnp->qsmaskinit = 0;
3403 rnp->grplo = j * cpustride; 3529 rnp->grplo = j * cpustride;
3404 rnp->grphi = (j + 1) * cpustride - 1; 3530 rnp->grphi = (j + 1) * cpustride - 1;
3405 if (rnp->grphi >= NR_CPUS) 3531 if (rnp->grphi >= nr_cpu_ids)
3406 rnp->grphi = NR_CPUS - 1; 3532 rnp->grphi = nr_cpu_ids - 1;
3407 if (i == 0) { 3533 if (i == 0) {
3408 rnp->grpnum = 0; 3534 rnp->grpnum = 0;
3409 rnp->grpmask = 0; 3535 rnp->grpmask = 0;
@@ -3422,7 +3548,6 @@ static void __init rcu_init_one(struct rcu_state *rsp,
3422 3548
3423 rsp->rda = rda; 3549 rsp->rda = rda;
3424 init_waitqueue_head(&rsp->gp_wq); 3550 init_waitqueue_head(&rsp->gp_wq);
3425 init_irq_work(&rsp->wakeup_work, rsp_wakeup);
3426 rnp = rsp->level[rcu_num_lvls - 1]; 3551 rnp = rsp->level[rcu_num_lvls - 1];
3427 for_each_possible_cpu(i) { 3552 for_each_possible_cpu(i) {
3428 while (i > rnp->grphi) 3553 while (i > rnp->grphi)