path: root/kernel/rcutree.c
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--  kernel/rcutree.c  191
1 file changed, 62 insertions(+), 129 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 16ea67925015..e08abb9461ac 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -218,8 +218,8 @@ module_param(blimit, long, 0444);
 module_param(qhimark, long, 0444);
 module_param(qlowmark, long, 0444);
 
-static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS;
-static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
+static ulong jiffies_till_first_fqs = ULONG_MAX;
+static ulong jiffies_till_next_fqs = ULONG_MAX;
 
 module_param(jiffies_till_first_fqs, ulong, 0644);
 module_param(jiffies_till_next_fqs, ulong, 0644);
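
The ULONG_MAX values are sentinels meaning "not set on the boot command line": the real defaults now depend on HZ and on the number of possible CPUs, so they are filled in later by rcu_init_geometry() (see the final hunk below). A minimal userspace sketch of this sentinel-default idiom; the constants 3 and 256 are illustrative stand-ins for what RCU_JIFFIES_TILL_FORCE_QS and RCU_JIFFIES_FQS_DIV expand to, not authoritative values:

#include <limits.h>
#include <stdio.h>

/* ULONG_MAX means "user did not override this parameter". */
static unsigned long jiffies_till_first_fqs = ULONG_MAX;

static void resolve_defaults(unsigned long ncpus)
{
	/* 3 and 256 stand in for the HZ-derived RCU_JIFFIES_TILL_FORCE_QS
	 * and for RCU_JIFFIES_FQS_DIV. */
	unsigned long d = 3 + ncpus / 256;

	if (jiffies_till_first_fqs == ULONG_MAX)  /* still the sentinel? */
		jiffies_till_first_fqs = d;       /* apply computed default */
}

int main(void)
{
	resolve_defaults(1024);
	printf("jiffies_till_first_fqs = %lu\n", jiffies_till_first_fqs); /* 7 */
	return 0;
}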
@@ -866,7 +866,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 	 * See Documentation/RCU/stallwarn.txt for info on how to debug
 	 * RCU CPU stall warnings.
 	 */
-	printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks:",
+	pr_err("INFO: %s detected stalls on CPUs/tasks:",
 	       rsp->name);
 	print_cpu_stall_info_begin();
 	rcu_for_each_leaf_node(rsp, rnp) {
@@ -899,7 +899,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 	       smp_processor_id(), (long)(jiffies - rsp->gp_start),
 	       rsp->gpnum, rsp->completed, totqlen);
 	if (ndetected == 0)
-		printk(KERN_ERR "INFO: Stall ended before state dump start\n");
+		pr_err("INFO: Stall ended before state dump start\n");
 	else if (!trigger_all_cpu_backtrace())
 		rcu_dump_cpu_stacks(rsp);
 
@@ -922,7 +922,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	 * See Documentation/RCU/stallwarn.txt for info on how to debug
 	 * RCU CPU stall warnings.
 	 */
-	printk(KERN_ERR "INFO: %s self-detected stall on CPU", rsp->name);
+	pr_err("INFO: %s self-detected stall on CPU", rsp->name);
 	print_cpu_stall_info_begin();
 	print_cpu_stall_info(rsp, smp_processor_id());
 	print_cpu_stall_info_end();
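
These three hunks are a mechanical conversion: pr_err(fmt, ...) is the standard shorthand for printk(KERN_ERR pr_fmt(fmt), ...), so the logged output is unchanged. A rough userspace model of the macros involved (simplified; in real kernels KERN_ERR is a control-character prefix rather than "<3>", and printk is not printf):

#include <stdio.h>

/* Simplified userspace stand-ins for the kernel's printk machinery. */
#define KERN_ERR	"<3>"	/* loglevel prefix consumed by the console layer */
#define printk(...)	printf(__VA_ARGS__)
#define pr_fmt(fmt)	fmt	/* files may redefine this to add a prefix */
#define pr_err(fmt, ...) printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	pr_err("INFO: %s self-detected stall on CPU\n", "rcu_sched");
	return 0;
}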
@@ -985,65 +985,6 @@ void rcu_cpu_stall_reset(void)
 }
 
 /*
- * Update CPU-local rcu_data state to record the newly noticed grace period.
- * This is used both when we started the grace period and when we notice
- * that someone else started the grace period.  The caller must hold the
- * ->lock of the leaf rcu_node structure corresponding to the current CPU,
- * and must have irqs disabled.
- */
-static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
-{
-	if (rdp->gpnum != rnp->gpnum) {
-		/*
-		 * If the current grace period is waiting for this CPU,
-		 * set up to detect a quiescent state, otherwise don't
-		 * go looking for one.
-		 */
-		rdp->gpnum = rnp->gpnum;
-		trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
-		rdp->passed_quiesce = 0;
-		rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
-		zero_cpu_stall_ticks(rdp);
-	}
-}
-
-static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
-{
-	unsigned long flags;
-	struct rcu_node *rnp;
-
-	local_irq_save(flags);
-	rnp = rdp->mynode;
-	if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */
-	    !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
-		local_irq_restore(flags);
-		return;
-	}
-	__note_new_gpnum(rsp, rnp, rdp);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
-/*
- * Did someone else start a new RCU grace period start since we last
- * checked?  Update local state appropriately if so.  Must be called
- * on the CPU corresponding to rdp.
- */
-static int
-check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	local_irq_save(flags);
-	if (rdp->gpnum != rsp->gpnum) {
-		note_new_gpnum(rsp, rdp);
-		ret = 1;
-	}
-	local_irq_restore(flags);
-	return ret;
-}
-
-/*
  * Initialize the specified rcu_data structure's callback list to empty.
  */
 static void init_callback_list(struct rcu_data *rdp)
@@ -1313,18 +1254,16 @@ static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 }
 
 /*
- * Advance this CPU's callbacks, but only if the current grace period
- * has ended.  This may be called only from the CPU to whom the rdp
- * belongs.  In addition, the corresponding leaf rcu_node structure's
- * ->lock must be held by the caller, with irqs disabled.
+ * Update CPU-local rcu_data state to record the beginnings and ends of
+ * grace periods.  The caller must hold the ->lock of the leaf rcu_node
+ * structure corresponding to the current CPU, and must have irqs disabled.
  */
-static void
-__rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
+static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
 {
-	/* Did another grace period end? */
+	/* Handle the ends of any preceding grace periods first. */
 	if (rdp->completed == rnp->completed) {
 
-		/* No, so just accelerate recent callbacks. */
+		/* No grace period end, so just accelerate recent callbacks. */
 		rcu_accelerate_cbs(rsp, rnp, rdp);
 
 	} else {
@@ -1335,68 +1274,40 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
 		/* Remember that we saw this grace-period completion. */
 		rdp->completed = rnp->completed;
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
+	}
 
+	if (rdp->gpnum != rnp->gpnum) {
 		/*
-		 * If we were in an extended quiescent state, we may have
-		 * missed some grace periods that others CPUs handled on
-		 * our behalf. Catch up with this state to avoid noting
-		 * spurious new grace periods.  If another grace period
-		 * has started, then rnp->gpnum will have advanced, so
-		 * we will detect this later on.  Of course, any quiescent
-		 * states we found for the old GP are now invalid.
-		 */
-		if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) {
-			rdp->gpnum = rdp->completed;
-			rdp->passed_quiesce = 0;
-		}
-
-		/*
-		 * If RCU does not need a quiescent state from this CPU,
-		 * then make sure that this CPU doesn't go looking for one.
+		 * If the current grace period is waiting for this CPU,
+		 * set up to detect a quiescent state, otherwise don't
+		 * go looking for one.
 		 */
-		if ((rnp->qsmask & rdp->grpmask) == 0)
-			rdp->qs_pending = 0;
+		rdp->gpnum = rnp->gpnum;
+		trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
+		rdp->passed_quiesce = 0;
+		rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
+		zero_cpu_stall_ticks(rdp);
 	}
 }
 
-/*
- * Advance this CPU's callbacks, but only if the current grace period
- * has ended.  This may be called only from the CPU to whom the rdp
- * belongs.
- */
-static void
-rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
+static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
 {
 	unsigned long flags;
 	struct rcu_node *rnp;
 
 	local_irq_save(flags);
 	rnp = rdp->mynode;
-	if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */
+	if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) &&
+	     rdp->completed == ACCESS_ONCE(rnp->completed)) || /* w/out lock. */
 	    !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
 		local_irq_restore(flags);
 		return;
 	}
-	__rcu_process_gp_end(rsp, rnp, rdp);
+	__note_gp_changes(rsp, rnp, rdp);
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
- * Do per-CPU grace-period initialization for running CPU.  The caller
- * must hold the lock of the leaf rcu_node structure corresponding to
- * this CPU.
- */
-static void
-rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
-{
-	/* Prior grace period ended, so advance callbacks for current CPU. */
-	__rcu_process_gp_end(rsp, rnp, rdp);
-
-	/* Set state so that this CPU will detect the next quiescent state. */
-	__note_new_gpnum(rsp, rnp, rdp);
-}
-
-/*
  * Initialize a new grace period.
  */
 static int rcu_gp_init(struct rcu_state *rsp)
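
note_gp_changes() keeps the lockless fast path that both of the functions it replaces used: first compare the per-CPU snapshot against the rcu_node fields outside the lock (the ACCESS_ONCE() reads), then take the lock only via raw_spin_trylock(), giving up on contention because the whole check is simply retried from a later softirq pass. A runnable userspace sketch of that check-then-trylock shape, using pthreads and hypothetical names (node_gpnum, local_gpnum):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic unsigned long node_gpnum = 5;	/* shared grace-period number */
static unsigned long local_gpnum = 4;		/* this CPU's cached copy */

/* Returns 1 if the cached copy was refreshed, 0 if the refresh was
 * skipped and left for a later pass. */
static int note_changes(void)
{
	/* Racy peek outside the lock, like the ACCESS_ONCE() reads:
	 * if nothing appears to have changed, skip the lock entirely. */
	if (local_gpnum == atomic_load_explicit(&node_gpnum,
						memory_order_relaxed))
		return 0;			/* nothing to do */

	/* Opportunistic acquisition: on contention, give up rather than
	 * spin, because this path will be retried later anyway. */
	if (pthread_mutex_trylock(&node_lock) != 0)
		return 0;
	local_gpnum = node_gpnum;		/* refresh under the lock */
	pthread_mutex_unlock(&node_lock);
	return 1;
}

int main(void)
{
	printf("updated=%d local=%lu\n", note_changes(), local_gpnum);
	return 0;
}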
@@ -1444,16 +1355,16 @@ static int rcu_gp_init(struct rcu_state *rsp)
 		WARN_ON_ONCE(rnp->completed != rsp->completed);
 		ACCESS_ONCE(rnp->completed) = rsp->completed;
 		if (rnp == rdp->mynode)
-			rcu_start_gp_per_cpu(rsp, rnp, rdp);
+			__note_gp_changes(rsp, rnp, rdp);
 		rcu_preempt_boost_start_gp(rnp);
 		trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
 					    rnp->level, rnp->grplo,
 					    rnp->grphi, rnp->qsmask);
 		raw_spin_unlock_irq(&rnp->lock);
 #ifdef CONFIG_PROVE_RCU_DELAY
-		if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 &&
+		if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 &&
 		    system_state == SYSTEM_RUNNING)
-			schedule_timeout_uninterruptible(2);
+			udelay(200);
 #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
 		cond_resched();
 	}
@@ -1527,7 +1438,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 		ACCESS_ONCE(rnp->completed) = rsp->gpnum;
 		rdp = this_cpu_ptr(rsp->rda);
 		if (rnp == rdp->mynode)
-			__rcu_process_gp_end(rsp, rnp, rdp);
+			__note_gp_changes(rsp, rnp, rdp);
 		nocb += rcu_future_gp_cleanup(rsp, rnp);
 		raw_spin_unlock_irq(&rnp->lock);
 		cond_resched();
@@ -1613,6 +1524,14 @@ static int __noreturn rcu_gp_kthread(void *arg)
 	}
 }
 
+static void rsp_wakeup(struct irq_work *work)
+{
+	struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
+
+	/* Wake up rcu_gp_kthread() to start the grace period. */
+	wake_up(&rsp->gp_wq);
+}
+
 /*
  * Start a new RCU grace period if warranted, re-initializing the hierarchy
  * in preparation for detecting the next grace period.  The caller must hold
@@ -1637,8 +1556,12 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 	}
 	rsp->gp_flags = RCU_GP_FLAG_INIT;
 
-	/* Wake up rcu_gp_kthread() to start the grace period. */
-	wake_up(&rsp->gp_wq);
+	/*
+	 * We can't do wakeups while holding the rnp->lock, as that
+	 * could cause possible deadlocks with the rq->lock. Defer
+	 * the wakeup to interrupt context.
+	 */
+	irq_work_queue(&rsp->wakeup_work);
 }
 
 /*
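
The wakeup change here, together with rsp_wakeup() above and the init_irq_work() hunk below, avoids a lock-ordering problem: rcu_start_gp_advanced() runs with the rcu_node ->lock held, while wake_up() can take the scheduler's rq->lock, so a scheduler path that holds rq->lock and calls into RCU could deadlock. irq_work_queue() only marks the work item and (typically) raises a self-IPI; the wakeup then runs later from hard-interrupt context with no RCU locks held. The three pieces, gathered from this diff for readability (kernel context assumed, not a standalone program):

/* 1. At init time (rcu_init_one), bind the work item to its handler: */
	init_irq_work(&rsp->wakeup_work, rsp_wakeup);

/* 2. With rnp->lock held (rcu_start_gp_advanced), defer instead of waking: */
	irq_work_queue(&rsp->wakeup_work);

/* 3. Later, in hard-irq context, with no rcu_node lock held: */
static void rsp_wakeup(struct irq_work *work)
{
	struct rcu_state *rsp = container_of(work, struct rcu_state,
					     wakeup_work);

	wake_up(&rsp->gp_wq);	/* now safe to take rq->lock */
}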
@@ -1793,9 +1716,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 static void
 rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-	/* If there is now a new grace period, record and return. */
-	if (check_for_new_grace_period(rsp, rdp))
-		return;
+	/* Check for grace-period ends and beginnings. */
+	note_gp_changes(rsp, rdp);
 
 	/*
 	 * Does this CPU still need to do its part for current grace period?
@@ -2259,9 +2181,6 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 
 	WARN_ON_ONCE(rdp->beenonline == 0);
 
-	/* Handle the end of a grace period that some other CPU ended. */
-	rcu_process_gp_end(rsp, rdp);
-
 	/* Update RCU state based on any recent quiescent states. */
 	rcu_check_quiescent_state(rsp, rdp);
 
@@ -2346,8 +2265,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
 
 		/* Are we ignoring a completed grace period? */
-		rcu_process_gp_end(rsp, rdp);
-		check_for_new_grace_period(rsp, rdp);
+		note_gp_changes(rsp, rdp);
 
 		/* Start a new grace period if one not already started. */
 		if (!rcu_gp_in_progress(rsp)) {
@@ -3108,7 +3026,7 @@ static int __init rcu_spawn_gp_kthread(void)
 	struct task_struct *t;
 
 	for_each_rcu_flavor(rsp) {
-		t = kthread_run(rcu_gp_kthread, rsp, rsp->name);
+		t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name);
 		BUG_ON(IS_ERR(t));
 		rnp = rcu_get_root(rsp);
 		raw_spin_lock_irqsave(&rnp->lock, flags);
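
The extra "%s" is a format-string hardening fix: kthread_run() treats its trailing arguments as a printf-style format for the task name, so passing rsp->name directly would let any '%' in the name be parsed as a conversion specification. With a constant "%s" format, the name is pure data. A userspace illustration of the same distinction:

#include <stdio.h>

int main(void)
{
	const char *name = "rcu%s_sched";	/* hostile or accidental '%' */

	/* BAD: the variable becomes the format string; printf would try
	 * to read a nonexistent char * argument for the embedded %s.
	 * Left commented out because it is undefined behavior. */
	/* printf(name); */

	/* GOOD: constant format, name treated purely as data. */
	printf("%s\n", name);
	return 0;
}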
@@ -3235,6 +3153,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 
 	rsp->rda = rda;
 	init_waitqueue_head(&rsp->gp_wq);
+	init_irq_work(&rsp->wakeup_work, rsp_wakeup);
 	rnp = rsp->level[rcu_num_lvls - 1];
 	for_each_possible_cpu(i) {
 		while (i > rnp->grphi)
@@ -3252,11 +3171,25 @@ static void __init rcu_init_one(struct rcu_state *rsp,
  */
 static void __init rcu_init_geometry(void)
 {
+	ulong d;
 	int i;
 	int j;
 	int n = nr_cpu_ids;
 	int rcu_capacity[MAX_RCU_LVLS + 1];
 
+	/*
+	 * Initialize any unspecified boot parameters.
+	 * The default values of jiffies_till_first_fqs and
+	 * jiffies_till_next_fqs are set to the RCU_JIFFIES_TILL_FORCE_QS
+	 * value, which is a function of HZ, then adding one for each
+	 * RCU_JIFFIES_FQS_DIV CPUs that might be on the system.
+	 */
+	d = RCU_JIFFIES_TILL_FORCE_QS + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
+	if (jiffies_till_first_fqs == ULONG_MAX)
+		jiffies_till_first_fqs = d;
+	if (jiffies_till_next_fqs == ULONG_MAX)
+		jiffies_till_next_fqs = d;
+
 	/* If the compile-time values are accurate, just leave. */
 	if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF &&
 	    nr_cpu_ids == NR_CPUS)
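
For concreteness, here is the arithmetic the comment above describes, assuming the definitions in kernel/rcutree.h at the time of this commit (RCU_JIFFIES_TILL_FORCE_QS expanding to 1 + (HZ > 250) + (HZ > 500), and RCU_JIFFIES_FQS_DIV to 256); treat those expansions as context, not guaranteed:

#include <stdio.h>

/* Userspace model of the deferred FQS-timeout default computation. */
static unsigned long fqs_default(unsigned long hz, unsigned long ncpus)
{
	unsigned long base = 1 + (hz > 250) + (hz > 500);	/* HZ-derived */

	return base + ncpus / 256;	/* +1 jiffy per 256 possible CPUs */
}

int main(void)
{
	/* HZ=1000, 16 CPUs: 3 + 0 = 3 jiffies (the old fixed default). */
	printf("%lu\n", fqs_default(1000, 16));
	/* HZ=1000, 4096 CPUs: 3 + 16 = 19 jiffies between FQS scans. */
	printf("%lu\n", fqs_default(1000, 4096));
	return 0;
}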