path: root/kernel/rcutree.c
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--	kernel/rcutree.c	260
1 files changed, 197 insertions, 63 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5b8ad827fd86..d8534308fd05 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -64,7 +64,7 @@
 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 
-#define RCU_STATE_INITIALIZER(sname, cr) { \
+#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \
 	.level = { &sname##_state.node[0] }, \
 	.call = cr, \
 	.fqs_state = RCU_GP_IDLE, \
@@ -76,13 +76,14 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
 	.onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
 	.name = #sname, \
+	.abbr = sabbr, \
 }
 
 struct rcu_state rcu_sched_state =
-	RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
+	RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
+struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 static struct rcu_state *rcu_state;
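
The new 's'/'b' arguments feed a ->abbr field alongside the existing preprocessor tricks in RCU_STATE_INITIALIZER(): #sname stringifies the flavor name and sname##_state pastes tokens to reference the flavor's state structure. A minimal standalone sketch of the same initializer pattern, with hypothetical names and the field list trimmed to the relevant ones:

	#include <stdio.h>

	struct my_state {
		struct my_state *self;	/* set via ## token pasting, like ->level[] upstream */
		const char *name;	/* set via # stringification */
		char abbr;		/* the extra macro argument, like the new .abbr */
	};

	/* Same shape as RCU_STATE_INITIALIZER(): paste and stringify the name. */
	#define MY_STATE_INITIALIZER(sname, sabbr) {	\
		.self = &sname##_state,			\
		.name = #sname,				\
		.abbr = sabbr,				\
	}

	struct my_state demo_sched_state = MY_STATE_INITIALIZER(demo_sched, 's');

	int main(void)
	{
		printf("%s (%c)\n", demo_sched_state.name, demo_sched_state.abbr);
		return 0;
	}

Compiled with gcc this prints "demo_sched (s)"; the real macro initializes many more fields in exactly the same way.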
@@ -223,6 +224,8 @@ static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
 module_param(jiffies_till_first_fqs, ulong, 0644);
 module_param(jiffies_till_next_fqs, ulong, 0644);
 
+static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+				  struct rcu_data *rdp);
 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(int cpu);
@@ -310,6 +313,8 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 
 	if (rcu_gp_in_progress(rsp))
 		return 0;  /* No, a grace period is already in progress. */
+	if (rcu_nocb_needs_gp(rsp))
+		return 1;  /* Yes, a no-CBs CPU needs one. */
 	if (!rdp->nxttail[RCU_NEXT_TAIL])
 		return 0;  /* No, this is a no-CBs (or offline) CPU. */
 	if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
@@ -1035,10 +1040,11 @@ static void init_callback_list(struct rcu_data *rdp)
 {
 	int i;
 
+	if (init_nocb_callback_list(rdp))
+		return;
 	rdp->nxtlist = NULL;
 	for (i = 0; i < RCU_NEXT_SIZE; i++)
 		rdp->nxttail[i] = &rdp->nxtlist;
-	init_nocb_callback_list(rdp);
 }
 
 /*
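
For reference, init_callback_list() is resetting RCU's segmented callback list: one ->nxtlist chain plus an array of tail pointers, where every tail pointing back at &->nxtlist means "empty", and a no-CBs CPU now simply never gets past init_nocb_callback_list(). A simplified, self-contained sketch of that representation (hypothetical names, only two segments):

	#include <stddef.h>
	#include <stdio.h>

	struct cb {
		struct cb *next;
		int id;
	};

	/* Two-segment analogue of rdp->nxtlist / rdp->nxttail[RCU_NEXT_SIZE]. */
	#define SEG_DONE 0
	#define SEG_NEXT 1
	#define NSEGS    2

	struct cblist {
		struct cb *head;
		struct cb **tails[NSEGS];	/* tails[i] marks the end of segment i */
	};

	static void cblist_init(struct cblist *cl)
	{
		int i;

		cl->head = NULL;
		for (i = 0; i < NSEGS; i++)
			cl->tails[i] = &cl->head;	/* empty: all tails point at head */
	}

	static void cblist_enqueue(struct cblist *cl, struct cb *cb)
	{
		cb->next = NULL;
		*cl->tails[SEG_NEXT] = cb;		/* append to the last segment */
		cl->tails[SEG_NEXT] = &cb->next;
	}

	int main(void)
	{
		struct cblist cl;
		struct cb a = { .id = 1 }, b = { .id = 2 };
		struct cb *p;

		cblist_init(&cl);
		cblist_enqueue(&cl, &a);
		cblist_enqueue(&cl, &b);
		for (p = cl.head; p; p = p->next)
			printf("cb %d\n", p->id);
		return 0;
	}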
@@ -1071,6 +1077,120 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
 }
 
 /*
+ * Trace-event helper function for rcu_start_future_gp() and
+ * rcu_nocb_wait_gp().
+ */
+static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+				unsigned long c, char *s)
+{
+	trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
+				      rnp->completed, c, rnp->level,
+				      rnp->grplo, rnp->grphi, s);
+}
+
+/*
+ * Start some future grace period, as needed to handle newly arrived
+ * callbacks.  The required future grace periods are recorded in each
+ * rcu_node structure's ->need_future_gp field.
+ *
+ * The caller must hold the specified rcu_node structure's ->lock.
+ */
+static unsigned long __maybe_unused
+rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
+{
+	unsigned long c;
+	int i;
+	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+
+	/*
+	 * Pick up grace-period number for new callbacks.  If this
+	 * grace period is already marked as needed, return to the caller.
+	 */
+	c = rcu_cbs_completed(rdp->rsp, rnp);
+	trace_rcu_future_gp(rnp, rdp, c, "Startleaf");
+	if (rnp->need_future_gp[c & 0x1]) {
+		trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf");
+		return c;
+	}
+
+	/*
+	 * If either this rcu_node structure or the root rcu_node structure
+	 * believe that a grace period is in progress, then we must wait
+	 * for the one following, which is in "c".  Because our request
+	 * will be noticed at the end of the current grace period, we don't
+	 * need to explicitly start one.
+	 */
+	if (rnp->gpnum != rnp->completed ||
+	    ACCESS_ONCE(rnp_root->gpnum) != ACCESS_ONCE(rnp_root->completed)) {
+		rnp->need_future_gp[c & 0x1]++;
+		trace_rcu_future_gp(rnp, rdp, c, "Startedleaf");
+		return c;
+	}
+
+	/*
+	 * There might be no grace period in progress.  If we don't already
+	 * hold it, acquire the root rcu_node structure's lock in order to
+	 * start one (if needed).
+	 */
+	if (rnp != rnp_root)
+		raw_spin_lock(&rnp_root->lock);
+
+	/*
+	 * Get a new grace-period number.  If there really is no grace
+	 * period in progress, it will be smaller than the one we obtained
+	 * earlier.  Adjust callbacks as needed.  Note that even no-CBs
+	 * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
+	 */
+	c = rcu_cbs_completed(rdp->rsp, rnp_root);
+	for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
+		if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
+			rdp->nxtcompleted[i] = c;
+
+	/*
+	 * If the needed for the required grace period is already
+	 * recorded, trace and leave.
+	 */
+	if (rnp_root->need_future_gp[c & 0x1]) {
+		trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot");
+		goto unlock_out;
+	}
+
+	/* Record the need for the future grace period. */
+	rnp_root->need_future_gp[c & 0x1]++;
+
+	/* If a grace period is not already in progress, start one. */
+	if (rnp_root->gpnum != rnp_root->completed) {
+		trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
+	} else {
+		trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
+		rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
+	}
+unlock_out:
+	if (rnp != rnp_root)
+		raw_spin_unlock(&rnp_root->lock);
+	return c;
+}
+
+/*
+ * Clean up any old requests for the just-ended grace period.  Also return
+ * whether any additional grace periods have been requested.  Also invoke
+ * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads
+ * waiting for this grace period to complete.
+ */
+static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	int c = rnp->completed;
+	int needmore;
+	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+
+	rcu_nocb_gp_cleanup(rsp, rnp);
+	rnp->need_future_gp[c & 0x1] = 0;
+	needmore = rnp->need_future_gp[(c + 1) & 0x1];
+	trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup");
+	return needmore;
+}
+
+/*
  * If there is room, assign a ->completed number to any callbacks on
  * this CPU that have not already been assigned.  Also accelerate any
  * callbacks that were previously assigned a ->completed number that has
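
The bookkeeping above leans on ->need_future_gp[c & 0x1]: a caller can only ever be asking for the grace period after the current one or the one after that, so two counters indexed by the low-order bit of the grace-period number suffice, and a slot can be recycled as soon as its grace period completes. A stripped-down illustration of that idea, with hypothetical names and no locking or rcu_node hierarchy:

	#include <stdio.h>

	static unsigned long completed;		/* last finished grace period */
	static int need_future_gp[2];		/* requests, indexed by gp number & 0x1 */

	/* Record that someone needs grace period c; return nonzero if newly recorded. */
	static int request_gp(unsigned long c)
	{
		return need_future_gp[c & 0x1]++ == 0;
	}

	/* Grace period 'completed + 1' has just ended: retire its slot. */
	static int cleanup_gp(void)
	{
		unsigned long c = ++completed;

		need_future_gp[c & 0x1] = 0;		/* this gp's requests are now satisfied */
		return need_future_gp[(c + 1) & 0x1];	/* are more grace periods still wanted? */
	}

	int main(void)
	{
		request_gp(completed + 1);	/* want the next grace period */
		request_gp(completed + 2);	/* and the one after it */
		printf("more needed after gp 1? %d\n", cleanup_gp());	/* prints 1 */
		printf("more needed after gp 2? %d\n", cleanup_gp());	/* prints 0 */
		return 0;
	}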
@@ -1129,6 +1249,8 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 		rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
 		rdp->nxtcompleted[i] = c;
 	}
+	/* Record any needed additional grace periods. */
+	rcu_start_future_gp(rnp, rdp);
 
 	/* Trace depending on how much we were able to accelerate. */
 	if (!*rdp->nxttail[RCU_WAIT_TAIL])
@@ -1308,9 +1430,9 @@ static int rcu_gp_init(struct rcu_state *rsp)
 		rdp = this_cpu_ptr(rsp->rda);
 		rcu_preempt_check_blocked_tasks(rnp);
 		rnp->qsmask = rnp->qsmaskinit;
-		rnp->gpnum = rsp->gpnum;
+		ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
 		WARN_ON_ONCE(rnp->completed != rsp->completed);
-		rnp->completed = rsp->completed;
+		ACCESS_ONCE(rnp->completed) = rsp->completed;
 		if (rnp == rdp->mynode)
 			rcu_start_gp_per_cpu(rsp, rnp, rdp);
 		rcu_preempt_boost_start_gp(rnp);
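
The plain stores to ->gpnum and ->completed become ACCESS_ONCE() stores because those fields are now also read without holding ->lock (for example by rcu_start_future_gp() above). ACCESS_ONCE() was defined in include/linux/compiler.h of this era as a volatile cast; a small standalone sketch of the definition and the resulting reader/writer idiom (gp_counts and the helpers are illustrative, not kernel code):

	#include <stdio.h>

	/* Historic kernel definition (predates READ_ONCE()/WRITE_ONCE()). */
	#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

	struct gp_counts {
		unsigned long gpnum;
		unsigned long completed;
	};

	/* Writer: publish a new grace-period number as one un-merged store. */
	static void publish_gpnum(struct gp_counts *g, unsigned long new_gpnum)
	{
		ACCESS_ONCE(g->gpnum) = new_gpnum;
	}

	/* Lockless reader: take a snapshot the compiler may not re-fetch or merge. */
	static int gp_in_progress(struct gp_counts *g)
	{
		return ACCESS_ONCE(g->gpnum) != ACCESS_ONCE(g->completed);
	}

	int main(void)
	{
		struct gp_counts g = { .gpnum = 0, .completed = 0 };

		publish_gpnum(&g, 1);
		printf("gp in progress? %d\n", gp_in_progress(&g));	/* prints 1 */
		return 0;
	}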
@@ -1319,7 +1441,8 @@ static int rcu_gp_init(struct rcu_state *rsp)
 					    rnp->grphi, rnp->qsmask);
 		raw_spin_unlock_irq(&rnp->lock);
 #ifdef CONFIG_PROVE_RCU_DELAY
-		if ((random32() % (rcu_num_nodes * 8)) == 0)
+		if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 &&
+		    system_state == SYSTEM_RUNNING)
 			schedule_timeout_uninterruptible(2);
 #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
 		cond_resched();
@@ -1361,6 +1484,7 @@ int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
 static void rcu_gp_cleanup(struct rcu_state *rsp)
 {
 	unsigned long gp_duration;
+	int nocb = 0;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
@@ -1390,17 +1514,23 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	 */
 	rcu_for_each_node_breadth_first(rsp, rnp) {
 		raw_spin_lock_irq(&rnp->lock);
-		rnp->completed = rsp->gpnum;
+		ACCESS_ONCE(rnp->completed) = rsp->gpnum;
+		rdp = this_cpu_ptr(rsp->rda);
+		if (rnp == rdp->mynode)
+			__rcu_process_gp_end(rsp, rnp, rdp);
+		nocb += rcu_future_gp_cleanup(rsp, rnp);
 		raw_spin_unlock_irq(&rnp->lock);
 		cond_resched();
 	}
 	rnp = rcu_get_root(rsp);
 	raw_spin_lock_irq(&rnp->lock);
+	rcu_nocb_gp_set(rnp, nocb);
 
 	rsp->completed = rsp->gpnum; /* Declare grace period done. */
 	trace_rcu_grace_period(rsp->name, rsp->completed, "end");
 	rsp->fqs_state = RCU_GP_IDLE;
 	rdp = this_cpu_ptr(rsp->rda);
+	rcu_advance_cbs(rsp, rnp, rdp);  /* Reduce false positives below. */
 	if (cpu_needs_another_gp(rsp, rdp))
 		rsp->gp_flags = 1;
 	raw_spin_unlock_irq(&rnp->lock);
@@ -1476,57 +1606,62 @@ static int __noreturn rcu_gp_kthread(void *arg)
 /*
  * Start a new RCU grace period if warranted, re-initializing the hierarchy
  * in preparation for detecting the next grace period.  The caller must hold
- * the root node's ->lock, which is released before return.  Hard irqs must
- * be disabled.
+ * the root node's ->lock and hard irqs must be disabled.
  *
  * Note that it is legal for a dying CPU (which is marked as offline) to
  * invoke this function.  This can happen when the dying CPU reports its
  * quiescent state.
  */
 static void
-rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
-	__releases(rcu_get_root(rsp)->lock)
+rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+		      struct rcu_data *rdp)
 {
-	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
-	struct rcu_node *rnp = rcu_get_root(rsp);
-
-	if (!rsp->gp_kthread ||
-	    !cpu_needs_another_gp(rsp, rdp)) {
+	if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
 		/*
 		 * Either we have not yet spawned the grace-period
 		 * task, this CPU does not need another grace period,
 		 * or a grace period is already in progress.
 		 * Either way, don't start a new grace period.
 		 */
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
-
-	/*
-	 * Because there is no grace period in progress right now,
-	 * any callbacks we have up to this point will be satisfied
-	 * by the next grace period.  So this is a good place to
-	 * assign a grace period number to recently posted callbacks.
-	 */
-	rcu_accelerate_cbs(rsp, rnp, rdp);
-
 	rsp->gp_flags = RCU_GP_FLAG_INIT;
-	raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
-
-	/* Ensure that CPU is aware of completion of last grace period. */
-	rcu_process_gp_end(rsp, rdp);
-	local_irq_restore(flags);
 
 	/* Wake up rcu_gp_kthread() to start the grace period. */
 	wake_up(&rsp->gp_wq);
 }
 
 /*
+ * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
+ * callbacks.  Note that rcu_start_gp_advanced() cannot do this because it
+ * is invoked indirectly from rcu_advance_cbs(), which would result in
+ * endless recursion -- or would do so if it wasn't for the self-deadlock
+ * that is encountered beforehand.
+ */
+static void
+rcu_start_gp(struct rcu_state *rsp)
+{
+	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+	struct rcu_node *rnp = rcu_get_root(rsp);
+
+	/*
+	 * If there is no grace period in progress right now, any
+	 * callbacks we have up to this point will be satisfied by the
+	 * next grace period.  Also, advancing the callbacks reduces the
+	 * probability of false positives from cpu_needs_another_gp()
+	 * resulting in pointless grace periods.  So, advance callbacks
+	 * then start the grace period!
+	 */
+	rcu_advance_cbs(rsp, rnp, rdp);
+	rcu_start_gp_advanced(rsp, rnp, rdp);
+}
+
+/*
  * Report a full set of quiescent states to the specified rcu_state
  * data structure.  This involves cleaning up after the prior grace
  * period and letting rcu_start_gp() start up the next grace period
- * if one is needed.  Note that the caller must hold rnp->lock, as
- * required by rcu_start_gp(), which will release it.
+ * if one is needed.  Note that the caller must hold rnp->lock, which
+ * is released before return.
  */
 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	__releases(rcu_get_root(rsp)->lock)
@@ -2124,7 +2259,8 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 	local_irq_save(flags);
 	if (cpu_needs_another_gp(rsp, rdp)) {
 		raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
-		rcu_start_gp(rsp, flags);  /* releases above lock */
+		rcu_start_gp(rsp);
+		raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
 	} else {
 		local_irq_restore(flags);
 	}
@@ -2169,7 +2305,8 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
 
 static void invoke_rcu_core(void)
 {
-	raise_softirq(RCU_SOFTIRQ);
+	if (cpu_online(smp_processor_id()))
+		raise_softirq(RCU_SOFTIRQ);
 }
 
 /*
@@ -2204,11 +2341,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 
 	/* Start a new grace period if one not already started. */
 	if (!rcu_gp_in_progress(rsp)) {
-		unsigned long nestflag;
 		struct rcu_node *rnp_root = rcu_get_root(rsp);
 
-		raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
-		rcu_start_gp(rsp, nestflag);  /* rlses rnp_root->lock */
+		raw_spin_lock(&rnp_root->lock);
+		rcu_start_gp(rsp);
+		raw_spin_unlock(&rnp_root->lock);
 	} else {
 		/* Give the grace period a kick. */
 		rdp->blimit = LONG_MAX;
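
Both call sites change in the same way: rcu_start_gp() no longer consumes the root rcu_node lock on the caller's behalf, so its callers take the conventional lock/call/unlock shape (and __call_rcu_core() can drop the irqsave variant because interrupts are already disabled here). A small sketch of why the new convention is easier to follow, using a hypothetical pthread-based stand-in rather than the kernel's raw spinlocks:

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t root_lock = PTHREAD_MUTEX_INITIALIZER;

	/* New-style core: touches shared state, never unlocks for the caller. */
	static void do_start(void)
	{
		/* root_lock is held by the caller here */
		printf("start requested\n");
	}

	/* Old style: the callee consumed the lock, so every caller had to remember
	 * that the lock was gone on return (the "releases above lock" comments). */
	static void do_start_and_unlock(void)
	{
		printf("start requested\n");
		pthread_mutex_unlock(&root_lock);
	}

	int main(void)
	{
		/* Old calling convention. */
		pthread_mutex_lock(&root_lock);
		do_start_and_unlock();			/* lock already dropped here */

		/* New calling convention: lock ownership stays in one function. */
		pthread_mutex_lock(&root_lock);
		do_start();
		pthread_mutex_unlock(&root_lock);
		return 0;
	}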
@@ -2628,19 +2765,27 @@ static int rcu_pending(int cpu)
 }
 
 /*
- * Check to see if any future RCU-related work will need to be done
- * by the current CPU, even if none need be done immediately, returning
- * 1 if so.
+ * Return true if the specified CPU has any callback.  If all_lazy is
+ * non-NULL, store an indication of whether all callbacks are lazy.
+ * (If there are no callbacks, all of them are deemed to be lazy.)
  */
-static int rcu_cpu_has_callbacks(int cpu)
+static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
 {
+	bool al = true;
+	bool hc = false;
+	struct rcu_data *rdp;
 	struct rcu_state *rsp;
 
-	/* RCU callbacks either ready or pending? */
-	for_each_rcu_flavor(rsp)
-		if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
-			return 1;
-	return 0;
+	for_each_rcu_flavor(rsp) {
+		rdp = per_cpu_ptr(rsp->rda, cpu);
+		if (rdp->qlen != rdp->qlen_lazy)
+			al = false;
+		if (rdp->nxtlist)
+			hc = true;
+	}
+	if (all_lazy)
+		*all_lazy = al;
+	return hc;
 }
 
 /*
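
The reworked rcu_cpu_has_callbacks() reports, through the optional all_lazy out-parameter, whether every queued callback is lazy (kfree-style, in no particular hurry), which lets idle-entry code pick a longer flush timer. A hedged usage sketch of the new signature; the stub and the policy function are illustrative, not the actual rcu_prepare_for_idle() code:

	#include <stdbool.h>
	#include <stdio.h>

	/* Stand-in for the new kernel helper; real per-CPU queue state omitted. */
	static bool cpu_all_lazy = true;
	static bool cpu_has_cbs = true;

	static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
	{
		if (all_lazy)
			*all_lazy = cpu_all_lazy;	/* "no callbacks" counts as all lazy */
		return cpu_has_cbs;
	}

	/* Illustrative idle-entry policy built on the new out-parameter. */
	static void pick_idle_policy(int cpu)
	{
		bool all_lazy;

		if (!rcu_cpu_has_callbacks(cpu, &all_lazy))
			printf("cpu %d: no callbacks, sleep indefinitely\n", cpu);
		else if (all_lazy)
			printf("cpu %d: only lazy callbacks, use a long timer\n", cpu);
		else
			printf("cpu %d: non-lazy callbacks, push for a grace period\n", cpu);
	}

	int main(void)
	{
		pick_idle_policy(0);
		return 0;
	}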
@@ -2859,7 +3004,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 	atomic_set(&rdp->dynticks->dynticks,
 		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
-	rcu_prepare_for_idle_init(cpu);
 	raw_spin_unlock(&rnp->lock);		/* irqs remain disabled. */
 
 	/* Add CPU to rcu_node bitmasks. */
@@ -2909,7 +3053,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;
 	struct rcu_state *rsp;
-	int ret = NOTIFY_OK;
 
 	trace_rcu_utilization("Start CPU hotplug");
 	switch (action) {
@@ -2923,21 +3066,12 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		rcu_boost_kthread_setaffinity(rnp, -1);
 		break;
 	case CPU_DOWN_PREPARE:
-		if (nocb_cpu_expendable(cpu))
-			rcu_boost_kthread_setaffinity(rnp, cpu);
-		else
-			ret = NOTIFY_BAD;
+		rcu_boost_kthread_setaffinity(rnp, cpu);
 		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
-		/*
-		 * The whole machine is "stopped" except this CPU, so we can
-		 * touch any data without introducing corruption.  We send the
-		 * dying CPU's callbacks to an arbitrarily chosen online CPU.
-		 */
 		for_each_rcu_flavor(rsp)
 			rcu_cleanup_dying_cpu(rsp);
-		rcu_cleanup_after_idle(cpu);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
@@ -2950,7 +3084,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		break;
 	}
 	trace_rcu_utilization("End CPU hotplug");
-	return ret;
+	return NOTIFY_OK;
 }
 
 /*
@@ -3085,6 +3219,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 			}
 			rnp->level = i;
 			INIT_LIST_HEAD(&rnp->blkd_tasks);
+			rcu_init_one_nocb(rnp);
 		}
 	}
 
@@ -3170,8 +3305,7 @@ void __init rcu_init(void)
 	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
 	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
 	__rcu_init_preempt();
-	rcu_init_nocb();
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 
 	/*
 	 * We don't need protection against CPU-hotplug here because