Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--  kernel/rcutree.c  260
1 file changed, 197 insertions, 63 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5b8ad827fd86..d8534308fd05 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -64,7 +64,7 @@
 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 
-#define RCU_STATE_INITIALIZER(sname, cr) { \
+#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \
         .level = { &sname##_state.node[0] }, \
         .call = cr, \
         .fqs_state = RCU_GP_IDLE, \
@@ -76,13 +76,14 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
         .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
         .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
         .name = #sname, \
+        .abbr = sabbr, \
 }
 
 struct rcu_state rcu_sched_state =
-        RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
+        RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
+struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 static struct rcu_state *rcu_state;
@@ -223,6 +224,8 @@ static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
 module_param(jiffies_till_first_fqs, ulong, 0644);
 module_param(jiffies_till_next_fqs, ulong, 0644);
 
+static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+                                  struct rcu_data *rdp);
 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(int cpu);
@@ -310,6 +313,8 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 
         if (rcu_gp_in_progress(rsp))
                 return 0;  /* No, a grace period is already in progress. */
+        if (rcu_nocb_needs_gp(rsp))
+                return 1;  /* Yes, a no-CBs CPU needs one. */
         if (!rdp->nxttail[RCU_NEXT_TAIL])
                 return 0;  /* No, this is a no-CBs (or offline) CPU. */
         if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
@@ -1035,10 +1040,11 @@ static void init_callback_list(struct rcu_data *rdp)
 {
         int i;
 
+        if (init_nocb_callback_list(rdp))
+                return;
         rdp->nxtlist = NULL;
         for (i = 0; i < RCU_NEXT_SIZE; i++)
                 rdp->nxttail[i] = &rdp->nxtlist;
-        init_nocb_callback_list(rdp);
 }
 
 /*
@@ -1071,6 +1077,120 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
 }
 
 /*
+ * Trace-event helper function for rcu_start_future_gp() and
+ * rcu_nocb_wait_gp().
+ */
+static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+                                unsigned long c, char *s)
+{
+        trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
+                                      rnp->completed, c, rnp->level,
+                                      rnp->grplo, rnp->grphi, s);
+}
+
+/*
+ * Start some future grace period, as needed to handle newly arrived
+ * callbacks.  The required future grace periods are recorded in each
+ * rcu_node structure's ->need_future_gp field.
+ *
+ * The caller must hold the specified rcu_node structure's ->lock.
+ */
+static unsigned long __maybe_unused
+rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
+{
+        unsigned long c;
+        int i;
+        struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+
+        /*
+         * Pick up grace-period number for new callbacks.  If this
+         * grace period is already marked as needed, return to the caller.
+         */
+        c = rcu_cbs_completed(rdp->rsp, rnp);
+        trace_rcu_future_gp(rnp, rdp, c, "Startleaf");
+        if (rnp->need_future_gp[c & 0x1]) {
+                trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf");
+                return c;
+        }
+
+        /*
+         * If either this rcu_node structure or the root rcu_node structure
+         * believe that a grace period is in progress, then we must wait
+         * for the one following, which is in "c".  Because our request
+         * will be noticed at the end of the current grace period, we don't
+         * need to explicitly start one.
+         */
+        if (rnp->gpnum != rnp->completed ||
+            ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
+                rnp->need_future_gp[c & 0x1]++;
+                trace_rcu_future_gp(rnp, rdp, c, "Startedleaf");
+                return c;
+        }
+
+        /*
+         * There might be no grace period in progress.  If we don't already
+         * hold it, acquire the root rcu_node structure's lock in order to
+         * start one (if needed).
+         */
+        if (rnp != rnp_root)
+                raw_spin_lock(&rnp_root->lock);
+
+        /*
+         * Get a new grace-period number.  If there really is no grace
+         * period in progress, it will be smaller than the one we obtained
+         * earlier.  Adjust callbacks as needed.  Note that even no-CBs
+         * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
+         */
+        c = rcu_cbs_completed(rdp->rsp, rnp_root);
+        for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
+                if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
+                        rdp->nxtcompleted[i] = c;
+
+        /*
+         * If the needed for the required grace period is already
+         * recorded, trace and leave.
+         */
+        if (rnp_root->need_future_gp[c & 0x1]) {
+                trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot");
+                goto unlock_out;
+        }
+
+        /* Record the need for the future grace period. */
+        rnp_root->need_future_gp[c & 0x1]++;
+
+        /* If a grace period is not already in progress, start one. */
+        if (rnp_root->gpnum != rnp_root->completed) {
+                trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
+        } else {
+                trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
+                rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
+        }
+unlock_out:
+        if (rnp != rnp_root)
+                raw_spin_unlock(&rnp_root->lock);
+        return c;
+}
+
+/*
+ * Clean up any old requests for the just-ended grace period.  Also return
+ * whether any additional grace periods have been requested.  Also invoke
+ * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads
+ * waiting for this grace period to complete.
+ */
+static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+        int c = rnp->completed;
+        int needmore;
+        struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+
+        rcu_nocb_gp_cleanup(rsp, rnp);
+        rnp->need_future_gp[c & 0x1] = 0;
+        needmore = rnp->need_future_gp[(c + 1) & 0x1];
+        trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup");
+        return needmore;
+}
+
+/*
  * If there is room, assign a ->completed number to any callbacks on
  * this CPU that have not already been assigned.  Also accelerate any
  * callbacks that were previously assigned a ->completed number that has
@@ -1129,6 +1249,8 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
                 rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
                 rdp->nxtcompleted[i] = c;
         }
+        /* Record any needed additional grace periods. */
+        rcu_start_future_gp(rnp, rdp);
 
         /* Trace depending on how much we were able to accelerate. */
         if (!*rdp->nxttail[RCU_WAIT_TAIL])
@@ -1308,9 +1430,9 @@ static int rcu_gp_init(struct rcu_state *rsp)
                 rdp = this_cpu_ptr(rsp->rda);
                 rcu_preempt_check_blocked_tasks(rnp);
                 rnp->qsmask = rnp->qsmaskinit;
-                rnp->gpnum = rsp->gpnum;
+                ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
                 WARN_ON_ONCE(rnp->completed != rsp->completed);
-                rnp->completed = rsp->completed;
+                ACCESS_ONCE(rnp->completed) = rsp->completed;
                 if (rnp == rdp->mynode)
                         rcu_start_gp_per_cpu(rsp, rnp, rdp);
                 rcu_preempt_boost_start_gp(rnp);
@@ -1319,7 +1441,8 @@ static int rcu_gp_init(struct rcu_state *rsp)
                             rnp->grphi, rnp->qsmask);
                 raw_spin_unlock_irq(&rnp->lock);
 #ifdef CONFIG_PROVE_RCU_DELAY
-                if ((random32() % (rcu_num_nodes * 8)) == 0)
+                if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 &&
+                    system_state == SYSTEM_RUNNING)
                         schedule_timeout_uninterruptible(2);
 #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
                 cond_resched();
@@ -1361,6 +1484,7 @@ int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
 static void rcu_gp_cleanup(struct rcu_state *rsp)
 {
         unsigned long gp_duration;
+        int nocb = 0;
         struct rcu_data *rdp;
         struct rcu_node *rnp = rcu_get_root(rsp);
 
@@ -1390,17 +1514,23 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
          */
         rcu_for_each_node_breadth_first(rsp, rnp) {
                 raw_spin_lock_irq(&rnp->lock);
-                rnp->completed = rsp->gpnum;
+                ACCESS_ONCE(rnp->completed) = rsp->gpnum;
+                rdp = this_cpu_ptr(rsp->rda);
+                if (rnp == rdp->mynode)
+                        __rcu_process_gp_end(rsp, rnp, rdp);
+                nocb += rcu_future_gp_cleanup(rsp, rnp);
                 raw_spin_unlock_irq(&rnp->lock);
                 cond_resched();
         }
         rnp = rcu_get_root(rsp);
         raw_spin_lock_irq(&rnp->lock);
+        rcu_nocb_gp_set(rnp, nocb);
 
         rsp->completed = rsp->gpnum; /* Declare grace period done. */
         trace_rcu_grace_period(rsp->name, rsp->completed, "end");
         rsp->fqs_state = RCU_GP_IDLE;
         rdp = this_cpu_ptr(rsp->rda);
+        rcu_advance_cbs(rsp, rnp, rdp);  /* Reduce false positives below. */
         if (cpu_needs_another_gp(rsp, rdp))
                 rsp->gp_flags = 1;
         raw_spin_unlock_irq(&rnp->lock);
@@ -1476,57 +1606,62 @@ static int __noreturn rcu_gp_kthread(void *arg)
 /*
  * Start a new RCU grace period if warranted, re-initializing the hierarchy
  * in preparation for detecting the next grace period.  The caller must hold
- * the root node's ->lock, which is released before return.  Hard irqs must
- * be disabled.
+ * the root node's ->lock and hard irqs must be disabled.
  *
  * Note that it is legal for a dying CPU (which is marked as offline) to
  * invoke this function.  This can happen when the dying CPU reports its
  * quiescent state.
  */
 static void
-rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
-        __releases(rcu_get_root(rsp)->lock)
+rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+                      struct rcu_data *rdp)
 {
-        struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
-        struct rcu_node *rnp = rcu_get_root(rsp);
-
-        if (!rsp->gp_kthread ||
-            !cpu_needs_another_gp(rsp, rdp)) {
+        if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
                 /*
                  * Either we have not yet spawned the grace-period
                  * task, this CPU does not need another grace period,
                  * or a grace period is already in progress.
                  * Either way, don't start a new grace period.
                  */
-                raw_spin_unlock_irqrestore(&rnp->lock, flags);
                 return;
         }
-
-        /*
-         * Because there is no grace period in progress right now,
-         * any callbacks we have up to this point will be satisfied
-         * by the next grace period.  So this is a good place to
-         * assign a grace period number to recently posted callbacks.
-         */
-        rcu_accelerate_cbs(rsp, rnp, rdp);
-
         rsp->gp_flags = RCU_GP_FLAG_INIT;
-        raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
-
-        /* Ensure that CPU is aware of completion of last grace period. */
-        rcu_process_gp_end(rsp, rdp);
-        local_irq_restore(flags);
 
         /* Wake up rcu_gp_kthread() to start the grace period. */
         wake_up(&rsp->gp_wq);
 }
 
 /*
+ * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
+ * callbacks.  Note that rcu_start_gp_advanced() cannot do this because it
+ * is invoked indirectly from rcu_advance_cbs(), which would result in
+ * endless recursion -- or would do so if it wasn't for the self-deadlock
+ * that is encountered beforehand.
+ */
+static void
+rcu_start_gp(struct rcu_state *rsp)
+{
+        struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+        struct rcu_node *rnp = rcu_get_root(rsp);
+
+        /*
+         * If there is no grace period in progress right now, any
+         * callbacks we have up to this point will be satisfied by the
+         * next grace period.  Also, advancing the callbacks reduces the
+         * probability of false positives from cpu_needs_another_gp()
+         * resulting in pointless grace periods.  So, advance callbacks
+         * then start the grace period!
+         */
+        rcu_advance_cbs(rsp, rnp, rdp);
+        rcu_start_gp_advanced(rsp, rnp, rdp);
+}
+
+/*
  * Report a full set of quiescent states to the specified rcu_state
  * data structure.  This involves cleaning up after the prior grace
  * period and letting rcu_start_gp() start up the next grace period
- * if one is needed.  Note that the caller must hold rnp->lock, as
- * required by rcu_start_gp(), which will release it.
+ * if one is needed.  Note that the caller must hold rnp->lock, which
+ * is released before return.
  */
 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
         __releases(rcu_get_root(rsp)->lock)
@@ -2124,7 +2259,8 @@ __rcu_process_callbacks(struct rcu_state *rsp)
         local_irq_save(flags);
         if (cpu_needs_another_gp(rsp, rdp)) {
                 raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
-                rcu_start_gp(rsp, flags);  /* releases above lock */
+                rcu_start_gp(rsp);
+                raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
         } else {
                 local_irq_restore(flags);
         }
@@ -2169,7 +2305,8 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
 
 static void invoke_rcu_core(void)
 {
-        raise_softirq(RCU_SOFTIRQ);
+        if (cpu_online(smp_processor_id()))
+                raise_softirq(RCU_SOFTIRQ);
 }
 
 /*
@@ -2204,11 +2341,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 
         /* Start a new grace period if one not already started. */
         if (!rcu_gp_in_progress(rsp)) {
-                unsigned long nestflag;
                 struct rcu_node *rnp_root = rcu_get_root(rsp);
 
-                raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
-                rcu_start_gp(rsp, nestflag);  /* rlses rnp_root->lock */
+                raw_spin_lock(&rnp_root->lock);
+                rcu_start_gp(rsp);
+                raw_spin_unlock(&rnp_root->lock);
         } else {
                 /* Give the grace period a kick. */
                 rdp->blimit = LONG_MAX;
@@ -2628,19 +2765,27 @@ static int rcu_pending(int cpu)
 }
 
 /*
- * Check to see if any future RCU-related work will need to be done
- * by the current CPU, even if none need be done immediately, returning
- * 1 if so.
+ * Return true if the specified CPU has any callback.  If all_lazy is
+ * non-NULL, store an indication of whether all callbacks are lazy.
+ * (If there are no callbacks, all of them are deemed to be lazy.)
  */
-static int rcu_cpu_has_callbacks(int cpu)
+static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
 {
+        bool al = true;
+        bool hc = false;
+        struct rcu_data *rdp;
         struct rcu_state *rsp;
 
-        /* RCU callbacks either ready or pending? */
-        for_each_rcu_flavor(rsp)
-                if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
-                        return 1;
-        return 0;
+        for_each_rcu_flavor(rsp) {
+                rdp = per_cpu_ptr(rsp->rda, cpu);
+                if (rdp->qlen != rdp->qlen_lazy)
+                        al = false;
+                if (rdp->nxtlist)
+                        hc = true;
+        }
+        if (all_lazy)
+                *all_lazy = al;
+        return hc;
 }
 
 /*
@@ -2859,7 +3004,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
         rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
         atomic_set(&rdp->dynticks->dynticks,
                    (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
-        rcu_prepare_for_idle_init(cpu);
         raw_spin_unlock(&rnp->lock);            /* irqs remain disabled. */
 
         /* Add CPU to rcu_node bitmasks. */
@@ -2909,7 +3053,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
         struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
         struct rcu_node *rnp = rdp->mynode;
         struct rcu_state *rsp;
-        int ret = NOTIFY_OK;
 
         trace_rcu_utilization("Start CPU hotplug");
         switch (action) {
@@ -2923,21 +3066,12 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
                 rcu_boost_kthread_setaffinity(rnp, -1);
                 break;
         case CPU_DOWN_PREPARE:
-                if (nocb_cpu_expendable(cpu))
-                        rcu_boost_kthread_setaffinity(rnp, cpu);
-                else
-                        ret = NOTIFY_BAD;
+                rcu_boost_kthread_setaffinity(rnp, cpu);
                 break;
         case CPU_DYING:
         case CPU_DYING_FROZEN:
-                /*
-                 * The whole machine is "stopped" except this CPU, so we can
-                 * touch any data without introducing corruption.  We send the
-                 * dying CPU's callbacks to an arbitrarily chosen online CPU.
-                 */
                 for_each_rcu_flavor(rsp)
                         rcu_cleanup_dying_cpu(rsp);
-                rcu_cleanup_after_idle(cpu);
                 break;
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
@@ -2950,7 +3084,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
                 break;
         }
         trace_rcu_utilization("End CPU hotplug");
-        return ret;
+        return NOTIFY_OK;
 }
 
 /*
@@ -3085,6 +3219,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
                 }
                 rnp->level = i;
                 INIT_LIST_HEAD(&rnp->blkd_tasks);
+                rcu_init_one_nocb(rnp);
         }
 }
 
@@ -3170,8 +3305,7 @@ void __init rcu_init(void)
         rcu_init_one(&rcu_sched_state, &rcu_sched_data);
         rcu_init_one(&rcu_bh_state, &rcu_bh_data);
         __rcu_init_preempt();
-        rcu_init_nocb();
         open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 
         /*
          * We don't need protection against CPU-hotplug here because