Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--	kernel/rcutree.c	347
1 file changed, 253 insertions(+), 94 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d0c5baf1ab18..4b97bba7396e 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -75,6 +75,8 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
 	.gpnum = -300, \
 	.completed = -300, \
 	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
+	.orphan_nxttail = &structname##_state.orphan_nxtlist, \
+	.orphan_donetail = &structname##_state.orphan_donelist, \
 	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \
 	.n_force_qs = 0, \
 	.n_force_qs_ngp = 0, \
@@ -145,6 +147,13 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
 unsigned long rcutorture_testseq;
 unsigned long rcutorture_vernum;
 
+/* State information for rcu_barrier() and friends. */
+
+static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
+static atomic_t rcu_barrier_cpu_count;
+static DEFINE_MUTEX(rcu_barrier_mutex);
+static struct completion rcu_barrier_completion;
+
 /*
  * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
  * permit this function to be invoked without holding the root rcu_node
@@ -1311,95 +1320,135 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
- * Move a dying CPU's RCU callbacks to online CPU's callback list.
- * Also record a quiescent state for this CPU for the current grace period.
- * Synchronization and interrupt disabling are not required because
- * this function executes in stop_machine() context.  Therefore, cleanup
- * operations that might block must be done later from the CPU_DEAD
- * notifier.
- *
- * Note that the outgoing CPU's bit has already been cleared in the
- * cpu_online_mask.  This allows us to randomly pick a callback
- * destination from the bits set in that mask.
+ * Send the specified CPU's RCU callbacks to the orphanage.  The
+ * specified CPU must be offline, and the caller must hold the
+ * ->onofflock.
  */
-static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
+static void
+rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
+			  struct rcu_node *rnp, struct rcu_data *rdp)
 {
 	int i;
-	unsigned long mask;
-	int receive_cpu = cpumask_any(cpu_online_mask);
-	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
-	struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
-	RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */
 
-	/* First, adjust the counts. */
+	/*
+	 * Orphan the callbacks.  First adjust the counts.  This is safe
+	 * because ->onofflock excludes _rcu_barrier()'s adoption of
+	 * the callbacks, thus no memory barrier is required.
+	 */
 	if (rdp->nxtlist != NULL) {
-		receive_rdp->qlen_lazy += rdp->qlen_lazy;
-		receive_rdp->qlen += rdp->qlen;
+		rsp->qlen_lazy += rdp->qlen_lazy;
+		rsp->qlen += rdp->qlen;
+		rdp->n_cbs_orphaned += rdp->qlen;
 		rdp->qlen_lazy = 0;
 		rdp->qlen = 0;
 	}
 
 	/*
-	 * Next, move ready-to-invoke callbacks to be invoked on some
-	 * other CPU.  These will not be required to pass through another
-	 * grace period:  They are done, regardless of CPU.
+	 * Next, move those callbacks still needing a grace period to
+	 * the orphanage, where some other CPU will pick them up.
+	 * Some of the callbacks might have gone partway through a grace
+	 * period, but that is too bad.  They get to start over because we
+	 * cannot assume that grace periods are synchronized across CPUs.
+	 * We don't bother updating the ->nxttail[] array yet, instead
+	 * we just reset the whole thing later on.
 	 */
-	if (rdp->nxtlist != NULL &&
-	    rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist) {
-		struct rcu_head *oldhead;
-		struct rcu_head **oldtail;
-		struct rcu_head **newtail;
-
-		oldhead = rdp->nxtlist;
-		oldtail = receive_rdp->nxttail[RCU_DONE_TAIL];
-		rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
-		*rdp->nxttail[RCU_DONE_TAIL] = *oldtail;
-		*receive_rdp->nxttail[RCU_DONE_TAIL] = oldhead;
-		newtail = rdp->nxttail[RCU_DONE_TAIL];
-		for (i = RCU_DONE_TAIL; i < RCU_NEXT_SIZE; i++) {
-			if (receive_rdp->nxttail[i] == oldtail)
-				receive_rdp->nxttail[i] = newtail;
-			if (rdp->nxttail[i] == newtail)
-				rdp->nxttail[i] = &rdp->nxtlist;
-		}
+	if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) {
+		*rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL];
+		rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL];
+		*rdp->nxttail[RCU_DONE_TAIL] = NULL;
 	}
 
 	/*
-	 * Finally, put the rest of the callbacks at the end of the list.
-	 * The ones that made it partway through get to start over:  We
-	 * cannot assume that grace periods are synchronized across CPUs.
-	 * (We could splice RCU_WAIT_TAIL into RCU_NEXT_READY_TAIL, but
-	 * this does not seem compelling.  Not yet, anyway.)
+	 * Then move the ready-to-invoke callbacks to the orphanage,
+	 * where some other CPU will pick them up.  These will not be
+	 * required to pass though another grace period: They are done.
 	 */
 	if (rdp->nxtlist != NULL) {
-		*receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
-		receive_rdp->nxttail[RCU_NEXT_TAIL] =
-				rdp->nxttail[RCU_NEXT_TAIL];
-		receive_rdp->n_cbs_adopted += rdp->qlen;
-		rdp->n_cbs_orphaned += rdp->qlen;
-
-		rdp->nxtlist = NULL;
-		for (i = 0; i < RCU_NEXT_SIZE; i++)
-			rdp->nxttail[i] = &rdp->nxtlist;
+		*rsp->orphan_donetail = rdp->nxtlist;
+		rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL];
 	}
 
+	/* Finally, initialize the rcu_data structure's list to empty.  */
+	rdp->nxtlist = NULL;
+	for (i = 0; i < RCU_NEXT_SIZE; i++)
+		rdp->nxttail[i] = &rdp->nxtlist;
+}
+
+/*
+ * Adopt the RCU callbacks from the specified rcu_state structure's
+ * orphanage.  The caller must hold the ->onofflock.
+ */
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+{
+	int i;
+	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
+
 	/*
-	 * Record a quiescent state for the dying CPU.  This is safe
-	 * only because we have already cleared out the callbacks.
-	 * (Otherwise, the RCU core might try to schedule the invocation
-	 * of callbacks on this now-offline CPU, which would be bad.)
+	 * If there is an rcu_barrier() operation in progress, then
+	 * only the task doing that operation is permitted to adopt
+	 * callbacks.  To do otherwise breaks rcu_barrier() and friends
+	 * by causing them to fail to wait for the callbacks in the
+	 * orphanage.
 	 */
-	mask = rdp->grpmask;	/* rnp->grplo is constant. */
+	if (rsp->rcu_barrier_in_progress &&
+	    rsp->rcu_barrier_in_progress != current)
+		return;
+
+	/* Do the accounting first. */
+	rdp->qlen_lazy += rsp->qlen_lazy;
+	rdp->qlen += rsp->qlen;
+	rdp->n_cbs_adopted += rsp->qlen;
+	if (rsp->qlen_lazy != rsp->qlen)
+		rcu_idle_count_callbacks_posted();
+	rsp->qlen_lazy = 0;
+	rsp->qlen = 0;
+
+	/*
+	 * We do not need a memory barrier here because the only way we
+	 * can get here if there is an rcu_barrier() in flight is if
+	 * we are the task doing the rcu_barrier().
+	 */
+
+	/* First adopt the ready-to-invoke callbacks. */
+	if (rsp->orphan_donelist != NULL) {
+		*rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL];
+		*rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist;
+		for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--)
+			if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
+				rdp->nxttail[i] = rsp->orphan_donetail;
+		rsp->orphan_donelist = NULL;
+		rsp->orphan_donetail = &rsp->orphan_donelist;
+	}
+
+	/* And then adopt the callbacks that still need a grace period. */
+	if (rsp->orphan_nxtlist != NULL) {
+		*rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist;
+		rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail;
+		rsp->orphan_nxtlist = NULL;
+		rsp->orphan_nxttail = &rsp->orphan_nxtlist;
+	}
+}
+
+/*
+ * Trace the fact that this CPU is going offline.
+ */
+static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
+{
+	RCU_TRACE(unsigned long mask);
+	RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda));
+	RCU_TRACE(struct rcu_node *rnp = rdp->mynode);
+
+	RCU_TRACE(mask = rdp->grpmask);
 	trace_rcu_grace_period(rsp->name,
 			       rnp->gpnum + 1 - !!(rnp->qsmask & mask),
 			       "cpuofl");
-	rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum);
-	/* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */
 }
 
 /*
  * The CPU has been completely removed, and some other CPU is reporting
- * this fact from process context.  Do the remainder of the cleanup.
+ * this fact from process context.  Do the remainder of the cleanup,
+ * including orphaning the outgoing CPU's RCU callbacks, and also
+ * adopting them, if there is no _rcu_barrier() instance running.
  * There can only be one CPU hotplug operation at a time, so no other
  * CPU can be attempting to update rcu_cpu_kthread_task.
  */
@@ -1409,17 +1458,21 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 	unsigned long mask;
 	int need_report = 0;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rnp. */
+	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
 	/* Adjust any no-longer-needed kthreads. */
 	rcu_stop_cpu_kthread(cpu);
 	rcu_node_kthread_setaffinity(rnp, -1);
 
-	/* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */
+	/* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
 
 	/* Exclude any attempts to start a new grace period. */
 	raw_spin_lock_irqsave(&rsp->onofflock, flags);
 
+	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
+	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
+	rcu_adopt_orphan_cbs(rsp);
+
 	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
 	mask = rdp->grpmask;	/* rnp->grplo is constant. */
 	do {
@@ -1456,6 +1509,10 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
 
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+{
+}
+
 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
 }
@@ -1474,7 +1531,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 {
 	unsigned long flags;
 	struct rcu_head *next, *list, **tail;
-	int bl, count, count_lazy;
+	int bl, count, count_lazy, i;
 
 	/* If no callbacks are ready, just return.*/
 	if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
@@ -1497,9 +1554,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 	rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
 	*rdp->nxttail[RCU_DONE_TAIL] = NULL;
 	tail = rdp->nxttail[RCU_DONE_TAIL];
-	for (count = RCU_NEXT_SIZE - 1; count >= 0; count--)
-		if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL])
-			rdp->nxttail[count] = &rdp->nxtlist;
+	for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
+		if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
+			rdp->nxttail[i] = &rdp->nxtlist;
 	local_irq_restore(flags);
 
 	/* Invoke callbacks. */
@@ -1524,18 +1581,19 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 			    rcu_is_callbacks_kthread());
 
 	/* Update count, and requeue any remaining callbacks. */
-	rdp->qlen_lazy -= count_lazy;
-	rdp->qlen -= count;
-	rdp->n_cbs_invoked += count;
 	if (list != NULL) {
 		*tail = rdp->nxtlist;
 		rdp->nxtlist = list;
-		for (count = 0; count < RCU_NEXT_SIZE; count++)
-			if (&rdp->nxtlist == rdp->nxttail[count])
-				rdp->nxttail[count] = tail;
+		for (i = 0; i < RCU_NEXT_SIZE; i++)
+			if (&rdp->nxtlist == rdp->nxttail[i])
+				rdp->nxttail[i] = tail;
 			else
 				break;
 	}
+	smp_mb(); /* List handling before counting for rcu_barrier(). */
+	rdp->qlen_lazy -= count_lazy;
+	rdp->qlen -= count;
+	rdp->n_cbs_invoked += count;
 
 	/* Reinstate batch limit if we have worked down the excess. */
 	if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
@@ -1823,11 +1881,14 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	rdp = this_cpu_ptr(rsp->rda);
 
 	/* Add the callback to our list. */
-	*rdp->nxttail[RCU_NEXT_TAIL] = head;
-	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
 	rdp->qlen++;
 	if (lazy)
 		rdp->qlen_lazy++;
+	else
+		rcu_idle_count_callbacks_posted();
+	smp_mb();  /* Count before adding callback for rcu_barrier(). */
+	*rdp->nxttail[RCU_NEXT_TAIL] = head;
+	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
 
 	if (__is_kfree_rcu_offset((unsigned long)func))
 		trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
@@ -1893,6 +1954,38 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 }
 EXPORT_SYMBOL_GPL(call_rcu_bh);
 
+/*
+ * Because a context switch is a grace period for RCU-sched and RCU-bh,
+ * any blocking grace-period wait automatically implies a grace period
+ * if there is only one CPU online at any point time during execution
+ * of either synchronize_sched() or synchronize_rcu_bh().  It is OK to
+ * occasionally incorrectly indicate that there are multiple CPUs online
+ * when there was in fact only one the whole time, as this just adds
+ * some overhead: RCU still operates correctly.
+ *
+ * Of course, sampling num_online_cpus() with preemption enabled can
+ * give erroneous results if there are concurrent CPU-hotplug operations.
+ * For example, given a demonic sequence of preemptions in num_online_cpus()
+ * and CPU-hotplug operations, there could be two or more CPUs online at
+ * all times, but num_online_cpus() might well return one (or even zero).
+ *
+ * However, all such demonic sequences require at least one CPU-offline
+ * operation.  Furthermore, rcu_blocking_is_gp() giving the wrong answer
+ * is only a problem if there is an RCU read-side critical section executing
+ * throughout.  But RCU-sched and RCU-bh read-side critical sections
+ * disable either preemption or bh, which prevents a CPU from going offline.
+ * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return
+ * that there is only one CPU when in fact there was more than one throughout
+ * is when there were no RCU readers in the system.  If there are no
+ * RCU readers, the grace period by definition can be of zero length,
+ * regardless of the number of online CPUs.
+ */
+static inline int rcu_blocking_is_gp(void)
+{
+	might_sleep();  /* Check for RCU read-side critical section. */
+	return num_online_cpus() <= 1;
+}
+
 /**
  * synchronize_sched - wait until an rcu-sched grace period has elapsed.
  *
@@ -2166,11 +2259,10 @@ static int rcu_cpu_has_callbacks(int cpu)
 	       rcu_preempt_cpu_has_callbacks(cpu);
 }
 
-static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
-static atomic_t rcu_barrier_cpu_count;
-static DEFINE_MUTEX(rcu_barrier_mutex);
-static struct completion rcu_barrier_completion;
-
+/*
+ * RCU callback function for _rcu_barrier().  If we are last, wake
+ * up the task executing _rcu_barrier().
+ */
 static void rcu_barrier_callback(struct rcu_head *notused)
 {
 	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
@@ -2200,27 +2292,94 @@ static void _rcu_barrier(struct rcu_state *rsp,
 			       void (*call_rcu_func)(struct rcu_head *head,
 						      void (*func)(struct rcu_head *head)))
 {
-	BUG_ON(in_interrupt());
+	int cpu;
+	unsigned long flags;
+	struct rcu_data *rdp;
+	struct rcu_head rh;
+
+	init_rcu_head_on_stack(&rh);
+
 	/* Take mutex to serialize concurrent rcu_barrier() requests. */
 	mutex_lock(&rcu_barrier_mutex);
-	init_completion(&rcu_barrier_completion);
+
+	smp_mb();  /* Prevent any prior operations from leaking in. */
+
 	/*
-	 * Initialize rcu_barrier_cpu_count to 1, then invoke
-	 * rcu_barrier_func() on each CPU, so that each CPU also has
-	 * incremented rcu_barrier_cpu_count.  Only then is it safe to
-	 * decrement rcu_barrier_cpu_count -- otherwise the first CPU
-	 * might complete its grace period before all of the other CPUs
-	 * did their increment, causing this function to return too
-	 * early.  Note that on_each_cpu() disables irqs, which prevents
-	 * any CPUs from coming online or going offline until each online
-	 * CPU has queued its RCU-barrier callback.
+	 * Initialize the count to one rather than to zero in order to
+	 * avoid a too-soon return to zero in case of a short grace period
+	 * (or preemption of this task).  Also flag this task as doing
+	 * an rcu_barrier().  This will prevent anyone else from adopting
+	 * orphaned callbacks, which could cause otherwise failure if a
+	 * CPU went offline and quickly came back online.  To see this,
+	 * consider the following sequence of events:
+	 *
+	 * 1.	We cause CPU 0 to post an rcu_barrier_callback() callback.
+	 * 2.	CPU 1 goes offline, orphaning its callbacks.
+	 * 3.	CPU 0 adopts CPU 1's orphaned callbacks.
+	 * 4.	CPU 1 comes back online.
+	 * 5.	We cause CPU 1 to post an rcu_barrier_callback() callback.
+	 * 6.	Both rcu_barrier_callback() callbacks are invoked, awakening
+	 *	us -- but before CPU 1's orphaned callbacks are invoked!!!
 	 */
+	init_completion(&rcu_barrier_completion);
 	atomic_set(&rcu_barrier_cpu_count, 1);
-	on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
+	raw_spin_lock_irqsave(&rsp->onofflock, flags);
+	rsp->rcu_barrier_in_progress = current;
+	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
+
+	/*
+	 * Force every CPU with callbacks to register a new callback
+	 * that will tell us when all the preceding callbacks have
+	 * been invoked.  If an offline CPU has callbacks, wait for
+	 * it to either come back online or to finish orphaning those
+	 * callbacks.
+	 */
+	for_each_possible_cpu(cpu) {
+		preempt_disable();
+		rdp = per_cpu_ptr(rsp->rda, cpu);
+		if (cpu_is_offline(cpu)) {
+			preempt_enable();
+			while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen))
+				schedule_timeout_interruptible(1);
+		} else if (ACCESS_ONCE(rdp->qlen)) {
+			smp_call_function_single(cpu, rcu_barrier_func,
+						 (void *)call_rcu_func, 1);
+			preempt_enable();
+		} else {
+			preempt_enable();
+		}
+	}
+
+	/*
+	 * Now that all online CPUs have rcu_barrier_callback() callbacks
+	 * posted, we can adopt all of the orphaned callbacks and place
+	 * an rcu_barrier_callback() callback after them.  When that is done,
+	 * we are guaranteed to have an rcu_barrier_callback() callback
+	 * following every callback that could possibly have been
+	 * registered before _rcu_barrier() was called.
+	 */
+	raw_spin_lock_irqsave(&rsp->onofflock, flags);
+	rcu_adopt_orphan_cbs(rsp);
+	rsp->rcu_barrier_in_progress = NULL;
+	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
+	atomic_inc(&rcu_barrier_cpu_count);
+	smp_mb__after_atomic_inc();  /* Ensure atomic_inc() before callback. */
+	call_rcu_func(&rh, rcu_barrier_callback);
+
+	/*
+	 * Now that we have an rcu_barrier_callback() callback on each
+	 * CPU, and thus each counted, remove the initial count.
+	 */
 	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
 		complete(&rcu_barrier_completion);
+
+	/* Wait for all rcu_barrier_callback() callbacks to be invoked. */
 	wait_for_completion(&rcu_barrier_completion);
+
+	/* Other rcu_barrier() invocations can now safely proceed. */
 	mutex_unlock(&rcu_barrier_mutex);
+
+	destroy_rcu_head_on_stack(&rh);
 }
 
 /**
@@ -2417,7 +2576,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 
 	for (i = NUM_RCU_LVLS - 1; i > 0; i--)
 		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-	rsp->levelspread[0] = RCU_FANOUT_LEAF;
+	rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)