diff options
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r-- | kernel/rcutree.c | 347 |
1 files changed, 253 insertions, 94 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d0c5baf1ab18..4b97bba7396e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -75,6 +75,8 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | |||
75 | .gpnum = -300, \ | 75 | .gpnum = -300, \ |
76 | .completed = -300, \ | 76 | .completed = -300, \ |
77 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ | 77 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ |
78 | .orphan_nxttail = &structname##_state.orphan_nxtlist, \ | ||
79 | .orphan_donetail = &structname##_state.orphan_donelist, \ | ||
78 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ | 80 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ |
79 | .n_force_qs = 0, \ | 81 | .n_force_qs = 0, \ |
80 | .n_force_qs_ngp = 0, \ | 82 | .n_force_qs_ngp = 0, \ |
@@ -145,6 +147,13 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | |||
145 | unsigned long rcutorture_testseq; | 147 | unsigned long rcutorture_testseq; |
146 | unsigned long rcutorture_vernum; | 148 | unsigned long rcutorture_vernum; |
147 | 149 | ||
150 | /* State information for rcu_barrier() and friends. */ | ||
151 | |||
152 | static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; | ||
153 | static atomic_t rcu_barrier_cpu_count; | ||
154 | static DEFINE_MUTEX(rcu_barrier_mutex); | ||
155 | static struct completion rcu_barrier_completion; | ||
156 | |||
148 | /* | 157 | /* |
149 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s | 158 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s |
150 | * permit this function to be invoked without holding the root rcu_node | 159 | * permit this function to be invoked without holding the root rcu_node |
@@ -1311,95 +1320,135 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1311 | #ifdef CONFIG_HOTPLUG_CPU | 1320 | #ifdef CONFIG_HOTPLUG_CPU |
1312 | 1321 | ||
1313 | /* | 1322 | /* |
1314 | * Move a dying CPU's RCU callbacks to online CPU's callback list. | 1323 | * Send the specified CPU's RCU callbacks to the orphanage. The |
1315 | * Also record a quiescent state for this CPU for the current grace period. | 1324 | * specified CPU must be offline, and the caller must hold the |
1316 | * Synchronization and interrupt disabling are not required because | 1325 | * ->onofflock. |
1317 | * this function executes in stop_machine() context. Therefore, cleanup | ||
1318 | * operations that might block must be done later from the CPU_DEAD | ||
1319 | * notifier. | ||
1320 | * | ||
1321 | * Note that the outgoing CPU's bit has already been cleared in the | ||
1322 | * cpu_online_mask. This allows us to randomly pick a callback | ||
1323 | * destination from the bits set in that mask. | ||
1324 | */ | 1326 | */ |
1325 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) | 1327 | static void |
1328 | rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, | ||
1329 | struct rcu_node *rnp, struct rcu_data *rdp) | ||
1326 | { | 1330 | { |
1327 | int i; | 1331 | int i; |
1328 | unsigned long mask; | ||
1329 | int receive_cpu = cpumask_any(cpu_online_mask); | ||
1330 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | ||
1331 | struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); | ||
1332 | RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */ | ||
1333 | 1332 | ||
1334 | /* First, adjust the counts. */ | 1333 | /* |
1334 | * Orphan the callbacks. First adjust the counts. This is safe | ||
1335 | * because ->onofflock excludes _rcu_barrier()'s adoption of | ||
1336 | * the callbacks, thus no memory barrier is required. | ||
1337 | */ | ||
1335 | if (rdp->nxtlist != NULL) { | 1338 | if (rdp->nxtlist != NULL) { |
1336 | receive_rdp->qlen_lazy += rdp->qlen_lazy; | 1339 | rsp->qlen_lazy += rdp->qlen_lazy; |
1337 | receive_rdp->qlen += rdp->qlen; | 1340 | rsp->qlen += rdp->qlen; |
1341 | rdp->n_cbs_orphaned += rdp->qlen; | ||
1338 | rdp->qlen_lazy = 0; | 1342 | rdp->qlen_lazy = 0; |
1339 | rdp->qlen = 0; | 1343 | rdp->qlen = 0; |
1340 | } | 1344 | } |
1341 | 1345 | ||
1342 | /* | 1346 | /* |
1343 | * Next, move ready-to-invoke callbacks to be invoked on some | 1347 | * Next, move those callbacks still needing a grace period to |
1344 | * other CPU. These will not be required to pass through another | 1348 | * the orphanage, where some other CPU will pick them up. |
1345 | * grace period: They are done, regardless of CPU. | 1349 | * Some of the callbacks might have gone partway through a grace |
1350 | * period, but that is too bad. They get to start over because we | ||
1351 | * cannot assume that grace periods are synchronized across CPUs. | ||
1352 | * We don't bother updating the ->nxttail[] array yet, instead | ||
1353 | * we just reset the whole thing later on. | ||
1346 | */ | 1354 | */ |
1347 | if (rdp->nxtlist != NULL && | 1355 | if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) { |
1348 | rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist) { | 1356 | *rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL]; |
1349 | struct rcu_head *oldhead; | 1357 | rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL]; |
1350 | struct rcu_head **oldtail; | 1358 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; |
1351 | struct rcu_head **newtail; | ||
1352 | |||
1353 | oldhead = rdp->nxtlist; | ||
1354 | oldtail = receive_rdp->nxttail[RCU_DONE_TAIL]; | ||
1355 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; | ||
1356 | *rdp->nxttail[RCU_DONE_TAIL] = *oldtail; | ||
1357 | *receive_rdp->nxttail[RCU_DONE_TAIL] = oldhead; | ||
1358 | newtail = rdp->nxttail[RCU_DONE_TAIL]; | ||
1359 | for (i = RCU_DONE_TAIL; i < RCU_NEXT_SIZE; i++) { | ||
1360 | if (receive_rdp->nxttail[i] == oldtail) | ||
1361 | receive_rdp->nxttail[i] = newtail; | ||
1362 | if (rdp->nxttail[i] == newtail) | ||
1363 | rdp->nxttail[i] = &rdp->nxtlist; | ||
1364 | } | ||
1365 | } | 1359 | } |
1366 | 1360 | ||
1367 | /* | 1361 | /* |
1368 | * Finally, put the rest of the callbacks at the end of the list. | 1362 | * Then move the ready-to-invoke callbacks to the orphanage, |
1369 | * The ones that made it partway through get to start over: We | 1363 | * where some other CPU will pick them up. These will not be |
1370 | * cannot assume that grace periods are synchronized across CPUs. | 1364 | * required to pass through another grace period: They are done. |
1371 | * (We could splice RCU_WAIT_TAIL into RCU_NEXT_READY_TAIL, but | ||
1372 | * this does not seem compelling. Not yet, anyway.) | ||
1373 | */ | 1365 | */ |
1374 | if (rdp->nxtlist != NULL) { | 1366 | if (rdp->nxtlist != NULL) { |
1375 | *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; | 1367 | *rsp->orphan_donetail = rdp->nxtlist; |
1376 | receive_rdp->nxttail[RCU_NEXT_TAIL] = | 1368 | rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL]; |
1377 | rdp->nxttail[RCU_NEXT_TAIL]; | ||
1378 | receive_rdp->n_cbs_adopted += rdp->qlen; | ||
1379 | rdp->n_cbs_orphaned += rdp->qlen; | ||
1380 | |||
1381 | rdp->nxtlist = NULL; | ||
1382 | for (i = 0; i < RCU_NEXT_SIZE; i++) | ||
1383 | rdp->nxttail[i] = &rdp->nxtlist; | ||
1384 | } | 1369 | } |
1385 | 1370 | ||
1371 | /* Finally, initialize the rcu_data structure's list to empty. */ | ||
1372 | rdp->nxtlist = NULL; | ||
1373 | for (i = 0; i < RCU_NEXT_SIZE; i++) | ||
1374 | rdp->nxttail[i] = &rdp->nxtlist; | ||
1375 | } | ||
1376 | |||
1377 | /* | ||
1378 | * Adopt the RCU callbacks from the specified rcu_state structure's | ||
1379 | * orphanage. The caller must hold the ->onofflock. | ||
1380 | */ | ||
1381 | static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | ||
1382 | { | ||
1383 | int i; | ||
1384 | struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); | ||
1385 | |||
1386 | /* | 1386 | /* |
1387 | * Record a quiescent state for the dying CPU. This is safe | 1387 | * If there is an rcu_barrier() operation in progress, then |
1388 | * only because we have already cleared out the callbacks. | 1388 | * only the task doing that operation is permitted to adopt |
1389 | * (Otherwise, the RCU core might try to schedule the invocation | 1389 | * callbacks. To do otherwise breaks rcu_barrier() and friends |
1390 | * of callbacks on this now-offline CPU, which would be bad.) | 1390 | * by causing them to fail to wait for the callbacks in the |
1391 | * orphanage. | ||
1391 | */ | 1392 | */ |
1392 | mask = rdp->grpmask; /* rnp->grplo is constant. */ | 1393 | if (rsp->rcu_barrier_in_progress && |
1394 | rsp->rcu_barrier_in_progress != current) | ||
1395 | return; | ||
1396 | |||
1397 | /* Do the accounting first. */ | ||
1398 | rdp->qlen_lazy += rsp->qlen_lazy; | ||
1399 | rdp->qlen += rsp->qlen; | ||
1400 | rdp->n_cbs_adopted += rsp->qlen; | ||
1401 | if (rsp->qlen_lazy != rsp->qlen) | ||
1402 | rcu_idle_count_callbacks_posted(); | ||
1403 | rsp->qlen_lazy = 0; | ||
1404 | rsp->qlen = 0; | ||
1405 | |||
1406 | /* | ||
1407 | * We do not need a memory barrier here because the only way we | ||
1408 | * can get here if there is an rcu_barrier() in flight is if | ||
1409 | * we are the task doing the rcu_barrier(). | ||
1410 | */ | ||
1411 | |||
1412 | /* First adopt the ready-to-invoke callbacks. */ | ||
1413 | if (rsp->orphan_donelist != NULL) { | ||
1414 | *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL]; | ||
1415 | *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist; | ||
1416 | for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--) | ||
1417 | if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) | ||
1418 | rdp->nxttail[i] = rsp->orphan_donetail; | ||
1419 | rsp->orphan_donelist = NULL; | ||
1420 | rsp->orphan_donetail = &rsp->orphan_donelist; | ||
1421 | } | ||
1422 | |||
1423 | /* And then adopt the callbacks that still need a grace period. */ | ||
1424 | if (rsp->orphan_nxtlist != NULL) { | ||
1425 | *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist; | ||
1426 | rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail; | ||
1427 | rsp->orphan_nxtlist = NULL; | ||
1428 | rsp->orphan_nxttail = &rsp->orphan_nxtlist; | ||
1429 | } | ||
1430 | } | ||
1431 | |||
1432 | /* | ||
1433 | * Trace the fact that this CPU is going offline. | ||
1434 | */ | ||
1435 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) | ||
1436 | { | ||
1437 | RCU_TRACE(unsigned long mask); | ||
1438 | RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda)); | ||
1439 | RCU_TRACE(struct rcu_node *rnp = rdp->mynode); | ||
1440 | |||
1441 | RCU_TRACE(mask = rdp->grpmask); | ||
1393 | trace_rcu_grace_period(rsp->name, | 1442 | trace_rcu_grace_period(rsp->name, |
1394 | rnp->gpnum + 1 - !!(rnp->qsmask & mask), | 1443 | rnp->gpnum + 1 - !!(rnp->qsmask & mask), |
1395 | "cpuofl"); | 1444 | "cpuofl"); |
1396 | rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum); | ||
1397 | /* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */ | ||
1398 | } | 1445 | } |
1399 | 1446 | ||
1400 | /* | 1447 | /* |
1401 | * The CPU has been completely removed, and some other CPU is reporting | 1448 | * The CPU has been completely removed, and some other CPU is reporting |
1402 | * this fact from process context. Do the remainder of the cleanup. | 1449 | * this fact from process context. Do the remainder of the cleanup, |
1450 | * including orphaning the outgoing CPU's RCU callbacks, and also | ||
1451 | * adopting them, if there is no _rcu_barrier() instance running. | ||
1403 | * There can only be one CPU hotplug operation at a time, so no other | 1452 | * There can only be one CPU hotplug operation at a time, so no other |
1404 | * CPU can be attempting to update rcu_cpu_kthread_task. | 1453 | * CPU can be attempting to update rcu_cpu_kthread_task. |
1405 | */ | 1454 | */ |
@@ -1409,17 +1458,21 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
1409 | unsigned long mask; | 1458 | unsigned long mask; |
1410 | int need_report = 0; | 1459 | int need_report = 0; |
1411 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 1460 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
1412 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rnp. */ | 1461 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ |
1413 | 1462 | ||
1414 | /* Adjust any no-longer-needed kthreads. */ | 1463 | /* Adjust any no-longer-needed kthreads. */ |
1415 | rcu_stop_cpu_kthread(cpu); | 1464 | rcu_stop_cpu_kthread(cpu); |
1416 | rcu_node_kthread_setaffinity(rnp, -1); | 1465 | rcu_node_kthread_setaffinity(rnp, -1); |
1417 | 1466 | ||
1418 | /* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */ | 1467 | /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ |
1419 | 1468 | ||
1420 | /* Exclude any attempts to start a new grace period. */ | 1469 | /* Exclude any attempts to start a new grace period. */ |
1421 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 1470 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
1422 | 1471 | ||
1472 | /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ | ||
1473 | rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); | ||
1474 | rcu_adopt_orphan_cbs(rsp); | ||
1475 | |||
1423 | /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ | 1476 | /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ |
1424 | mask = rdp->grpmask; /* rnp->grplo is constant. */ | 1477 | mask = rdp->grpmask; /* rnp->grplo is constant. */ |
1425 | do { | 1478 | do { |
@@ -1456,6 +1509,10 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
1456 | 1509 | ||
1457 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1510 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
1458 | 1511 | ||
1512 | static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | ||
1513 | { | ||
1514 | } | ||
1515 | |||
1459 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) | 1516 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) |
1460 | { | 1517 | { |
1461 | } | 1518 | } |
@@ -1474,7 +1531,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1474 | { | 1531 | { |
1475 | unsigned long flags; | 1532 | unsigned long flags; |
1476 | struct rcu_head *next, *list, **tail; | 1533 | struct rcu_head *next, *list, **tail; |
1477 | int bl, count, count_lazy; | 1534 | int bl, count, count_lazy, i; |
1478 | 1535 | ||
1479 | /* If no callbacks are ready, just return.*/ | 1536 | /* If no callbacks are ready, just return.*/ |
1480 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { | 1537 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { |
@@ -1497,9 +1554,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1497 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; | 1554 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; |
1498 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; | 1555 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; |
1499 | tail = rdp->nxttail[RCU_DONE_TAIL]; | 1556 | tail = rdp->nxttail[RCU_DONE_TAIL]; |
1500 | for (count = RCU_NEXT_SIZE - 1; count >= 0; count--) | 1557 | for (i = RCU_NEXT_SIZE - 1; i >= 0; i--) |
1501 | if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL]) | 1558 | if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) |
1502 | rdp->nxttail[count] = &rdp->nxtlist; | 1559 | rdp->nxttail[i] = &rdp->nxtlist; |
1503 | local_irq_restore(flags); | 1560 | local_irq_restore(flags); |
1504 | 1561 | ||
1505 | /* Invoke callbacks. */ | 1562 | /* Invoke callbacks. */ |
@@ -1524,18 +1581,19 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1524 | rcu_is_callbacks_kthread()); | 1581 | rcu_is_callbacks_kthread()); |
1525 | 1582 | ||
1526 | /* Update count, and requeue any remaining callbacks. */ | 1583 | /* Update count, and requeue any remaining callbacks. */ |
1527 | rdp->qlen_lazy -= count_lazy; | ||
1528 | rdp->qlen -= count; | ||
1529 | rdp->n_cbs_invoked += count; | ||
1530 | if (list != NULL) { | 1584 | if (list != NULL) { |
1531 | *tail = rdp->nxtlist; | 1585 | *tail = rdp->nxtlist; |
1532 | rdp->nxtlist = list; | 1586 | rdp->nxtlist = list; |
1533 | for (count = 0; count < RCU_NEXT_SIZE; count++) | 1587 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
1534 | if (&rdp->nxtlist == rdp->nxttail[count]) | 1588 | if (&rdp->nxtlist == rdp->nxttail[i]) |
1535 | rdp->nxttail[count] = tail; | 1589 | rdp->nxttail[i] = tail; |
1536 | else | 1590 | else |
1537 | break; | 1591 | break; |
1538 | } | 1592 | } |
1593 | smp_mb(); /* List handling before counting for rcu_barrier(). */ | ||
1594 | rdp->qlen_lazy -= count_lazy; | ||
1595 | rdp->qlen -= count; | ||
1596 | rdp->n_cbs_invoked += count; | ||
1539 | 1597 | ||
1540 | /* Reinstate batch limit if we have worked down the excess. */ | 1598 | /* Reinstate batch limit if we have worked down the excess. */ |
1541 | if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) | 1599 | if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) |
@@ -1823,11 +1881,14 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1823 | rdp = this_cpu_ptr(rsp->rda); | 1881 | rdp = this_cpu_ptr(rsp->rda); |
1824 | 1882 | ||
1825 | /* Add the callback to our list. */ | 1883 | /* Add the callback to our list. */ |
1826 | *rdp->nxttail[RCU_NEXT_TAIL] = head; | ||
1827 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | ||
1828 | rdp->qlen++; | 1884 | rdp->qlen++; |
1829 | if (lazy) | 1885 | if (lazy) |
1830 | rdp->qlen_lazy++; | 1886 | rdp->qlen_lazy++; |
1887 | else | ||
1888 | rcu_idle_count_callbacks_posted(); | ||
1889 | smp_mb(); /* Count before adding callback for rcu_barrier(). */ | ||
1890 | *rdp->nxttail[RCU_NEXT_TAIL] = head; | ||
1891 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | ||
1831 | 1892 | ||
1832 | if (__is_kfree_rcu_offset((unsigned long)func)) | 1893 | if (__is_kfree_rcu_offset((unsigned long)func)) |
1833 | trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, | 1894 | trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, |
@@ -1893,6 +1954,38 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
1893 | } | 1954 | } |
1894 | EXPORT_SYMBOL_GPL(call_rcu_bh); | 1955 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
1895 | 1956 | ||
1957 | /* | ||
1958 | * Because a context switch is a grace period for RCU-sched and RCU-bh, | ||
1959 | * any blocking grace-period wait automatically implies a grace period | ||
1960 | * if there is only one CPU online at any point in time during execution | ||
1961 | * of either synchronize_sched() or synchronize_rcu_bh(). It is OK to | ||
1962 | * occasionally incorrectly indicate that there are multiple CPUs online | ||
1963 | * when there was in fact only one the whole time, as this just adds | ||
1964 | * some overhead: RCU still operates correctly. | ||
1965 | * | ||
1966 | * Of course, sampling num_online_cpus() with preemption enabled can | ||
1967 | * give erroneous results if there are concurrent CPU-hotplug operations. | ||
1968 | * For example, given a demonic sequence of preemptions in num_online_cpus() | ||
1969 | * and CPU-hotplug operations, there could be two or more CPUs online at | ||
1970 | * all times, but num_online_cpus() might well return one (or even zero). | ||
1971 | * | ||
1972 | * However, all such demonic sequences require at least one CPU-offline | ||
1973 | * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer | ||
1974 | * is only a problem if there is an RCU read-side critical section executing | ||
1975 | * throughout. But RCU-sched and RCU-bh read-side critical sections | ||
1976 | * disable either preemption or bh, which prevents a CPU from going offline. | ||
1977 | * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return | ||
1978 | * that there is only one CPU when in fact there was more than one throughout | ||
1979 | * is when there were no RCU readers in the system. If there are no | ||
1980 | * RCU readers, the grace period by definition can be of zero length, | ||
1981 | * regardless of the number of online CPUs. | ||
1982 | */ | ||
1983 | static inline int rcu_blocking_is_gp(void) | ||
1984 | { | ||
1985 | might_sleep(); /* Check for RCU read-side critical section. */ | ||
1986 | return num_online_cpus() <= 1; | ||
1987 | } | ||
1988 | |||
1896 | /** | 1989 | /** |
1897 | * synchronize_sched - wait until an rcu-sched grace period has elapsed. | 1990 | * synchronize_sched - wait until an rcu-sched grace period has elapsed. |
1898 | * | 1991 | * |
@@ -2166,11 +2259,10 @@ static int rcu_cpu_has_callbacks(int cpu) | |||
2166 | rcu_preempt_cpu_has_callbacks(cpu); | 2259 | rcu_preempt_cpu_has_callbacks(cpu); |
2167 | } | 2260 | } |
2168 | 2261 | ||
2169 | static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; | 2262 | /* |
2170 | static atomic_t rcu_barrier_cpu_count; | 2263 | * RCU callback function for _rcu_barrier(). If we are last, wake |
2171 | static DEFINE_MUTEX(rcu_barrier_mutex); | 2264 | * up the task executing _rcu_barrier(). |
2172 | static struct completion rcu_barrier_completion; | 2265 | */ |
2173 | |||
2174 | static void rcu_barrier_callback(struct rcu_head *notused) | 2266 | static void rcu_barrier_callback(struct rcu_head *notused) |
2175 | { | 2267 | { |
2176 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) | 2268 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) |
@@ -2200,27 +2292,94 @@ static void _rcu_barrier(struct rcu_state *rsp, | |||
2200 | void (*call_rcu_func)(struct rcu_head *head, | 2292 | void (*call_rcu_func)(struct rcu_head *head, |
2201 | void (*func)(struct rcu_head *head))) | 2293 | void (*func)(struct rcu_head *head))) |
2202 | { | 2294 | { |
2203 | BUG_ON(in_interrupt()); | 2295 | int cpu; |
2296 | unsigned long flags; | ||
2297 | struct rcu_data *rdp; | ||
2298 | struct rcu_head rh; | ||
2299 | |||
2300 | init_rcu_head_on_stack(&rh); | ||
2301 | |||
2204 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ | 2302 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ |
2205 | mutex_lock(&rcu_barrier_mutex); | 2303 | mutex_lock(&rcu_barrier_mutex); |
2206 | init_completion(&rcu_barrier_completion); | 2304 | |
2305 | smp_mb(); /* Prevent any prior operations from leaking in. */ | ||
2306 | |||
2207 | /* | 2307 | /* |
2208 | * Initialize rcu_barrier_cpu_count to 1, then invoke | 2308 | * Initialize the count to one rather than to zero in order to |
2209 | * rcu_barrier_func() on each CPU, so that each CPU also has | 2309 | * avoid a too-soon return to zero in case of a short grace period |
2210 | * incremented rcu_barrier_cpu_count. Only then is it safe to | 2310 | * (or preemption of this task). Also flag this task as doing |
2211 | * decrement rcu_barrier_cpu_count -- otherwise the first CPU | 2311 | * an rcu_barrier(). This will prevent anyone else from adopting |
2212 | * might complete its grace period before all of the other CPUs | 2312 | * orphaned callbacks, which could otherwise cause failure if a |
2213 | * did their increment, causing this function to return too | 2313 | * CPU went offline and quickly came back online. To see this, |
2214 | * early. Note that on_each_cpu() disables irqs, which prevents | 2314 | * consider the following sequence of events: |
2215 | * any CPUs from coming online or going offline until each online | 2315 | * |
2216 | * CPU has queued its RCU-barrier callback. | 2316 | * 1. We cause CPU 0 to post an rcu_barrier_callback() callback. |
2317 | * 2. CPU 1 goes offline, orphaning its callbacks. | ||
2318 | * 3. CPU 0 adopts CPU 1's orphaned callbacks. | ||
2319 | * 4. CPU 1 comes back online. | ||
2320 | * 5. We cause CPU 1 to post an rcu_barrier_callback() callback. | ||
2321 | * 6. Both rcu_barrier_callback() callbacks are invoked, awakening | ||
2322 | * us -- but before CPU 1's orphaned callbacks are invoked!!! | ||
2217 | */ | 2323 | */ |
2324 | init_completion(&rcu_barrier_completion); | ||
2218 | atomic_set(&rcu_barrier_cpu_count, 1); | 2325 | atomic_set(&rcu_barrier_cpu_count, 1); |
2219 | on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); | 2326 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
2327 | rsp->rcu_barrier_in_progress = current; | ||
2328 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
2329 | |||
2330 | /* | ||
2331 | * Force every CPU with callbacks to register a new callback | ||
2332 | * that will tell us when all the preceding callbacks have | ||
2333 | * been invoked. If an offline CPU has callbacks, wait for | ||
2334 | * it to either come back online or to finish orphaning those | ||
2335 | * callbacks. | ||
2336 | */ | ||
2337 | for_each_possible_cpu(cpu) { | ||
2338 | preempt_disable(); | ||
2339 | rdp = per_cpu_ptr(rsp->rda, cpu); | ||
2340 | if (cpu_is_offline(cpu)) { | ||
2341 | preempt_enable(); | ||
2342 | while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) | ||
2343 | schedule_timeout_interruptible(1); | ||
2344 | } else if (ACCESS_ONCE(rdp->qlen)) { | ||
2345 | smp_call_function_single(cpu, rcu_barrier_func, | ||
2346 | (void *)call_rcu_func, 1); | ||
2347 | preempt_enable(); | ||
2348 | } else { | ||
2349 | preempt_enable(); | ||
2350 | } | ||
2351 | } | ||
2352 | |||
2353 | /* | ||
2354 | * Now that all online CPUs have rcu_barrier_callback() callbacks | ||
2355 | * posted, we can adopt all of the orphaned callbacks and place | ||
2356 | * an rcu_barrier_callback() callback after them. When that is done, | ||
2357 | * we are guaranteed to have an rcu_barrier_callback() callback | ||
2358 | * following every callback that could possibly have been | ||
2359 | * registered before _rcu_barrier() was called. | ||
2360 | */ | ||
2361 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | ||
2362 | rcu_adopt_orphan_cbs(rsp); | ||
2363 | rsp->rcu_barrier_in_progress = NULL; | ||
2364 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
2365 | atomic_inc(&rcu_barrier_cpu_count); | ||
2366 | smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ | ||
2367 | call_rcu_func(&rh, rcu_barrier_callback); | ||
2368 | |||
2369 | /* | ||
2370 | * Now that we have an rcu_barrier_callback() callback on each | ||
2371 | * CPU, and thus each counted, remove the initial count. | ||
2372 | */ | ||
2220 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) | 2373 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) |
2221 | complete(&rcu_barrier_completion); | 2374 | complete(&rcu_barrier_completion); |
2375 | |||
2376 | /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ | ||
2222 | wait_for_completion(&rcu_barrier_completion); | 2377 | wait_for_completion(&rcu_barrier_completion); |
2378 | |||
2379 | /* Other rcu_barrier() invocations can now safely proceed. */ | ||
2223 | mutex_unlock(&rcu_barrier_mutex); | 2380 | mutex_unlock(&rcu_barrier_mutex); |
2381 | |||
2382 | destroy_rcu_head_on_stack(&rh); | ||
2224 | } | 2383 | } |
2225 | 2384 | ||
2226 | /** | 2385 | /** |
@@ -2417,7 +2576,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
2417 | 2576 | ||
2418 | for (i = NUM_RCU_LVLS - 1; i > 0; i--) | 2577 | for (i = NUM_RCU_LVLS - 1; i > 0; i--) |
2419 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; | 2578 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; |
2420 | rsp->levelspread[0] = RCU_FANOUT_LEAF; | 2579 | rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF; |
2421 | } | 2580 | } |
2422 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ | 2581 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ |
2423 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | 2582 | static void __init rcu_init_levelspread(struct rcu_state *rsp) |