author		Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2017-02-08 15:36:42 -0500
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2017-04-18 14:38:18 -0400
commit		15fecf89e46a962ccda583d919e25d9da7bf0723 (patch)
tree		7ca067833f685d989ecfd3dcdce3fca9ecc40f31 /kernel/rcu/tree.c
parent		b8c78d3afc6aac1c722af3bec18959c6bd93231c (diff)
srcu: Abstract multi-tail callback list handling
RCU has only one multi-tail callback list, which is implemented via the nxtlist, nxttail, nxtcompleted, qlen_lazy, and qlen fields in the rcu_data structure, and whose operations are open-coded throughout the Tree RCU implementation. This has been more or less OK in the past, but upcoming callback-list optimizations in SRCU could really use a multi-tail callback list there as well.

This commit therefore abstracts the multi-tail callback list handling into a new kernel/rcu/rcu_segcblist.h file, and uses this new API. The simple head-and-tail pointer callback list is also abstracted and applied everywhere except for the NOCB callback-offload lists. (Yes, the plan is to apply them there as well, but this commit is already bigger than would be good.)

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
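For readers new to the abstraction, the sketch below outlines the two list types this commit introduces: a simple head-and-tail rcu_cblist (used, for example, for the orphanage and for the ready-to-invoke batch in rcu_do_batch()), and a segmented rcu_segcblist whose per-segment tail pointers partition one linked list of rcu_head structures into done/waiting/next-ready/next groups, replacing the old ->nxtlist/->nxttail[]/->nxtcompleted[] trio. This is an illustrative outline only, not the verbatim contents of kernel/rcu/rcu_segcblist.h; exact field names and layout may differ.

/* Illustrative sketch; assumes struct rcu_head from <linux/types.h>. */

/* Segment indices: these constants match the names used in the diff below. */
#define RCU_DONE_TAIL		0	/* Callbacks whose grace period has ended. */
#define RCU_WAIT_TAIL		1	/* Callbacks waiting for the current grace period. */
#define RCU_NEXT_READY_TAIL	2	/* Callbacks waiting for the next grace period. */
#define RCU_NEXT_TAIL		3	/* Newly arrived callbacks, not yet assigned a grace period. */
#define RCU_CBLIST_NSEGS	4

/* Simple unsegmented callback list. */
struct rcu_cblist {
	struct rcu_head *head;		/* First callback, or NULL if the list is empty. */
	struct rcu_head **tail;		/* Points at the last callback's ->next pointer,
					   or at ->head when the list is empty. */
	long len;			/* Number of callbacks queued. */
	long len_lazy;			/* Number of lazy (kfree_rcu()) callbacks. */
};

/* Segmented callback list: one linked list, several tail pointers. */
struct rcu_segcblist {
	struct rcu_head *head;				/* Single linked list of callbacks. */
	struct rcu_head **tails[RCU_CBLIST_NSEGS];	/* Tail pointer of each segment. */
	unsigned long gp_seq[RCU_CBLIST_NSEGS];		/* Grace-period number each waiting
							   segment is tagged with (illustrative). */
	long len;					/* Total callbacks queued. */
	long len_lazy;					/* Lazy callbacks queued. */
};

Helpers such as rcu_segcblist_advance() and rcu_segcblist_accelerate(), which appear in the hunks below, then replace the open-coded loops over ->nxttail[] and ->nxtcompleted[].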
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--	kernel/rcu/tree.c	348
1 file changed, 112 insertions(+), 236 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 530ab6cf7a0b..8cc9d40b41ea 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -97,8 +97,8 @@ struct rcu_state sname##_state = { \
 	.gpnum = 0UL - 300UL, \
 	.completed = 0UL - 300UL, \
 	.orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
-	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
-	.orphan_donetail = &sname##_state.orphan_donelist, \
+	.orphan_pend = RCU_CBLIST_INITIALIZER(sname##_state.orphan_pend), \
+	.orphan_done = RCU_CBLIST_INITIALIZER(sname##_state.orphan_done), \
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
 	.name = RCU_STATE_NAME(sname), \
 	.abbr = sabbr, \
@@ -726,16 +726,6 @@ void rcutorture_record_progress(unsigned long vernum)
 EXPORT_SYMBOL_GPL(rcutorture_record_progress);
 
 /*
- * Does the CPU have callbacks ready to be invoked?
- */
-static int
-cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
-{
-	return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
-	       rdp->nxttail[RCU_NEXT_TAIL] != NULL;
-}
-
-/*
  * Return the root node of the specified rcu_state structure.
  */
 static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
@@ -765,21 +755,17 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
 static bool
 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-	int i;
-
 	if (rcu_gp_in_progress(rsp))
 		return false;  /* No, a grace period is already in progress. */
 	if (rcu_future_needs_gp(rsp))
 		return true;  /* Yes, a no-CBs CPU needs one. */
-	if (!rdp->nxttail[RCU_NEXT_TAIL])
+	if (!rcu_segcblist_is_enabled(&rdp->cblist))
 		return false;  /* No, this is a no-CBs (or offline) CPU. */
-	if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
+	if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
 		return true;  /* Yes, CPU has newly registered callbacks. */
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
-		if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
-		    ULONG_CMP_LT(READ_ONCE(rsp->completed),
-				 rdp->nxtcompleted[i]))
-			return true;  /* Yes, CBs for future grace period. */
+	if (rcu_segcblist_future_gp_needed(&rdp->cblist,
+					   READ_ONCE(rsp->completed)))
+		return true;  /* Yes, CBs for future grace period. */
 	return false; /* No grace period needed. */
 }
 
@@ -1490,7 +1476,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
 
 	print_cpu_stall_info_end();
 	for_each_possible_cpu(cpu)
-		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+		totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,
+							    cpu)->cblist);
 	pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n",
 	       smp_processor_id(), (long)(jiffies - rsp->gp_start),
 	       (long)rsp->gpnum, (long)rsp->completed, totqlen);
@@ -1544,7 +1531,8 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	print_cpu_stall_info(rsp, smp_processor_id());
 	print_cpu_stall_info_end();
 	for_each_possible_cpu(cpu)
-		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+		totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,
+							    cpu)->cblist);
 	pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
 		jiffies - rsp->gp_start,
 		(long)rsp->gpnum, (long)rsp->completed, totqlen);
@@ -1647,30 +1635,6 @@ void rcu_cpu_stall_reset(void)
 }
 
 /*
- * Initialize the specified rcu_data structure's default callback list
- * to empty.  The default callback list is the one that is not used by
- * no-callbacks CPUs.
- */
-static void init_default_callback_list(struct rcu_data *rdp)
-{
-	int i;
-
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
-}
-
-/*
- * Initialize the specified rcu_data structure's callback list to empty.
- */
-static void init_callback_list(struct rcu_data *rdp)
-{
-	if (init_nocb_callback_list(rdp))
-		return;
-	init_default_callback_list(rdp);
-}
-
-/*
  * Determine the value that ->completed will have at the end of the
  * next subsequent grace period.  This is used to tag callbacks so that
  * a CPU can invoke callbacks in a timely fashion even if that CPU has
@@ -1724,7 +1688,6 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 		    unsigned long *c_out)
 {
 	unsigned long c;
-	int i;
 	bool ret = false;
 	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
 
@@ -1770,13 +1733,11 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 	/*
 	 * Get a new grace-period number.  If there really is no grace
 	 * period in progress, it will be smaller than the one we obtained
-	 * earlier.  Adjust callbacks as needed.  Note that even no-CBs
-	 * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
+	 * earlier.  Adjust callbacks as needed.
 	 */
 	c = rcu_cbs_completed(rdp->rsp, rnp_root);
-	for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
-		if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
-			rdp->nxtcompleted[i] = c;
+	if (!rcu_is_nocb_cpu(rdp->cpu))
+		(void)rcu_segcblist_accelerate(&rdp->cblist, c);
 
 	/*
 	 * If the needed for the required grace period is already
@@ -1856,57 +1817,27 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
 static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 			       struct rcu_data *rdp)
 {
-	unsigned long c;
-	int i;
-	bool ret;
-
-	/* If the CPU has no callbacks, nothing to do. */
-	if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
-		return false;
-
-	/*
-	 * Starting from the sublist containing the callbacks most
-	 * recently assigned a ->completed number and working down, find the
-	 * first sublist that is not assignable to an upcoming grace period.
-	 * Such a sublist has something in it (first two tests) and has
-	 * a ->completed number assigned that will complete sooner than
-	 * the ->completed number for newly arrived callbacks (last test).
-	 *
-	 * The key point is that any later sublist can be assigned the
-	 * same ->completed number as the newly arrived callbacks, which
-	 * means that the callbacks in any of these later sublist can be
-	 * grouped into a single sublist, whether or not they have already
-	 * been assigned a ->completed number.
-	 */
-	c = rcu_cbs_completed(rsp, rnp);
-	for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
-		if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
-		    !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
-			break;
+	bool ret = false;
 
-	/*
-	 * If there are no sublist for unassigned callbacks, leave.
-	 * At the same time, advance "i" one sublist, so that "i" will
-	 * index into the sublist where all the remaining callbacks should
-	 * be grouped into.
-	 */
-	if (++i >= RCU_NEXT_TAIL)
+	/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
+	if (!rcu_segcblist_pend_cbs(&rdp->cblist))
 		return false;
 
 	/*
-	 * Assign all subsequent callbacks' ->completed number to the next
-	 * full grace period and group them all in the sublist initially
-	 * indexed by "i".
+	 * Callbacks are often registered with incomplete grace-period
+	 * information.  Something about the fact that getting exact
+	 * information requires acquiring a global lock...  RCU therefore
+	 * makes a conservative estimate of the grace period number at which
+	 * a given callback will become ready to invoke.  The following
+	 * code checks this estimate and improves it when possible, thus
+	 * accelerating callback invocation to an earlier grace-period
+	 * number.
 	 */
-	for (; i <= RCU_NEXT_TAIL; i++) {
-		rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
-		rdp->nxtcompleted[i] = c;
-	}
-	/* Record any needed additional grace periods. */
-	ret = rcu_start_future_gp(rnp, rdp, NULL);
+	if (rcu_segcblist_accelerate(&rdp->cblist, rcu_cbs_completed(rsp, rnp)))
+		ret = rcu_start_future_gp(rnp, rdp, NULL);
 
 	/* Trace depending on how much we were able to accelerate. */
-	if (!*rdp->nxttail[RCU_WAIT_TAIL])
+	if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
 	else
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
@@ -1926,32 +1857,15 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 			    struct rcu_data *rdp)
 {
-	int i, j;
-
-	/* If the CPU has no callbacks, nothing to do. */
-	if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
+	/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
+	if (!rcu_segcblist_pend_cbs(&rdp->cblist))
 		return false;
 
 	/*
 	 * Find all callbacks whose ->completed numbers indicate that they
 	 * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
 	 */
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
-		if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
-			break;
-		rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
-	}
-	/* Clean up any sublist tail pointers that were misordered above. */
-	for (j = RCU_WAIT_TAIL; j < i; j++)
-		rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
-
-	/* Copy down callbacks to fill in empty sublists. */
-	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
-		if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
-			break;
-		rdp->nxttail[j] = rdp->nxttail[i];
-		rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
-	}
+	rcu_segcblist_advance(&rdp->cblist, rnp->completed);
 
 	/* Classify any remaining callbacks. */
 	return rcu_accelerate_cbs(rsp, rnp, rdp);
@@ -2668,13 +2582,8 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 	 * because _rcu_barrier() excludes CPU-hotplug operations, so it
 	 * cannot be running now.  Thus no memory barrier is required.
 	 */
-	if (rdp->nxtlist != NULL) {
-		rsp->qlen_lazy += rdp->qlen_lazy;
-		rsp->qlen += rdp->qlen;
-		rdp->n_cbs_orphaned += rdp->qlen;
-		rdp->qlen_lazy = 0;
-		WRITE_ONCE(rdp->qlen, 0);
-	}
+	rdp->n_cbs_orphaned += rcu_segcblist_n_cbs(&rdp->cblist);
+	rcu_segcblist_extract_count(&rdp->cblist, &rsp->orphan_done);
 
 	/*
 	 * Next, move those callbacks still needing a grace period to
@@ -2682,31 +2591,18 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 	 * Some of the callbacks might have gone partway through a grace
 	 * period, but that is too bad.  They get to start over because we
 	 * cannot assume that grace periods are synchronized across CPUs.
-	 * We don't bother updating the ->nxttail[] array yet, instead
-	 * we just reset the whole thing later on.
 	 */
-	if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) {
-		*rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL];
-		rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL];
-		*rdp->nxttail[RCU_DONE_TAIL] = NULL;
-	}
+	rcu_segcblist_extract_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
 
 	/*
 	 * Then move the ready-to-invoke callbacks to the orphanage,
 	 * where some other CPU will pick them up.  These will not be
 	 * required to pass though another grace period: They are done.
 	 */
-	if (rdp->nxtlist != NULL) {
-		*rsp->orphan_donetail = rdp->nxtlist;
-		rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL];
-	}
+	rcu_segcblist_extract_done_cbs(&rdp->cblist, &rsp->orphan_done);
 
-	/*
-	 * Finally, initialize the rcu_data structure's list to empty and
-	 * disallow further callbacks on this CPU.
-	 */
-	init_callback_list(rdp);
-	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
+	/* Finally, disallow further callbacks on this CPU. */
+	rcu_segcblist_disable(&rdp->cblist);
 }
 
 /*
@@ -2715,7 +2611,6 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
  */
 static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 {
-	int i;
 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 
 	/* No-CBs CPUs are handled specially. */
@@ -2724,13 +2619,11 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 		return;
 
 	/* Do the accounting first. */
-	rdp->qlen_lazy += rsp->qlen_lazy;
-	rdp->qlen += rsp->qlen;
-	rdp->n_cbs_adopted += rsp->qlen;
-	if (rsp->qlen_lazy != rsp->qlen)
+	rdp->n_cbs_adopted += rcu_cblist_n_cbs(&rsp->orphan_done);
+	if (rcu_cblist_n_lazy_cbs(&rsp->orphan_done) !=
+	    rcu_cblist_n_cbs(&rsp->orphan_done))
 		rcu_idle_count_callbacks_posted();
-	rsp->qlen_lazy = 0;
-	rsp->qlen = 0;
+	rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done);
 
 	/*
 	 * We do not need a memory barrier here because the only way we
@@ -2738,24 +2631,13 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 	 * we are the task doing the rcu_barrier().
 	 */
 
-	/* First adopt the ready-to-invoke callbacks. */
-	if (rsp->orphan_donelist != NULL) {
-		*rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL];
-		*rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist;
-		for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--)
-			if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
-				rdp->nxttail[i] = rsp->orphan_donetail;
-		rsp->orphan_donelist = NULL;
-		rsp->orphan_donetail = &rsp->orphan_donelist;
-	}
-
-	/* And then adopt the callbacks that still need a grace period. */
-	if (rsp->orphan_nxtlist != NULL) {
-		*rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist;
-		rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail;
-		rsp->orphan_nxtlist = NULL;
-		rsp->orphan_nxttail = &rsp->orphan_nxtlist;
-	}
+	/* First adopt the ready-to-invoke callbacks, then the done ones. */
+	rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done);
+	WARN_ON_ONCE(!rcu_cblist_empty(&rsp->orphan_done));
+	rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
+	WARN_ON_ONCE(!rcu_cblist_empty(&rsp->orphan_pend));
+	WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) !=
+		     !rcu_segcblist_n_cbs(&rdp->cblist));
 }
 
 /*
@@ -2843,9 +2725,11 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 	rcu_adopt_orphan_cbs(rsp, flags);
 	raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
 
-	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
-		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
-		  cpu, rdp->qlen, rdp->nxtlist);
+	WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
+		  !rcu_segcblist_empty(&rdp->cblist),
+		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
+		  cpu, rcu_segcblist_n_cbs(&rdp->cblist),
+		  rcu_segcblist_first_cb(&rdp->cblist));
 }
 
 /*
@@ -2855,14 +2739,17 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 {
 	unsigned long flags;
-	struct rcu_head *next, *list, **tail;
-	long bl, count, count_lazy;
-	int i;
+	struct rcu_head *rhp;
+	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
+	long bl, count;
 
 	/* If no callbacks are ready, just return. */
-	if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
-		trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
-		trace_rcu_batch_end(rsp->name, 0, !!READ_ONCE(rdp->nxtlist),
+	if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
+		trace_rcu_batch_start(rsp->name,
+				      rcu_segcblist_n_lazy_cbs(&rdp->cblist),
+				      rcu_segcblist_n_cbs(&rdp->cblist), 0);
+		trace_rcu_batch_end(rsp->name, 0,
+				    !rcu_segcblist_empty(&rdp->cblist),
 				    need_resched(), is_idle_task(current),
 				    rcu_is_callbacks_kthread());
 		return;
@@ -2870,73 +2757,62 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 
 	/*
 	 * Extract the list of ready callbacks, disabling to prevent
-	 * races with call_rcu() from interrupt handlers.
+	 * races with call_rcu() from interrupt handlers.  Leave the
+	 * callback counts, as rcu_barrier() needs to be conservative.
 	 */
 	local_irq_save(flags);
 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
 	bl = rdp->blimit;
-	trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
-	list = rdp->nxtlist;
-	rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
-	*rdp->nxttail[RCU_DONE_TAIL] = NULL;
-	tail = rdp->nxttail[RCU_DONE_TAIL];
-	for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
-		if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
-			rdp->nxttail[i] = &rdp->nxtlist;
+	trace_rcu_batch_start(rsp->name, rcu_segcblist_n_lazy_cbs(&rdp->cblist),
+			      rcu_segcblist_n_cbs(&rdp->cblist), bl);
+	rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
 	local_irq_restore(flags);
 
 	/* Invoke callbacks. */
-	count = count_lazy = 0;
-	while (list) {
-		next = list->next;
-		prefetch(next);
-		debug_rcu_head_unqueue(list);
-		if (__rcu_reclaim(rsp->name, list))
-			count_lazy++;
-		list = next;
-		/* Stop only if limit reached and CPU has something to do. */
-		if (++count >= bl &&
+	rhp = rcu_cblist_dequeue(&rcl);
+	for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
+		debug_rcu_head_unqueue(rhp);
+		if (__rcu_reclaim(rsp->name, rhp))
+			rcu_cblist_dequeued_lazy(&rcl);
+		/*
+		 * Stop only if limit reached and CPU has something to do.
+		 * Note: The rcl structure counts down from zero.
+		 */
+		if (-rcu_cblist_n_cbs(&rcl) >= bl &&
 		    (need_resched() ||
 		     (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
 			break;
 	}
 
 	local_irq_save(flags);
-	trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
-			    is_idle_task(current),
+	count = -rcu_cblist_n_cbs(&rcl);
+	trace_rcu_batch_end(rsp->name, count, !rcu_cblist_empty(&rcl),
+			    need_resched(), is_idle_task(current),
 			    rcu_is_callbacks_kthread());
 
-	/* Update count, and requeue any remaining callbacks. */
-	if (list != NULL) {
-		*tail = rdp->nxtlist;
-		rdp->nxtlist = list;
-		for (i = 0; i < RCU_NEXT_SIZE; i++)
-			if (&rdp->nxtlist == rdp->nxttail[i])
-				rdp->nxttail[i] = tail;
-			else
-				break;
-	}
+	/* Update counts and requeue any remaining callbacks. */
+	rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
 	smp_mb(); /* List handling before counting for rcu_barrier(). */
-	rdp->qlen_lazy -= count_lazy;
-	WRITE_ONCE(rdp->qlen, rdp->qlen - count);
 	rdp->n_cbs_invoked += count;
+	rcu_segcblist_insert_count(&rdp->cblist, &rcl);
 
 	/* Reinstate batch limit if we have worked down the excess. */
-	if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
+	count = rcu_segcblist_n_cbs(&rdp->cblist);
+	if (rdp->blimit == LONG_MAX && count <= qlowmark)
 		rdp->blimit = blimit;
 
 	/* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
-	if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
+	if (count == 0 && rdp->qlen_last_fqs_check != 0) {
 		rdp->qlen_last_fqs_check = 0;
 		rdp->n_force_qs_snap = rsp->n_force_qs;
-	} else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
-		rdp->qlen_last_fqs_check = rdp->qlen;
-	WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
+	} else if (count < rdp->qlen_last_fqs_check - qhimark)
+		rdp->qlen_last_fqs_check = count;
+	WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0));
 
 	local_irq_restore(flags);
 
 	/* Re-invoke RCU core processing if there are callbacks remaining. */
-	if (cpu_has_callbacks_ready_to_invoke(rdp))
+	if (rcu_segcblist_ready_cbs(&rdp->cblist))
 		invoke_rcu_core();
 }
 
@@ -3120,7 +2996,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 	}
 
 	/* If there are callbacks ready, invoke them. */
-	if (cpu_has_callbacks_ready_to_invoke(rdp))
+	if (rcu_segcblist_ready_cbs(&rdp->cblist))
 		invoke_rcu_callbacks(rsp, rdp);
 
 	/* Do any needed deferred wakeups of rcuo kthreads. */
@@ -3192,7 +3068,8 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 	 * invoking force_quiescent_state() if the newly enqueued callback
 	 * is the only one waiting for a grace period to complete.
 	 */
-	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
+	if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) >
+		     rdp->qlen_last_fqs_check + qhimark)) {
 
 		/* Are we ignoring a completed grace period? */
 		note_gp_changes(rsp, rdp);
@@ -3210,10 +3087,10 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 		/* Give the grace period a kick. */
 		rdp->blimit = LONG_MAX;
 		if (rsp->n_force_qs == rdp->n_force_qs_snap &&
-		    *rdp->nxttail[RCU_DONE_TAIL] != head)
+		    rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
 			force_quiescent_state(rsp);
 		rdp->n_force_qs_snap = rsp->n_force_qs;
-		rdp->qlen_last_fqs_check = rdp->qlen;
+		rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
 		}
 	}
 }
@@ -3253,7 +3130,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
 	rdp = this_cpu_ptr(rsp->rda);
 
 	/* Add the callback to our list. */
-	if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
+	if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist)) || cpu != -1) {
 		int offline;
 
 		if (cpu != -1)
@@ -3272,23 +3149,21 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
 		 */
 		BUG_ON(cpu != -1);
 		WARN_ON_ONCE(!rcu_is_watching());
-		if (!likely(rdp->nxtlist))
-			init_default_callback_list(rdp);
+		if (rcu_segcblist_empty(&rdp->cblist))
+			rcu_segcblist_init(&rdp->cblist);
 	}
-	WRITE_ONCE(rdp->qlen, rdp->qlen + 1);
-	if (lazy)
-		rdp->qlen_lazy++;
-	else
+	rcu_segcblist_enqueue(&rdp->cblist, head, lazy);
+	if (!lazy)
 		rcu_idle_count_callbacks_posted();
-	smp_mb(); /* Count before adding callback for rcu_barrier(). */
-	*rdp->nxttail[RCU_NEXT_TAIL] = head;
-	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
 
 	if (__is_kfree_rcu_offset((unsigned long)func))
 		trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
-					 rdp->qlen_lazy, rdp->qlen);
+					 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
+					 rcu_segcblist_n_cbs(&rdp->cblist));
 	else
-		trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
+		trace_rcu_callback(rsp->name, head,
+				   rcu_segcblist_n_lazy_cbs(&rdp->cblist),
+				   rcu_segcblist_n_cbs(&rdp->cblist));
 
 	/* Go handle any RCU core processing required. */
 	__call_rcu_core(rsp, rdp, head, flags);
@@ -3600,7 +3475,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 	}
 
 	/* Does this CPU have callbacks ready to invoke? */
-	if (cpu_has_callbacks_ready_to_invoke(rdp)) {
+	if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
 		rdp->n_rp_cb_ready++;
 		return 1;
 	}
@@ -3664,10 +3539,10 @@ static bool __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy)
 
 	for_each_rcu_flavor(rsp) {
 		rdp = this_cpu_ptr(rsp->rda);
-		if (!rdp->nxtlist)
+		if (rcu_segcblist_empty(&rdp->cblist))
 			continue;
 		hc = true;
-		if (rdp->qlen != rdp->qlen_lazy || !all_lazy) {
+		if (rcu_segcblist_n_nonlazy_cbs(&rdp->cblist) || !all_lazy) {
 			al = false;
 			break;
 		}
@@ -3776,7 +3651,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 				__call_rcu(&rdp->barrier_head,
 					   rcu_barrier_callback, rsp, cpu, 0);
 			}
-		} else if (READ_ONCE(rdp->qlen)) {
+		} else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
 			_rcu_barrier_trace(rsp, "OnlineQ", cpu,
 					   rsp->barrier_sequence);
 			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
@@ -3885,8 +3760,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->qlen_last_fqs_check = 0;
 	rdp->n_force_qs_snap = rsp->n_force_qs;
 	rdp->blimit = blimit;
-	if (!rdp->nxtlist)
-		init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
+	if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */
+	    !init_nocb_callback_list(rdp))
+		rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 	rcu_sysidle_init_percpu_data(rdp->dynticks);
 	rcu_dynticks_eqs_online();