Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--   kernel/rcutree.c   478
1 file changed, 309 insertions, 169 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4b97bba7396e..f280e542e3e9 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
| @@ -60,36 +60,44 @@ | |||
| 60 | 60 | ||
| 61 | /* Data structures. */ | 61 | /* Data structures. */ |
| 62 | 62 | ||
| 63 | static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | 63 | static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; |
| 64 | 64 | ||
| 65 | #define RCU_STATE_INITIALIZER(structname) { \ | 65 | #define RCU_STATE_INITIALIZER(sname, cr) { \ |
| 66 | .level = { &structname##_state.node[0] }, \ | 66 | .level = { &sname##_state.node[0] }, \ |
| 67 | .levelcnt = { \ | 67 | .call = cr, \ |
| 68 | NUM_RCU_LVL_0, /* root of hierarchy. */ \ | ||
| 69 | NUM_RCU_LVL_1, \ | ||
| 70 | NUM_RCU_LVL_2, \ | ||
| 71 | NUM_RCU_LVL_3, \ | ||
| 72 | NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ | ||
| 73 | }, \ | ||
| 74 | .fqs_state = RCU_GP_IDLE, \ | 68 | .fqs_state = RCU_GP_IDLE, \ |
| 75 | .gpnum = -300, \ | 69 | .gpnum = -300, \ |
| 76 | .completed = -300, \ | 70 | .completed = -300, \ |
| 77 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ | 71 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \ |
| 78 | .orphan_nxttail = &structname##_state.orphan_nxtlist, \ | 72 | .orphan_nxttail = &sname##_state.orphan_nxtlist, \ |
| 79 | .orphan_donetail = &structname##_state.orphan_donelist, \ | 73 | .orphan_donetail = &sname##_state.orphan_donelist, \ |
| 80 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ | 74 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ |
| 81 | .n_force_qs = 0, \ | 75 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ |
| 82 | .n_force_qs_ngp = 0, \ | 76 | .name = #sname, \ |
| 83 | .name = #structname, \ | ||
| 84 | } | 77 | } |
| 85 | 78 | ||
| 86 | struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); | 79 | struct rcu_state rcu_sched_state = |
| 80 | RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched); | ||
| 87 | DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); | 81 | DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); |
| 88 | 82 | ||
| 89 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); | 83 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh); |
| 90 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | 84 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); |
| 91 | 85 | ||
| 92 | static struct rcu_state *rcu_state; | 86 | static struct rcu_state *rcu_state; |
| 87 | LIST_HEAD(rcu_struct_flavors); | ||
| 88 | |||
| 89 | /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ | ||
| 90 | static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; | ||
| 91 | module_param(rcu_fanout_leaf, int, 0); | ||
| 92 | int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; | ||
| 93 | static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ | ||
| 94 | NUM_RCU_LVL_0, | ||
| 95 | NUM_RCU_LVL_1, | ||
| 96 | NUM_RCU_LVL_2, | ||
| 97 | NUM_RCU_LVL_3, | ||
| 98 | NUM_RCU_LVL_4, | ||
| 99 | }; | ||
| 100 | int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ | ||
| 93 | 101 | ||
| 94 | /* | 102 | /* |
| 95 | * The rcu_scheduler_active variable transitions from zero to one just | 103 | * The rcu_scheduler_active variable transitions from zero to one just |
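The hunk above replaces per-flavor bookkeeping with list-driven iteration: rcu_struct_flavors collects every rcu_state so that later hunks can loop over rcu_sched, rcu_bh, and (when built) rcu_preempt instead of naming each one. The iterator itself is not in this file; as a minimal sketch, assuming it lives in kernel/rcutree.h next to a new "flavors" list_head in struct rcu_state, it would be little more than:

/*
 * Sketch only; the real definitions belong in kernel/rcutree.h.
 * Iterate over each registered RCU flavor (rcu_sched, rcu_bh, rcu_preempt).
 */
#define for_each_rcu_flavor(rsp) \
	list_for_each_entry((rsp), &rcu_struct_flavors, flavors)

Each flavor registers itself via the list_add(&rsp->flavors, &rcu_struct_flavors) call that this patch appends to rcu_init_one().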
| @@ -147,13 +155,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | |||
| 147 | unsigned long rcutorture_testseq; | 155 | unsigned long rcutorture_testseq; |
| 148 | unsigned long rcutorture_vernum; | 156 | unsigned long rcutorture_vernum; |
| 149 | 157 | ||
| 150 | /* State information for rcu_barrier() and friends. */ | ||
| 151 | |||
| 152 | static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; | ||
| 153 | static atomic_t rcu_barrier_cpu_count; | ||
| 154 | static DEFINE_MUTEX(rcu_barrier_mutex); | ||
| 155 | static struct completion rcu_barrier_completion; | ||
| 156 | |||
| 157 | /* | 158 | /* |
| 158 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s | 159 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s |
| 159 | * permit this function to be invoked without holding the root rcu_node | 160 | * permit this function to be invoked without holding the root rcu_node |
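The four file-scope rcu_barrier() variables deleted here move into per-flavor state, so rcu_barrier_bh() and rcu_barrier_sched() no longer serialize on one shared mutex, counter, and completion. Judging purely from the uses later in this patch (->barrier_mutex, ->barrier_cpu_count, ->barrier_completion, ->n_barrier_done, ->call, rdp->barrier_head, rdp->rsp), the companion rcutree.h change presumably adds fields along these lines; the exact layout and comments are an assumption of this sketch:

struct rcu_data {
	/* ... existing fields ... */
	struct rcu_head barrier_head;		/* Posted by _rcu_barrier() on each CPU. */
	struct rcu_state *rsp;			/* Back-pointer for rcu_barrier_callback(). */
};

struct rcu_state {
	/* ... existing fields ... */
	void (*call)(struct rcu_head *head,	/* call_rcu_sched(), call_rcu_bh(), ... */
		     void (*func)(struct rcu_head *head));
	struct mutex barrier_mutex;		/* Serializes _rcu_barrier() per flavor. */
	atomic_t barrier_cpu_count;		/* Barrier callbacks still outstanding. */
	struct completion barrier_completion;	/* Signaled when the count reaches zero. */
	unsigned long n_barrier_done;		/* Odd while a barrier is in flight. */
};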
| @@ -358,7 +359,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) | |||
| 358 | struct task_struct *idle = idle_task(smp_processor_id()); | 359 | struct task_struct *idle = idle_task(smp_processor_id()); |
| 359 | 360 | ||
| 360 | trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); | 361 | trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); |
| 361 | ftrace_dump(DUMP_ALL); | 362 | ftrace_dump(DUMP_ORIG); |
| 362 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | 363 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", |
| 363 | current->pid, current->comm, | 364 | current->pid, current->comm, |
| 364 | idle->pid, idle->comm); /* must be idle task! */ | 365 | idle->pid, idle->comm); /* must be idle task! */ |
| @@ -468,7 +469,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) | |||
| 468 | 469 | ||
| 469 | trace_rcu_dyntick("Error on exit: not idle task", | 470 | trace_rcu_dyntick("Error on exit: not idle task", |
| 470 | oldval, rdtp->dynticks_nesting); | 471 | oldval, rdtp->dynticks_nesting); |
| 471 | ftrace_dump(DUMP_ALL); | 472 | ftrace_dump(DUMP_ORIG); |
| 472 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | 473 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", |
| 473 | current->pid, current->comm, | 474 | current->pid, current->comm, |
| 474 | idle->pid, idle->comm); /* must be idle task! */ | 475 | idle->pid, idle->comm); /* must be idle task! */ |
| @@ -585,8 +586,6 @@ void rcu_nmi_exit(void) | |||
| 585 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | 586 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); |
| 586 | } | 587 | } |
| 587 | 588 | ||
| 588 | #ifdef CONFIG_PROVE_RCU | ||
| 589 | |||
| 590 | /** | 589 | /** |
| 591 | * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle | 590 | * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle |
| 592 | * | 591 | * |
| @@ -604,7 +603,7 @@ int rcu_is_cpu_idle(void) | |||
| 604 | } | 603 | } |
| 605 | EXPORT_SYMBOL(rcu_is_cpu_idle); | 604 | EXPORT_SYMBOL(rcu_is_cpu_idle); |
| 606 | 605 | ||
| 607 | #ifdef CONFIG_HOTPLUG_CPU | 606 | #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) |
| 608 | 607 | ||
| 609 | /* | 608 | /* |
| 610 | * Is the current CPU online? Disable preemption to avoid false positives | 609 | * Is the current CPU online? Disable preemption to avoid false positives |
| @@ -645,9 +644,7 @@ bool rcu_lockdep_current_cpu_online(void) | |||
| 645 | } | 644 | } |
| 646 | EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); | 645 | EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); |
| 647 | 646 | ||
| 648 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 647 | #endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */ |
| 649 | |||
| 650 | #endif /* #ifdef CONFIG_PROVE_RCU */ | ||
| 651 | 648 | ||
| 652 | /** | 649 | /** |
| 653 | * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle | 650 | * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle |
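Dropping the outer CONFIG_PROVE_RCU block means rcu_is_cpu_idle() is now built unconditionally, while rcu_lockdep_current_cpu_online() remains conditional on both lockdep proving and CPU hotplug. The header side presumably pairs the combined guard with a trivial fallback, roughly as below (a sketch, not the exact rcupdate.h text):

#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
bool rcu_lockdep_current_cpu_online(void);
#else /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
static inline bool rcu_lockdep_current_cpu_online(void)
{
	return true;	/* Cannot check, so never complain. */
}
#endif /* #else #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */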
| @@ -733,7 +730,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
| 733 | int cpu; | 730 | int cpu; |
| 734 | long delta; | 731 | long delta; |
| 735 | unsigned long flags; | 732 | unsigned long flags; |
| 736 | int ndetected; | 733 | int ndetected = 0; |
| 737 | struct rcu_node *rnp = rcu_get_root(rsp); | 734 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 738 | 735 | ||
| 739 | /* Only let one CPU complain about others per time interval. */ | 736 | /* Only let one CPU complain about others per time interval. */ |
| @@ -774,7 +771,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
| 774 | */ | 771 | */ |
| 775 | rnp = rcu_get_root(rsp); | 772 | rnp = rcu_get_root(rsp); |
| 776 | raw_spin_lock_irqsave(&rnp->lock, flags); | 773 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 777 | ndetected = rcu_print_task_stall(rnp); | 774 | ndetected += rcu_print_task_stall(rnp); |
| 778 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 775 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 779 | 776 | ||
| 780 | print_cpu_stall_info_end(); | 777 | print_cpu_stall_info_end(); |
| @@ -860,9 +857,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) | |||
| 860 | */ | 857 | */ |
| 861 | void rcu_cpu_stall_reset(void) | 858 | void rcu_cpu_stall_reset(void) |
| 862 | { | 859 | { |
| 863 | rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; | 860 | struct rcu_state *rsp; |
| 864 | rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; | 861 | |
| 865 | rcu_preempt_stall_reset(); | 862 | for_each_rcu_flavor(rsp) |
| 863 | rsp->jiffies_stall = jiffies + ULONG_MAX / 2; | ||
| 866 | } | 864 | } |
| 867 | 865 | ||
| 868 | static struct notifier_block rcu_panic_block = { | 866 | static struct notifier_block rcu_panic_block = { |
| @@ -894,8 +892,9 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct | |||
| 894 | if (rnp->qsmask & rdp->grpmask) { | 892 | if (rnp->qsmask & rdp->grpmask) { |
| 895 | rdp->qs_pending = 1; | 893 | rdp->qs_pending = 1; |
| 896 | rdp->passed_quiesce = 0; | 894 | rdp->passed_quiesce = 0; |
| 897 | } else | 895 | } else { |
| 898 | rdp->qs_pending = 0; | 896 | rdp->qs_pending = 0; |
| 897 | } | ||
| 899 | zero_cpu_stall_ticks(rdp); | 898 | zero_cpu_stall_ticks(rdp); |
| 900 | } | 899 | } |
| 901 | } | 900 | } |
| @@ -937,6 +936,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 937 | } | 936 | } |
| 938 | 937 | ||
| 939 | /* | 938 | /* |
| 939 | * Initialize the specified rcu_data structure's callback list to empty. | ||
| 940 | */ | ||
| 941 | static void init_callback_list(struct rcu_data *rdp) | ||
| 942 | { | ||
| 943 | int i; | ||
| 944 | |||
| 945 | rdp->nxtlist = NULL; | ||
| 946 | for (i = 0; i < RCU_NEXT_SIZE; i++) | ||
| 947 | rdp->nxttail[i] = &rdp->nxtlist; | ||
| 948 | } | ||
| 949 | |||
| 950 | /* | ||
| 940 | * Advance this CPU's callbacks, but only if the current grace period | 951 | * Advance this CPU's callbacks, but only if the current grace period |
| 941 | * has ended. This may be called only from the CPU to whom the rdp | 952 | * has ended. This may be called only from the CPU to whom the rdp |
| 942 | * belongs. In addition, the corresponding leaf rcu_node structure's | 953 | * belongs. In addition, the corresponding leaf rcu_node structure's |
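init_callback_list() resets a CPU's segmented callback list: a single linked list rooted at ->nxtlist whose segments are delimited by the ->nxttail[] pointer array. For reference while reading the surrounding hunks, the segment indices conventionally defined in kernel/rcutree.h are sketched below; treat the exact names and comments as this sketch's assumption rather than part of this patch:

#define RCU_DONE_TAIL		0	/* Callbacks whose grace period has ended. */
#define RCU_WAIT_TAIL		1	/* Callbacks waiting on the current grace period. */
#define RCU_NEXT_READY_TAIL	2	/* Callbacks waiting on the next grace period. */
#define RCU_NEXT_TAIL		3	/* Newly queued, not yet assigned a grace period. */
#define RCU_NEXT_SIZE		4

After init_callback_list(), every nxttail[i] points back at &rdp->nxtlist, so all four segments are empty; enqueuing appends at *nxttail[RCU_NEXT_TAIL] and advances that tail pointer.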
| @@ -1328,8 +1339,6 @@ static void | |||
| 1328 | rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, | 1339 | rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, |
| 1329 | struct rcu_node *rnp, struct rcu_data *rdp) | 1340 | struct rcu_node *rnp, struct rcu_data *rdp) |
| 1330 | { | 1341 | { |
| 1331 | int i; | ||
| 1332 | |||
| 1333 | /* | 1342 | /* |
| 1334 | * Orphan the callbacks. First adjust the counts. This is safe | 1343 | * Orphan the callbacks. First adjust the counts. This is safe |
| 1335 | * because ->onofflock excludes _rcu_barrier()'s adoption of | 1344 | * because ->onofflock excludes _rcu_barrier()'s adoption of |
| @@ -1340,7 +1349,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, | |||
| 1340 | rsp->qlen += rdp->qlen; | 1349 | rsp->qlen += rdp->qlen; |
| 1341 | rdp->n_cbs_orphaned += rdp->qlen; | 1350 | rdp->n_cbs_orphaned += rdp->qlen; |
| 1342 | rdp->qlen_lazy = 0; | 1351 | rdp->qlen_lazy = 0; |
| 1343 | rdp->qlen = 0; | 1352 | ACCESS_ONCE(rdp->qlen) = 0; |
| 1344 | } | 1353 | } |
| 1345 | 1354 | ||
| 1346 | /* | 1355 | /* |
| @@ -1369,9 +1378,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, | |||
| 1369 | } | 1378 | } |
| 1370 | 1379 | ||
| 1371 | /* Finally, initialize the rcu_data structure's list to empty. */ | 1380 | /* Finally, initialize the rcu_data structure's list to empty. */ |
| 1372 | rdp->nxtlist = NULL; | 1381 | init_callback_list(rdp); |
| 1373 | for (i = 0; i < RCU_NEXT_SIZE; i++) | ||
| 1374 | rdp->nxttail[i] = &rdp->nxtlist; | ||
| 1375 | } | 1382 | } |
| 1376 | 1383 | ||
| 1377 | /* | 1384 | /* |
| @@ -1505,6 +1512,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
| 1505 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1512 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 1506 | if (need_report & RCU_OFL_TASKS_EXP_GP) | 1513 | if (need_report & RCU_OFL_TASKS_EXP_GP) |
| 1507 | rcu_report_exp_rnp(rsp, rnp, true); | 1514 | rcu_report_exp_rnp(rsp, rnp, true); |
| 1515 | WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, | ||
| 1516 | "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", | ||
| 1517 | cpu, rdp->qlen, rdp->nxtlist); | ||
| 1508 | } | 1518 | } |
| 1509 | 1519 | ||
| 1510 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1520 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
| @@ -1592,7 +1602,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1592 | } | 1602 | } |
| 1593 | smp_mb(); /* List handling before counting for rcu_barrier(). */ | 1603 | smp_mb(); /* List handling before counting for rcu_barrier(). */ |
| 1594 | rdp->qlen_lazy -= count_lazy; | 1604 | rdp->qlen_lazy -= count_lazy; |
| 1595 | rdp->qlen -= count; | 1605 | ACCESS_ONCE(rdp->qlen) -= count; |
| 1596 | rdp->n_cbs_invoked += count; | 1606 | rdp->n_cbs_invoked += count; |
| 1597 | 1607 | ||
| 1598 | /* Reinstate batch limit if we have worked down the excess. */ | 1608 | /* Reinstate batch limit if we have worked down the excess. */ |
| @@ -1605,6 +1615,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1605 | rdp->n_force_qs_snap = rsp->n_force_qs; | 1615 | rdp->n_force_qs_snap = rsp->n_force_qs; |
| 1606 | } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) | 1616 | } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) |
| 1607 | rdp->qlen_last_fqs_check = rdp->qlen; | 1617 | rdp->qlen_last_fqs_check = rdp->qlen; |
| 1618 | WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0)); | ||
| 1608 | 1619 | ||
| 1609 | local_irq_restore(flags); | 1620 | local_irq_restore(flags); |
| 1610 | 1621 | ||
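The plain ->qlen updates become ACCESS_ONCE() updates because _rcu_barrier(), later in this patch, samples ->qlen from another CPU with no lock held (for example in its cpu_is_offline() polling loop). ACCESS_ONCE() works as an lvalue too, since it is essentially a volatile cast; the classic definition from include/linux/compiler.h is, from memory:

#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

so ACCESS_ONCE(rdp->qlen) -= count forces the compiler to perform one real load and one real store of ->qlen here instead of caching or refetching the value out from under the remote, lockless reader.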
| @@ -1745,8 +1756,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
| 1745 | break; /* grace period idle or initializing, ignore. */ | 1756 | break; /* grace period idle or initializing, ignore. */ |
| 1746 | 1757 | ||
| 1747 | case RCU_SAVE_DYNTICK: | 1758 | case RCU_SAVE_DYNTICK: |
| 1748 | if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) | ||
| 1749 | break; /* So gcc recognizes the dead code. */ | ||
| 1750 | 1759 | ||
| 1751 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 1760 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ |
| 1752 | 1761 | ||
| @@ -1788,9 +1797,10 @@ unlock_fqs_ret: | |||
| 1788 | * whom the rdp belongs. | 1797 | * whom the rdp belongs. |
| 1789 | */ | 1798 | */ |
| 1790 | static void | 1799 | static void |
| 1791 | __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | 1800 | __rcu_process_callbacks(struct rcu_state *rsp) |
| 1792 | { | 1801 | { |
| 1793 | unsigned long flags; | 1802 | unsigned long flags; |
| 1803 | struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); | ||
| 1794 | 1804 | ||
| 1795 | WARN_ON_ONCE(rdp->beenonline == 0); | 1805 | WARN_ON_ONCE(rdp->beenonline == 0); |
| 1796 | 1806 | ||
| @@ -1826,11 +1836,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1826 | */ | 1836 | */ |
| 1827 | static void rcu_process_callbacks(struct softirq_action *unused) | 1837 | static void rcu_process_callbacks(struct softirq_action *unused) |
| 1828 | { | 1838 | { |
| 1839 | struct rcu_state *rsp; | ||
| 1840 | |||
| 1829 | trace_rcu_utilization("Start RCU core"); | 1841 | trace_rcu_utilization("Start RCU core"); |
| 1830 | __rcu_process_callbacks(&rcu_sched_state, | 1842 | for_each_rcu_flavor(rsp) |
| 1831 | &__get_cpu_var(rcu_sched_data)); | 1843 | __rcu_process_callbacks(rsp); |
| 1832 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); | ||
| 1833 | rcu_preempt_process_callbacks(); | ||
| 1834 | trace_rcu_utilization("End RCU core"); | 1844 | trace_rcu_utilization("End RCU core"); |
| 1835 | } | 1845 | } |
| 1836 | 1846 | ||
| @@ -1857,6 +1867,56 @@ static void invoke_rcu_core(void) | |||
| 1857 | raise_softirq(RCU_SOFTIRQ); | 1867 | raise_softirq(RCU_SOFTIRQ); |
| 1858 | } | 1868 | } |
| 1859 | 1869 | ||
| 1870 | /* | ||
| 1871 | * Handle any core-RCU processing required by a call_rcu() invocation. | ||
| 1872 | */ | ||
| 1873 | static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, | ||
| 1874 | struct rcu_head *head, unsigned long flags) | ||
| 1875 | { | ||
| 1876 | /* | ||
| 1877 | * If called from an extended quiescent state, invoke the RCU | ||
| 1878 | * core in order to force a re-evaluation of RCU's idleness. | ||
| 1879 | */ | ||
| 1880 | if (rcu_is_cpu_idle() && cpu_online(smp_processor_id())) | ||
| 1881 | invoke_rcu_core(); | ||
| 1882 | |||
| 1883 | /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ | ||
| 1884 | if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) | ||
| 1885 | return; | ||
| 1886 | |||
| 1887 | /* | ||
| 1888 | * Force the grace period if too many callbacks or too long waiting. | ||
| 1889 | * Enforce hysteresis, and don't invoke force_quiescent_state() | ||
| 1890 | * if some other CPU has recently done so. Also, don't bother | ||
| 1891 | * invoking force_quiescent_state() if the newly enqueued callback | ||
| 1892 | * is the only one waiting for a grace period to complete. | ||
| 1893 | */ | ||
| 1894 | if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { | ||
| 1895 | |||
| 1896 | /* Are we ignoring a completed grace period? */ | ||
| 1897 | rcu_process_gp_end(rsp, rdp); | ||
| 1898 | check_for_new_grace_period(rsp, rdp); | ||
| 1899 | |||
| 1900 | /* Start a new grace period if one not already started. */ | ||
| 1901 | if (!rcu_gp_in_progress(rsp)) { | ||
| 1902 | unsigned long nestflag; | ||
| 1903 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
| 1904 | |||
| 1905 | raw_spin_lock_irqsave(&rnp_root->lock, nestflag); | ||
| 1906 | rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */ | ||
| 1907 | } else { | ||
| 1908 | /* Give the grace period a kick. */ | ||
| 1909 | rdp->blimit = LONG_MAX; | ||
| 1910 | if (rsp->n_force_qs == rdp->n_force_qs_snap && | ||
| 1911 | *rdp->nxttail[RCU_DONE_TAIL] != head) | ||
| 1912 | force_quiescent_state(rsp, 0); | ||
| 1913 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
| 1914 | rdp->qlen_last_fqs_check = rdp->qlen; | ||
| 1915 | } | ||
| 1916 | } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) | ||
| 1917 | force_quiescent_state(rsp, 1); | ||
| 1918 | } | ||
| 1919 | |||
| 1860 | static void | 1920 | static void |
| 1861 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | 1921 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), |
| 1862 | struct rcu_state *rsp, bool lazy) | 1922 | struct rcu_state *rsp, bool lazy) |
| @@ -1881,7 +1941,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
| 1881 | rdp = this_cpu_ptr(rsp->rda); | 1941 | rdp = this_cpu_ptr(rsp->rda); |
| 1882 | 1942 | ||
| 1883 | /* Add the callback to our list. */ | 1943 | /* Add the callback to our list. */ |
| 1884 | rdp->qlen++; | 1944 | ACCESS_ONCE(rdp->qlen)++; |
| 1885 | if (lazy) | 1945 | if (lazy) |
| 1886 | rdp->qlen_lazy++; | 1946 | rdp->qlen_lazy++; |
| 1887 | else | 1947 | else |
| @@ -1896,43 +1956,8 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
| 1896 | else | 1956 | else |
| 1897 | trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); | 1957 | trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); |
| 1898 | 1958 | ||
| 1899 | /* If interrupts were disabled, don't dive into RCU core. */ | 1959 | /* Go handle any RCU core processing required. */ |
| 1900 | if (irqs_disabled_flags(flags)) { | 1960 | __call_rcu_core(rsp, rdp, head, flags); |
| 1901 | local_irq_restore(flags); | ||
| 1902 | return; | ||
| 1903 | } | ||
| 1904 | |||
| 1905 | /* | ||
| 1906 | * Force the grace period if too many callbacks or too long waiting. | ||
| 1907 | * Enforce hysteresis, and don't invoke force_quiescent_state() | ||
| 1908 | * if some other CPU has recently done so. Also, don't bother | ||
| 1909 | * invoking force_quiescent_state() if the newly enqueued callback | ||
| 1910 | * is the only one waiting for a grace period to complete. | ||
| 1911 | */ | ||
| 1912 | if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { | ||
| 1913 | |||
| 1914 | /* Are we ignoring a completed grace period? */ | ||
| 1915 | rcu_process_gp_end(rsp, rdp); | ||
| 1916 | check_for_new_grace_period(rsp, rdp); | ||
| 1917 | |||
| 1918 | /* Start a new grace period if one not already started. */ | ||
| 1919 | if (!rcu_gp_in_progress(rsp)) { | ||
| 1920 | unsigned long nestflag; | ||
| 1921 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
| 1922 | |||
| 1923 | raw_spin_lock_irqsave(&rnp_root->lock, nestflag); | ||
| 1924 | rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */ | ||
| 1925 | } else { | ||
| 1926 | /* Give the grace period a kick. */ | ||
| 1927 | rdp->blimit = LONG_MAX; | ||
| 1928 | if (rsp->n_force_qs == rdp->n_force_qs_snap && | ||
| 1929 | *rdp->nxttail[RCU_DONE_TAIL] != head) | ||
| 1930 | force_quiescent_state(rsp, 0); | ||
| 1931 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
| 1932 | rdp->qlen_last_fqs_check = rdp->qlen; | ||
| 1933 | } | ||
| 1934 | } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) | ||
| 1935 | force_quiescent_state(rsp, 1); | ||
| 1936 | local_irq_restore(flags); | 1961 | local_irq_restore(flags); |
| 1937 | } | 1962 | } |
| 1938 | 1963 | ||
| @@ -1962,28 +1987,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); | |||
| 1962 | * occasionally incorrectly indicate that there are multiple CPUs online | 1987 | * occasionally incorrectly indicate that there are multiple CPUs online |
| 1963 | * when there was in fact only one the whole time, as this just adds | 1988 | * when there was in fact only one the whole time, as this just adds |
| 1964 | * some overhead: RCU still operates correctly. | 1989 | * some overhead: RCU still operates correctly. |
| 1965 | * | ||
| 1966 | * Of course, sampling num_online_cpus() with preemption enabled can | ||
| 1967 | * give erroneous results if there are concurrent CPU-hotplug operations. | ||
| 1968 | * For example, given a demonic sequence of preemptions in num_online_cpus() | ||
| 1969 | * and CPU-hotplug operations, there could be two or more CPUs online at | ||
| 1970 | * all times, but num_online_cpus() might well return one (or even zero). | ||
| 1971 | * | ||
| 1972 | * However, all such demonic sequences require at least one CPU-offline | ||
| 1973 | * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer | ||
| 1974 | * is only a problem if there is an RCU read-side critical section executing | ||
| 1975 | * throughout. But RCU-sched and RCU-bh read-side critical sections | ||
| 1976 | * disable either preemption or bh, which prevents a CPU from going offline. | ||
| 1977 | * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return | ||
| 1978 | * that there is only one CPU when in fact there was more than one throughout | ||
| 1979 | * is when there were no RCU readers in the system. If there are no | ||
| 1980 | * RCU readers, the grace period by definition can be of zero length, | ||
| 1981 | * regardless of the number of online CPUs. | ||
| 1982 | */ | 1990 | */ |
| 1983 | static inline int rcu_blocking_is_gp(void) | 1991 | static inline int rcu_blocking_is_gp(void) |
| 1984 | { | 1992 | { |
| 1993 | int ret; | ||
| 1994 | |||
| 1985 | might_sleep(); /* Check for RCU read-side critical section. */ | 1995 | might_sleep(); /* Check for RCU read-side critical section. */ |
| 1986 | return num_online_cpus() <= 1; | 1996 | preempt_disable(); |
| 1997 | ret = num_online_cpus() <= 1; | ||
| 1998 | preempt_enable(); | ||
| 1999 | return ret; | ||
| 1987 | } | 2000 | } |
| 1988 | 2001 | ||
| 1989 | /** | 2002 | /** |
| @@ -2118,9 +2131,9 @@ void synchronize_sched_expedited(void) | |||
| 2118 | put_online_cpus(); | 2131 | put_online_cpus(); |
| 2119 | 2132 | ||
| 2120 | /* No joy, try again later. Or just synchronize_sched(). */ | 2133 | /* No joy, try again later. Or just synchronize_sched(). */ |
| 2121 | if (trycount++ < 10) | 2134 | if (trycount++ < 10) { |
| 2122 | udelay(trycount * num_online_cpus()); | 2135 | udelay(trycount * num_online_cpus()); |
| 2123 | else { | 2136 | } else { |
| 2124 | synchronize_sched(); | 2137 | synchronize_sched(); |
| 2125 | return; | 2138 | return; |
| 2126 | } | 2139 | } |
| @@ -2241,9 +2254,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 2241 | */ | 2254 | */ |
| 2242 | static int rcu_pending(int cpu) | 2255 | static int rcu_pending(int cpu) |
| 2243 | { | 2256 | { |
| 2244 | return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || | 2257 | struct rcu_state *rsp; |
| 2245 | __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || | 2258 | |
| 2246 | rcu_preempt_pending(cpu); | 2259 | for_each_rcu_flavor(rsp) |
| 2260 | if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu))) | ||
| 2261 | return 1; | ||
| 2262 | return 0; | ||
| 2247 | } | 2263 | } |
| 2248 | 2264 | ||
| 2249 | /* | 2265 | /* |
| @@ -2253,20 +2269,41 @@ static int rcu_pending(int cpu) | |||
| 2253 | */ | 2269 | */ |
| 2254 | static int rcu_cpu_has_callbacks(int cpu) | 2270 | static int rcu_cpu_has_callbacks(int cpu) |
| 2255 | { | 2271 | { |
| 2272 | struct rcu_state *rsp; | ||
| 2273 | |||
| 2256 | /* RCU callbacks either ready or pending? */ | 2274 | /* RCU callbacks either ready or pending? */ |
| 2257 | return per_cpu(rcu_sched_data, cpu).nxtlist || | 2275 | for_each_rcu_flavor(rsp) |
| 2258 | per_cpu(rcu_bh_data, cpu).nxtlist || | 2276 | if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) |
| 2259 | rcu_preempt_cpu_has_callbacks(cpu); | 2277 | return 1; |
| 2278 | return 0; | ||
| 2279 | } | ||
| 2280 | |||
| 2281 | /* | ||
| 2282 | * Helper function for _rcu_barrier() tracing. If tracing is disabled, | ||
| 2283 | * the compiler is expected to optimize this away. | ||
| 2284 | */ | ||
| 2285 | static void _rcu_barrier_trace(struct rcu_state *rsp, char *s, | ||
| 2286 | int cpu, unsigned long done) | ||
| 2287 | { | ||
| 2288 | trace_rcu_barrier(rsp->name, s, cpu, | ||
| 2289 | atomic_read(&rsp->barrier_cpu_count), done); | ||
| 2260 | } | 2290 | } |
| 2261 | 2291 | ||
| 2262 | /* | 2292 | /* |
| 2263 | * RCU callback function for _rcu_barrier(). If we are last, wake | 2293 | * RCU callback function for _rcu_barrier(). If we are last, wake |
| 2264 | * up the task executing _rcu_barrier(). | 2294 | * up the task executing _rcu_barrier(). |
| 2265 | */ | 2295 | */ |
| 2266 | static void rcu_barrier_callback(struct rcu_head *notused) | 2296 | static void rcu_barrier_callback(struct rcu_head *rhp) |
| 2267 | { | 2297 | { |
| 2268 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) | 2298 | struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head); |
| 2269 | complete(&rcu_barrier_completion); | 2299 | struct rcu_state *rsp = rdp->rsp; |
| 2300 | |||
| 2301 | if (atomic_dec_and_test(&rsp->barrier_cpu_count)) { | ||
| 2302 | _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done); | ||
| 2303 | complete(&rsp->barrier_completion); | ||
| 2304 | } else { | ||
| 2305 | _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done); | ||
| 2306 | } | ||
| 2270 | } | 2307 | } |
| 2271 | 2308 | ||
| 2272 | /* | 2309 | /* |
| @@ -2274,35 +2311,63 @@ static void rcu_barrier_callback(struct rcu_head *notused) | |||
| 2274 | */ | 2311 | */ |
| 2275 | static void rcu_barrier_func(void *type) | 2312 | static void rcu_barrier_func(void *type) |
| 2276 | { | 2313 | { |
| 2277 | int cpu = smp_processor_id(); | 2314 | struct rcu_state *rsp = type; |
| 2278 | struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); | 2315 | struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); |
| 2279 | void (*call_rcu_func)(struct rcu_head *head, | ||
| 2280 | void (*func)(struct rcu_head *head)); | ||
| 2281 | 2316 | ||
| 2282 | atomic_inc(&rcu_barrier_cpu_count); | 2317 | _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done); |
| 2283 | call_rcu_func = type; | 2318 | atomic_inc(&rsp->barrier_cpu_count); |
| 2284 | call_rcu_func(head, rcu_barrier_callback); | 2319 | rsp->call(&rdp->barrier_head, rcu_barrier_callback); |
| 2285 | } | 2320 | } |
| 2286 | 2321 | ||
| 2287 | /* | 2322 | /* |
| 2288 | * Orchestrate the specified type of RCU barrier, waiting for all | 2323 | * Orchestrate the specified type of RCU barrier, waiting for all |
| 2289 | * RCU callbacks of the specified type to complete. | 2324 | * RCU callbacks of the specified type to complete. |
| 2290 | */ | 2325 | */ |
| 2291 | static void _rcu_barrier(struct rcu_state *rsp, | 2326 | static void _rcu_barrier(struct rcu_state *rsp) |
| 2292 | void (*call_rcu_func)(struct rcu_head *head, | ||
| 2293 | void (*func)(struct rcu_head *head))) | ||
| 2294 | { | 2327 | { |
| 2295 | int cpu; | 2328 | int cpu; |
| 2296 | unsigned long flags; | 2329 | unsigned long flags; |
| 2297 | struct rcu_data *rdp; | 2330 | struct rcu_data *rdp; |
| 2298 | struct rcu_head rh; | 2331 | struct rcu_data rd; |
| 2332 | unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); | ||
| 2333 | unsigned long snap_done; | ||
| 2299 | 2334 | ||
| 2300 | init_rcu_head_on_stack(&rh); | 2335 | init_rcu_head_on_stack(&rd.barrier_head); |
| 2336 | _rcu_barrier_trace(rsp, "Begin", -1, snap); | ||
| 2301 | 2337 | ||
| 2302 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ | 2338 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ |
| 2303 | mutex_lock(&rcu_barrier_mutex); | 2339 | mutex_lock(&rsp->barrier_mutex); |
| 2340 | |||
| 2341 | /* | ||
| 2342 | * Ensure that all prior references, including to ->n_barrier_done, | ||
| 2343 | * are ordered before the _rcu_barrier() machinery. | ||
| 2344 | */ | ||
| 2345 | smp_mb(); /* See above block comment. */ | ||
| 2346 | |||
| 2347 | /* | ||
| 2348 | * Recheck ->n_barrier_done to see if others did our work for us. | ||
| 2349 | * This means checking ->n_barrier_done for an even-to-odd-to-even | ||
| 2350 | * transition. The "if" expression below therefore rounds the old | ||
| 2351 | * value up to the next even number and adds two before comparing. | ||
| 2352 | */ | ||
| 2353 | snap_done = ACCESS_ONCE(rsp->n_barrier_done); | ||
| 2354 | _rcu_barrier_trace(rsp, "Check", -1, snap_done); | ||
| 2355 | if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) { | ||
| 2356 | _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done); | ||
| 2357 | smp_mb(); /* caller's subsequent code after above check. */ | ||
| 2358 | mutex_unlock(&rsp->barrier_mutex); | ||
| 2359 | return; | ||
| 2360 | } | ||
| 2304 | 2361 | ||
| 2305 | smp_mb(); /* Prevent any prior operations from leaking in. */ | 2362 | /* |
| 2363 | * Increment ->n_barrier_done to avoid duplicate work. Use | ||
| 2364 | * ACCESS_ONCE() to prevent the compiler from speculating | ||
| 2365 | * the increment to precede the early-exit check. | ||
| 2366 | */ | ||
| 2367 | ACCESS_ONCE(rsp->n_barrier_done)++; | ||
| 2368 | WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); | ||
| 2369 | _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done); | ||
| 2370 | smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */ | ||
| 2306 | 2371 | ||
| 2307 | /* | 2372 | /* |
| 2308 | * Initialize the count to one rather than to zero in order to | 2373 | * Initialize the count to one rather than to zero in order to |
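The early-exit test above reads more easily with numbers. ->n_barrier_done goes even to odd when a barrier starts ("Inc1") and odd to even when it finishes ("Inc2"), and snap was taken before the mutex was acquired. If snap is even, say 4, then ((4 + 1) & ~0x1) + 2 == 6, so this caller may skip the real work only after some other barrier has both started (5) and completed (6) since the snapshot; that barrier necessarily began after our caller's earlier callbacks were posted, so its completion covers them. If snap is odd, say 5, a barrier was already in flight at snapshot time and may have begun too early to cover us, so its completion (6) does not suffice; the test then demands ((5 + 1) & ~0x1) + 2 == 8, that is, a further barrier that starts (7) and finishes (8) strictly after ours was requested.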
| @@ -2321,8 +2386,8 @@ static void _rcu_barrier(struct rcu_state *rsp, | |||
| 2321 | * 6. Both rcu_barrier_callback() callbacks are invoked, awakening | 2386 | * 6. Both rcu_barrier_callback() callbacks are invoked, awakening |
| 2322 | * us -- but before CPU 1's orphaned callbacks are invoked!!! | 2387 | * us -- but before CPU 1's orphaned callbacks are invoked!!! |
| 2323 | */ | 2388 | */ |
| 2324 | init_completion(&rcu_barrier_completion); | 2389 | init_completion(&rsp->barrier_completion); |
| 2325 | atomic_set(&rcu_barrier_cpu_count, 1); | 2390 | atomic_set(&rsp->barrier_cpu_count, 1); |
| 2326 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 2391 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
| 2327 | rsp->rcu_barrier_in_progress = current; | 2392 | rsp->rcu_barrier_in_progress = current; |
| 2328 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 2393 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
| @@ -2338,14 +2403,19 @@ static void _rcu_barrier(struct rcu_state *rsp, | |||
| 2338 | preempt_disable(); | 2403 | preempt_disable(); |
| 2339 | rdp = per_cpu_ptr(rsp->rda, cpu); | 2404 | rdp = per_cpu_ptr(rsp->rda, cpu); |
| 2340 | if (cpu_is_offline(cpu)) { | 2405 | if (cpu_is_offline(cpu)) { |
| 2406 | _rcu_barrier_trace(rsp, "Offline", cpu, | ||
| 2407 | rsp->n_barrier_done); | ||
| 2341 | preempt_enable(); | 2408 | preempt_enable(); |
| 2342 | while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) | 2409 | while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) |
| 2343 | schedule_timeout_interruptible(1); | 2410 | schedule_timeout_interruptible(1); |
| 2344 | } else if (ACCESS_ONCE(rdp->qlen)) { | 2411 | } else if (ACCESS_ONCE(rdp->qlen)) { |
| 2345 | smp_call_function_single(cpu, rcu_barrier_func, | 2412 | _rcu_barrier_trace(rsp, "OnlineQ", cpu, |
| 2346 | (void *)call_rcu_func, 1); | 2413 | rsp->n_barrier_done); |
| 2414 | smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); | ||
| 2347 | preempt_enable(); | 2415 | preempt_enable(); |
| 2348 | } else { | 2416 | } else { |
| 2417 | _rcu_barrier_trace(rsp, "OnlineNQ", cpu, | ||
| 2418 | rsp->n_barrier_done); | ||
| 2349 | preempt_enable(); | 2419 | preempt_enable(); |
| 2350 | } | 2420 | } |
| 2351 | } | 2421 | } |
| @@ -2362,24 +2432,32 @@ static void _rcu_barrier(struct rcu_state *rsp, | |||
| 2362 | rcu_adopt_orphan_cbs(rsp); | 2432 | rcu_adopt_orphan_cbs(rsp); |
| 2363 | rsp->rcu_barrier_in_progress = NULL; | 2433 | rsp->rcu_barrier_in_progress = NULL; |
| 2364 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 2434 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
| 2365 | atomic_inc(&rcu_barrier_cpu_count); | 2435 | atomic_inc(&rsp->barrier_cpu_count); |
| 2366 | smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ | 2436 | smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ |
| 2367 | call_rcu_func(&rh, rcu_barrier_callback); | 2437 | rd.rsp = rsp; |
| 2438 | rsp->call(&rd.barrier_head, rcu_barrier_callback); | ||
| 2368 | 2439 | ||
| 2369 | /* | 2440 | /* |
| 2370 | * Now that we have an rcu_barrier_callback() callback on each | 2441 | * Now that we have an rcu_barrier_callback() callback on each |
| 2371 | * CPU, and thus each counted, remove the initial count. | 2442 | * CPU, and thus each counted, remove the initial count. |
| 2372 | */ | 2443 | */ |
| 2373 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) | 2444 | if (atomic_dec_and_test(&rsp->barrier_cpu_count)) |
| 2374 | complete(&rcu_barrier_completion); | 2445 | complete(&rsp->barrier_completion); |
| 2446 | |||
| 2447 | /* Increment ->n_barrier_done to prevent duplicate work. */ | ||
| 2448 | smp_mb(); /* Keep increment after above mechanism. */ | ||
| 2449 | ACCESS_ONCE(rsp->n_barrier_done)++; | ||
| 2450 | WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); | ||
| 2451 | _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done); | ||
| 2452 | smp_mb(); /* Keep increment before caller's subsequent code. */ | ||
| 2375 | 2453 | ||
| 2376 | /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ | 2454 | /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ |
| 2377 | wait_for_completion(&rcu_barrier_completion); | 2455 | wait_for_completion(&rsp->barrier_completion); |
| 2378 | 2456 | ||
| 2379 | /* Other rcu_barrier() invocations can now safely proceed. */ | 2457 | /* Other rcu_barrier() invocations can now safely proceed. */ |
| 2380 | mutex_unlock(&rcu_barrier_mutex); | 2458 | mutex_unlock(&rsp->barrier_mutex); |
| 2381 | 2459 | ||
| 2382 | destroy_rcu_head_on_stack(&rh); | 2460 | destroy_rcu_head_on_stack(&rd.barrier_head); |
| 2383 | } | 2461 | } |
| 2384 | 2462 | ||
| 2385 | /** | 2463 | /** |
| @@ -2387,7 +2465,7 @@ static void _rcu_barrier(struct rcu_state *rsp, | |||
| 2387 | */ | 2465 | */ |
| 2388 | void rcu_barrier_bh(void) | 2466 | void rcu_barrier_bh(void) |
| 2389 | { | 2467 | { |
| 2390 | _rcu_barrier(&rcu_bh_state, call_rcu_bh); | 2468 | _rcu_barrier(&rcu_bh_state); |
| 2391 | } | 2469 | } |
| 2392 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); | 2470 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); |
| 2393 | 2471 | ||
| @@ -2396,7 +2474,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh); | |||
| 2396 | */ | 2474 | */ |
| 2397 | void rcu_barrier_sched(void) | 2475 | void rcu_barrier_sched(void) |
| 2398 | { | 2476 | { |
| 2399 | _rcu_barrier(&rcu_sched_state, call_rcu_sched); | 2477 | _rcu_barrier(&rcu_sched_state); |
| 2400 | } | 2478 | } |
| 2401 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); | 2479 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); |
| 2402 | 2480 | ||
| @@ -2407,18 +2485,15 @@ static void __init | |||
| 2407 | rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | 2485 | rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) |
| 2408 | { | 2486 | { |
| 2409 | unsigned long flags; | 2487 | unsigned long flags; |
| 2410 | int i; | ||
| 2411 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 2488 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
| 2412 | struct rcu_node *rnp = rcu_get_root(rsp); | 2489 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 2413 | 2490 | ||
| 2414 | /* Set up local state, ensuring consistent view of global state. */ | 2491 | /* Set up local state, ensuring consistent view of global state. */ |
| 2415 | raw_spin_lock_irqsave(&rnp->lock, flags); | 2492 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 2416 | rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); | 2493 | rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); |
| 2417 | rdp->nxtlist = NULL; | 2494 | init_callback_list(rdp); |
| 2418 | for (i = 0; i < RCU_NEXT_SIZE; i++) | ||
| 2419 | rdp->nxttail[i] = &rdp->nxtlist; | ||
| 2420 | rdp->qlen_lazy = 0; | 2495 | rdp->qlen_lazy = 0; |
| 2421 | rdp->qlen = 0; | 2496 | ACCESS_ONCE(rdp->qlen) = 0; |
| 2422 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); | 2497 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); |
| 2423 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); | 2498 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); |
| 2424 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); | 2499 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); |
| @@ -2492,9 +2567,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
| 2492 | 2567 | ||
| 2493 | static void __cpuinit rcu_prepare_cpu(int cpu) | 2568 | static void __cpuinit rcu_prepare_cpu(int cpu) |
| 2494 | { | 2569 | { |
| 2495 | rcu_init_percpu_data(cpu, &rcu_sched_state, 0); | 2570 | struct rcu_state *rsp; |
| 2496 | rcu_init_percpu_data(cpu, &rcu_bh_state, 0); | 2571 | |
| 2497 | rcu_preempt_init_percpu_data(cpu); | 2572 | for_each_rcu_flavor(rsp) |
| 2573 | rcu_init_percpu_data(cpu, rsp, | ||
| 2574 | strcmp(rsp->name, "rcu_preempt") == 0); | ||
| 2498 | } | 2575 | } |
| 2499 | 2576 | ||
| 2500 | /* | 2577 | /* |
| @@ -2506,6 +2583,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
| 2506 | long cpu = (long)hcpu; | 2583 | long cpu = (long)hcpu; |
| 2507 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | 2584 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); |
| 2508 | struct rcu_node *rnp = rdp->mynode; | 2585 | struct rcu_node *rnp = rdp->mynode; |
| 2586 | struct rcu_state *rsp; | ||
| 2509 | 2587 | ||
| 2510 | trace_rcu_utilization("Start CPU hotplug"); | 2588 | trace_rcu_utilization("Start CPU hotplug"); |
| 2511 | switch (action) { | 2589 | switch (action) { |
| @@ -2530,18 +2608,16 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
| 2530 | * touch any data without introducing corruption. We send the | 2608 | * touch any data without introducing corruption. We send the |
| 2531 | * dying CPU's callbacks to an arbitrarily chosen online CPU. | 2609 | * dying CPU's callbacks to an arbitrarily chosen online CPU. |
| 2532 | */ | 2610 | */ |
| 2533 | rcu_cleanup_dying_cpu(&rcu_bh_state); | 2611 | for_each_rcu_flavor(rsp) |
| 2534 | rcu_cleanup_dying_cpu(&rcu_sched_state); | 2612 | rcu_cleanup_dying_cpu(rsp); |
| 2535 | rcu_preempt_cleanup_dying_cpu(); | ||
| 2536 | rcu_cleanup_after_idle(cpu); | 2613 | rcu_cleanup_after_idle(cpu); |
| 2537 | break; | 2614 | break; |
| 2538 | case CPU_DEAD: | 2615 | case CPU_DEAD: |
| 2539 | case CPU_DEAD_FROZEN: | 2616 | case CPU_DEAD_FROZEN: |
| 2540 | case CPU_UP_CANCELED: | 2617 | case CPU_UP_CANCELED: |
| 2541 | case CPU_UP_CANCELED_FROZEN: | 2618 | case CPU_UP_CANCELED_FROZEN: |
| 2542 | rcu_cleanup_dead_cpu(cpu, &rcu_bh_state); | 2619 | for_each_rcu_flavor(rsp) |
| 2543 | rcu_cleanup_dead_cpu(cpu, &rcu_sched_state); | 2620 | rcu_cleanup_dead_cpu(cpu, rsp); |
| 2544 | rcu_preempt_cleanup_dead_cpu(cpu); | ||
| 2545 | break; | 2621 | break; |
| 2546 | default: | 2622 | default: |
| 2547 | break; | 2623 | break; |
| @@ -2574,9 +2650,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
| 2574 | { | 2650 | { |
| 2575 | int i; | 2651 | int i; |
| 2576 | 2652 | ||
| 2577 | for (i = NUM_RCU_LVLS - 1; i > 0; i--) | 2653 | for (i = rcu_num_lvls - 1; i > 0; i--) |
| 2578 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; | 2654 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; |
| 2579 | rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF; | 2655 | rsp->levelspread[0] = rcu_fanout_leaf; |
| 2580 | } | 2656 | } |
| 2581 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ | 2657 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ |
| 2582 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | 2658 | static void __init rcu_init_levelspread(struct rcu_state *rsp) |
| @@ -2586,7 +2662,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
| 2586 | int i; | 2662 | int i; |
| 2587 | 2663 | ||
| 2588 | cprv = NR_CPUS; | 2664 | cprv = NR_CPUS; |
| 2589 | for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { | 2665 | for (i = rcu_num_lvls - 1; i >= 0; i--) { |
| 2590 | ccur = rsp->levelcnt[i]; | 2666 | ccur = rsp->levelcnt[i]; |
| 2591 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; | 2667 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; |
| 2592 | cprv = ccur; | 2668 | cprv = ccur; |
| @@ -2613,13 +2689,15 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
| 2613 | 2689 | ||
| 2614 | /* Initialize the level-tracking arrays. */ | 2690 | /* Initialize the level-tracking arrays. */ |
| 2615 | 2691 | ||
| 2616 | for (i = 1; i < NUM_RCU_LVLS; i++) | 2692 | for (i = 0; i < rcu_num_lvls; i++) |
| 2693 | rsp->levelcnt[i] = num_rcu_lvl[i]; | ||
| 2694 | for (i = 1; i < rcu_num_lvls; i++) | ||
| 2617 | rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; | 2695 | rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; |
| 2618 | rcu_init_levelspread(rsp); | 2696 | rcu_init_levelspread(rsp); |
| 2619 | 2697 | ||
| 2620 | /* Initialize the elements themselves, starting from the leaves. */ | 2698 | /* Initialize the elements themselves, starting from the leaves. */ |
| 2621 | 2699 | ||
| 2622 | for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { | 2700 | for (i = rcu_num_lvls - 1; i >= 0; i--) { |
| 2623 | cpustride *= rsp->levelspread[i]; | 2701 | cpustride *= rsp->levelspread[i]; |
| 2624 | rnp = rsp->level[i]; | 2702 | rnp = rsp->level[i]; |
| 2625 | for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { | 2703 | for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { |
| @@ -2649,13 +2727,74 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
| 2649 | } | 2727 | } |
| 2650 | 2728 | ||
| 2651 | rsp->rda = rda; | 2729 | rsp->rda = rda; |
| 2652 | rnp = rsp->level[NUM_RCU_LVLS - 1]; | 2730 | rnp = rsp->level[rcu_num_lvls - 1]; |
| 2653 | for_each_possible_cpu(i) { | 2731 | for_each_possible_cpu(i) { |
| 2654 | while (i > rnp->grphi) | 2732 | while (i > rnp->grphi) |
| 2655 | rnp++; | 2733 | rnp++; |
| 2656 | per_cpu_ptr(rsp->rda, i)->mynode = rnp; | 2734 | per_cpu_ptr(rsp->rda, i)->mynode = rnp; |
| 2657 | rcu_boot_init_percpu_data(i, rsp); | 2735 | rcu_boot_init_percpu_data(i, rsp); |
| 2658 | } | 2736 | } |
| 2737 | list_add(&rsp->flavors, &rcu_struct_flavors); | ||
| 2738 | } | ||
| 2739 | |||
| 2740 | /* | ||
| 2741 | * Compute the rcu_node tree geometry from kernel parameters. This cannot | ||
| 2742 | * replace the definitions in rcutree.h because those are needed to size | ||
| 2743 | * the ->node array in the rcu_state structure. | ||
| 2744 | */ | ||
| 2745 | static void __init rcu_init_geometry(void) | ||
| 2746 | { | ||
| 2747 | int i; | ||
| 2748 | int j; | ||
| 2749 | int n = nr_cpu_ids; | ||
| 2750 | int rcu_capacity[MAX_RCU_LVLS + 1]; | ||
| 2751 | |||
| 2752 | /* If the compile-time values are accurate, just leave. */ | ||
| 2753 | if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF) | ||
| 2754 | return; | ||
| 2755 | |||
| 2756 | /* | ||
| 2757 | * Compute number of nodes that can be handled by an rcu_node tree | ||
| 2758 | * with the given number of levels. Setting rcu_capacity[0] makes | ||
| 2759 | * some of the arithmetic easier. | ||
| 2760 | */ | ||
| 2761 | rcu_capacity[0] = 1; | ||
| 2762 | rcu_capacity[1] = rcu_fanout_leaf; | ||
| 2763 | for (i = 2; i <= MAX_RCU_LVLS; i++) | ||
| 2764 | rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT; | ||
| 2765 | |||
| 2766 | /* | ||
| 2767 | * The boot-time rcu_fanout_leaf parameter is only permitted | ||
| 2768 | * to increase the leaf-level fanout, not decrease it. Of course, | ||
| 2769 | * the leaf-level fanout cannot exceed the number of bits in | ||
| 2770 | * the rcu_node masks. Finally, the tree must be able to accommodate | ||
| 2771 | * the configured number of CPUs. Complain and fall back to the | ||
| 2772 | * compile-time values if these limits are exceeded. | ||
| 2773 | */ | ||
| 2774 | if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF || | ||
| 2775 | rcu_fanout_leaf > sizeof(unsigned long) * 8 || | ||
| 2776 | n > rcu_capacity[MAX_RCU_LVLS]) { | ||
| 2777 | WARN_ON(1); | ||
| 2778 | return; | ||
| 2779 | } | ||
| 2780 | |||
| 2781 | /* Calculate the number of rcu_nodes at each level of the tree. */ | ||
| 2782 | for (i = 1; i <= MAX_RCU_LVLS; i++) | ||
| 2783 | if (n <= rcu_capacity[i]) { | ||
| 2784 | for (j = 0; j <= i; j++) | ||
| 2785 | num_rcu_lvl[j] = | ||
| 2786 | DIV_ROUND_UP(n, rcu_capacity[i - j]); | ||
| 2787 | rcu_num_lvls = i; | ||
| 2788 | for (j = i + 1; j <= MAX_RCU_LVLS; j++) | ||
| 2789 | num_rcu_lvl[j] = 0; | ||
| 2790 | break; | ||
| 2791 | } | ||
| 2792 | |||
| 2793 | /* Calculate the total number of rcu_node structures. */ | ||
| 2794 | rcu_num_nodes = 0; | ||
| 2795 | for (i = 0; i <= MAX_RCU_LVLS; i++) | ||
| 2796 | rcu_num_nodes += num_rcu_lvl[i]; | ||
| 2797 | rcu_num_nodes -= n; | ||
| 2659 | } | 2798 | } |
| 2660 | 2799 | ||
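rcu_init_geometry() is easiest to check with concrete numbers. The user-space sketch below reproduces the same arithmetic outside the kernel; the inputs (a booted rcu_fanout_leaf of 16, CONFIG_RCU_FANOUT of 64, nr_cpu_ids of 4096) are illustrative assumptions, not values taken from this commit:

#include <stdio.h>

#define MAX_RCU_LVLS	4
#define FANOUT		64		/* stand-in for CONFIG_RCU_FANOUT */

int main(void)
{
	int fanout_leaf = 16;		/* stand-in for rcu_fanout_leaf=16 on the boot line */
	int n = 4096;			/* stand-in for nr_cpu_ids */
	int rcu_capacity[MAX_RCU_LVLS + 1];
	int num_rcu_lvl[MAX_RCU_LVLS + 1] = { 0 };
	int levels = 0, nodes = 0, i, j;

	/* Capacity of an i-level tree, exactly as in rcu_init_geometry(). */
	rcu_capacity[0] = 1;
	rcu_capacity[1] = fanout_leaf;
	for (i = 2; i <= MAX_RCU_LVLS; i++)
		rcu_capacity[i] = rcu_capacity[i - 1] * FANOUT;

	/* Pick the smallest tree that covers n CPUs, then size each level. */
	for (i = 1; i <= MAX_RCU_LVLS; i++)
		if (n <= rcu_capacity[i]) {
			for (j = 0; j <= i; j++)
				num_rcu_lvl[j] = (n + rcu_capacity[i - j] - 1) /
						 rcu_capacity[i - j];
			levels = i;
			break;
		}

	/* Total rcu_node count: sum the levels, then drop the per-CPU entries. */
	for (i = 0; i <= MAX_RCU_LVLS; i++)
		nodes += num_rcu_lvl[i];
	nodes -= n;

	printf("levels=%d nodes=%d (per level: %d %d %d)\n",
	       levels, nodes, num_rcu_lvl[0], num_rcu_lvl[1], num_rcu_lvl[2]);
	return 0;
}

With these inputs the loop settles on a three-level tree of 1 root, 4 interior nodes, and 256 leaves, so rcu_num_lvls would become 3 and rcu_num_nodes would become 261.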
| 2661 | void __init rcu_init(void) | 2800 | void __init rcu_init(void) |
| @@ -2663,6 +2802,7 @@ void __init rcu_init(void) | |||
| 2663 | int cpu; | 2802 | int cpu; |
| 2664 | 2803 | ||
| 2665 | rcu_bootup_announce(); | 2804 | rcu_bootup_announce(); |
| 2805 | rcu_init_geometry(); | ||
| 2666 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); | 2806 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); |
| 2667 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); | 2807 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
| 2668 | __rcu_init_preempt(); | 2808 | __rcu_init_preempt(); |
