author     Grant Likely <grant.likely@secretlab.ca>    2012-07-25 00:31:09 -0400
committer  Grant Likely <grant.likely@secretlab.ca>    2012-07-25 00:34:40 -0400
commit     6aeea3ecc33b1f36dbc3b80461d15a7052ae424f (patch)
tree       bbd273e3e0ca76094aed8e9c77e5adfe2b07f779 /kernel/rcutree.c
parent     9844a5524ec532aee826c35e3031637c7fc8287b (diff)
parent     bdc0077af574800d24318b6945cf2344e8dbb050 (diff)

Merge remote-tracking branch 'origin' into irqdomain/next
Diffstat (limited to 'kernel/rcutree.c')

-rw-r--r--  kernel/rcutree.c | 479
1 file changed, 310 insertions(+), 169 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 38ecdda3f55f..f280e542e3e9 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -60,36 +60,44 @@
 
 /* Data structures. */
 
-static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
+static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 
-#define RCU_STATE_INITIALIZER(structname) { \
-	.level = { &structname##_state.node[0] }, \
-	.levelcnt = { \
-		NUM_RCU_LVL_0,  /* root of hierarchy. */ \
-		NUM_RCU_LVL_1, \
-		NUM_RCU_LVL_2, \
-		NUM_RCU_LVL_3, \
-		NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
-	}, \
+#define RCU_STATE_INITIALIZER(sname, cr) { \
+	.level = { &sname##_state.node[0] }, \
+	.call = cr, \
 	.fqs_state = RCU_GP_IDLE, \
 	.gpnum = -300, \
 	.completed = -300, \
-	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
-	.orphan_nxttail = &structname##_state.orphan_nxtlist, \
-	.orphan_donetail = &structname##_state.orphan_donelist, \
-	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \
-	.n_force_qs = 0, \
-	.n_force_qs_ngp = 0, \
-	.name = #structname, \
+	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
+	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
+	.orphan_donetail = &sname##_state.orphan_donelist, \
+	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
+	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \
+	.name = #sname, \
 }
 
-struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched);
+struct rcu_state rcu_sched_state =
+	RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh);
+struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 static struct rcu_state *rcu_state;
+LIST_HEAD(rcu_struct_flavors);
+
+/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
+static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
+module_param(rcu_fanout_leaf, int, 0);
+int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
+static int num_rcu_lvl[] = {  /* Number of rcu_nodes at specified level. */
+	NUM_RCU_LVL_0,
+	NUM_RCU_LVL_1,
+	NUM_RCU_LVL_2,
+	NUM_RCU_LVL_3,
+	NUM_RCU_LVL_4,
+};
+int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
 
 /*
  * The rcu_scheduler_active variable transitions from zero to one just
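The hunk above also introduces rcu_struct_flavors, a global list onto which each rcu_state structure is added (see the list_add() call in rcu_init_one() later in this diff), so common code can walk every RCU flavor instead of naming rcu_sched_state, rcu_bh_state, and rcu_preempt_state one by one. The for_each_rcu_flavor() iterator used throughout the rest of the diff lives in kernel/rcutree.h rather than in this file; as a rough sketch (an assumption, not part of this diff), it is simply a list_for_each_entry() walk over that list:

	/* Sketch of the iterator assumed to be declared in kernel/rcutree.h. */
	#define for_each_rcu_flavor(rsp) \
		list_for_each_entry((rsp), &rcu_struct_flavors, flavors)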
@@ -147,13 +155,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
 unsigned long rcutorture_testseq;
 unsigned long rcutorture_vernum;
 
-/* State information for rcu_barrier() and friends. */
-
-static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
-static atomic_t rcu_barrier_cpu_count;
-static DEFINE_MUTEX(rcu_barrier_mutex);
-static struct completion rcu_barrier_completion;
-
 /*
  * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
  * permit this function to be invoked without holding the root rcu_node
@@ -201,6 +202,7 @@ void rcu_note_context_switch(int cpu)
 {
	trace_rcu_utilization("Start context switch");
	rcu_sched_qs(cpu);
+	rcu_preempt_note_context_switch(cpu);
	trace_rcu_utilization("End context switch");
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
@@ -357,7 +359,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
		struct task_struct *idle = idle_task(smp_processor_id());

		trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
-		ftrace_dump(DUMP_ALL);
+		ftrace_dump(DUMP_ORIG);
		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
			  current->pid, current->comm,
			  idle->pid, idle->comm); /* must be idle task! */
@@ -467,7 +469,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)

		trace_rcu_dyntick("Error on exit: not idle task",
				  oldval, rdtp->dynticks_nesting);
-		ftrace_dump(DUMP_ALL);
+		ftrace_dump(DUMP_ORIG);
		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
			  current->pid, current->comm,
			  idle->pid, idle->comm); /* must be idle task! */
@@ -584,8 +586,6 @@ void rcu_nmi_exit(void)
	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 }
 
-#ifdef CONFIG_PROVE_RCU
-
 /**
  * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
  *
@@ -603,7 +603,7 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
 /*
  * Is the current CPU online? Disable preemption to avoid false positives
@@ -644,9 +644,7 @@ bool rcu_lockdep_current_cpu_online(void)
 }
 EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
 
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
-#endif /* #ifdef CONFIG_PROVE_RCU */
+#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
 
 /**
  * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
@@ -732,7 +730,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
	int cpu;
	long delta;
	unsigned long flags;
-	int ndetected;
+	int ndetected = 0;
	struct rcu_node *rnp = rcu_get_root(rsp);

	/* Only let one CPU complain about others per time interval. */
@@ -773,7 +771,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
	 */
	rnp = rcu_get_root(rsp);
	raw_spin_lock_irqsave(&rnp->lock, flags);
-	ndetected = rcu_print_task_stall(rnp);
+	ndetected += rcu_print_task_stall(rnp);
	raw_spin_unlock_irqrestore(&rnp->lock, flags);

	print_cpu_stall_info_end();
@@ -859,9 +857,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
  */
 void rcu_cpu_stall_reset(void)
 {
-	rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
-	rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
-	rcu_preempt_stall_reset();
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
 }
 
 static struct notifier_block rcu_panic_block = {
@@ -893,8 +892,9 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
		if (rnp->qsmask & rdp->grpmask) {
			rdp->qs_pending = 1;
			rdp->passed_quiesce = 0;
-		} else
+		} else {
			rdp->qs_pending = 0;
+		}
		zero_cpu_stall_ticks(rdp);
	}
 }
@@ -936,6 +936,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
 }
 
 /*
+ * Initialize the specified rcu_data structure's callback list to empty.
+ */
+static void init_callback_list(struct rcu_data *rdp)
+{
+	int i;
+
+	rdp->nxtlist = NULL;
+	for (i = 0; i < RCU_NEXT_SIZE; i++)
+		rdp->nxttail[i] = &rdp->nxtlist;
+}
+
+/*
  * Advance this CPU's callbacks, but only if the current grace period
  * has ended. This may be called only from the CPU to whom the rdp
  * belongs. In addition, the corresponding leaf rcu_node structure's
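The new init_callback_list() helper restores the empty-list invariant of the segmented callback list: ->nxtlist is NULL and every element of ->nxttail[] points back at ->nxtlist, so callbacks can always be appended through the last tail pointer with no special empty-list case. As a hedged illustration (mirroring the enqueue step visible later in __call_rcu(), not itself a line of this diff), enqueuing a callback under that invariant looks like:

	/* Illustration only: append "head" to rdp's callback list. */
	*rdp->nxttail[RCU_NEXT_TAIL] = head;
	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;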
@@ -1327,8 +1339,6 @@ static void
 rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
			  struct rcu_node *rnp, struct rcu_data *rdp)
 {
-	int i;
-
	/*
	 * Orphan the callbacks. First adjust the counts. This is safe
	 * because ->onofflock excludes _rcu_barrier()'s adoption of
@@ -1339,7 +1349,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
		rsp->qlen += rdp->qlen;
		rdp->n_cbs_orphaned += rdp->qlen;
		rdp->qlen_lazy = 0;
-		rdp->qlen = 0;
+		ACCESS_ONCE(rdp->qlen) = 0;
	}

	/*
@@ -1368,9 +1378,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
	}

	/* Finally, initialize the rcu_data structure's list to empty. */
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
+	init_callback_list(rdp);
 }
 
 /*
@@ -1504,6 +1512,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
	if (need_report & RCU_OFL_TASKS_EXP_GP)
		rcu_report_exp_rnp(rsp, rnp, true);
+	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
+		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
+		  cpu, rdp->qlen, rdp->nxtlist);
 }
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -1591,7 +1602,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
	}
	smp_mb(); /* List handling before counting for rcu_barrier(). */
	rdp->qlen_lazy -= count_lazy;
-	rdp->qlen -= count;
+	ACCESS_ONCE(rdp->qlen) -= count;
	rdp->n_cbs_invoked += count;

	/* Reinstate batch limit if we have worked down the excess. */
@@ -1604,6 +1615,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
		rdp->n_force_qs_snap = rsp->n_force_qs;
	} else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
		rdp->qlen_last_fqs_check = rdp->qlen;
+	WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));

	local_irq_restore(flags);

@@ -1744,8 +1756,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
		break; /* grace period idle or initializing, ignore. */

	case RCU_SAVE_DYNTICK:
-		if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
-			break; /* So gcc recognizes the dead code. */

		raw_spin_unlock(&rnp->lock);  /* irqs remain disabled */

@@ -1787,9 +1797,10 @@ unlock_fqs_ret:
  * whom the rdp belongs.
  */
 static void
-__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
+__rcu_process_callbacks(struct rcu_state *rsp)
 {
	unsigned long flags;
+	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);

	WARN_ON_ONCE(rdp->beenonline == 0);

@@ -1825,11 +1836,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static void rcu_process_callbacks(struct softirq_action *unused)
 {
+	struct rcu_state *rsp;
+
	trace_rcu_utilization("Start RCU core");
-	__rcu_process_callbacks(&rcu_sched_state,
-				&__get_cpu_var(rcu_sched_data));
-	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
-	rcu_preempt_process_callbacks();
+	for_each_rcu_flavor(rsp)
+		__rcu_process_callbacks(rsp);
	trace_rcu_utilization("End RCU core");
 }
 
@@ -1856,6 +1867,56 @@ static void invoke_rcu_core(void)
	raise_softirq(RCU_SOFTIRQ);
 }
 
+/*
+ * Handle any core-RCU processing required by a call_rcu() invocation.
+ */
+static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
+			    struct rcu_head *head, unsigned long flags)
+{
+	/*
+	 * If called from an extended quiescent state, invoke the RCU
+	 * core in order to force a re-evaluation of RCU's idleness.
+	 */
+	if (rcu_is_cpu_idle() && cpu_online(smp_processor_id()))
+		invoke_rcu_core();
+
+	/* If interrupts were disabled or CPU offline, don't invoke RCU core. */
+	if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
+		return;
+
+	/*
+	 * Force the grace period if too many callbacks or too long waiting.
+	 * Enforce hysteresis, and don't invoke force_quiescent_state()
+	 * if some other CPU has recently done so. Also, don't bother
+	 * invoking force_quiescent_state() if the newly enqueued callback
+	 * is the only one waiting for a grace period to complete.
+	 */
+	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
+
+		/* Are we ignoring a completed grace period? */
+		rcu_process_gp_end(rsp, rdp);
+		check_for_new_grace_period(rsp, rdp);
+
+		/* Start a new grace period if one not already started. */
+		if (!rcu_gp_in_progress(rsp)) {
+			unsigned long nestflag;
+			struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+			raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
+			rcu_start_gp(rsp, nestflag);  /* rlses rnp_root->lock */
+		} else {
+			/* Give the grace period a kick. */
+			rdp->blimit = LONG_MAX;
+			if (rsp->n_force_qs == rdp->n_force_qs_snap &&
+			    *rdp->nxttail[RCU_DONE_TAIL] != head)
+				force_quiescent_state(rsp, 0);
+			rdp->n_force_qs_snap = rsp->n_force_qs;
+			rdp->qlen_last_fqs_check = rdp->qlen;
+		}
+	} else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
+		force_quiescent_state(rsp, 1);
+}
+
 static void
 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
	   struct rcu_state *rsp, bool lazy)
@@ -1880,7 +1941,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
	rdp = this_cpu_ptr(rsp->rda);

	/* Add the callback to our list. */
-	rdp->qlen++;
+	ACCESS_ONCE(rdp->qlen)++;
	if (lazy)
		rdp->qlen_lazy++;
	else
@@ -1895,43 +1956,8 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
	else
		trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);

-	/* If interrupts were disabled, don't dive into RCU core. */
-	if (irqs_disabled_flags(flags)) {
-		local_irq_restore(flags);
-		return;
-	}
-
-	/*
-	 * Force the grace period if too many callbacks or too long waiting.
-	 * Enforce hysteresis, and don't invoke force_quiescent_state()
-	 * if some other CPU has recently done so. Also, don't bother
-	 * invoking force_quiescent_state() if the newly enqueued callback
-	 * is the only one waiting for a grace period to complete.
-	 */
-	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
-
-		/* Are we ignoring a completed grace period? */
-		rcu_process_gp_end(rsp, rdp);
-		check_for_new_grace_period(rsp, rdp);
-
-		/* Start a new grace period if one not already started. */
-		if (!rcu_gp_in_progress(rsp)) {
-			unsigned long nestflag;
-			struct rcu_node *rnp_root = rcu_get_root(rsp);
-
-			raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
-			rcu_start_gp(rsp, nestflag);  /* rlses rnp_root->lock */
-		} else {
-			/* Give the grace period a kick. */
-			rdp->blimit = LONG_MAX;
-			if (rsp->n_force_qs == rdp->n_force_qs_snap &&
-			    *rdp->nxttail[RCU_DONE_TAIL] != head)
-				force_quiescent_state(rsp, 0);
-			rdp->n_force_qs_snap = rsp->n_force_qs;
-			rdp->qlen_last_fqs_check = rdp->qlen;
-		}
-	} else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
-		force_quiescent_state(rsp, 1);
+	/* Go handle any RCU core processing required. */
+	__call_rcu_core(rsp, rdp, head, flags);
	local_irq_restore(flags);
 }
 
@@ -1961,28 +1987,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
  * occasionally incorrectly indicate that there are multiple CPUs online
  * when there was in fact only one the whole time, as this just adds
  * some overhead: RCU still operates correctly.
- *
- * Of course, sampling num_online_cpus() with preemption enabled can
- * give erroneous results if there are concurrent CPU-hotplug operations.
- * For example, given a demonic sequence of preemptions in num_online_cpus()
- * and CPU-hotplug operations, there could be two or more CPUs online at
- * all times, but num_online_cpus() might well return one (or even zero).
- *
- * However, all such demonic sequences require at least one CPU-offline
- * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer
- * is only a problem if there is an RCU read-side critical section executing
- * throughout. But RCU-sched and RCU-bh read-side critical sections
- * disable either preemption or bh, which prevents a CPU from going offline.
- * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return
- * that there is only one CPU when in fact there was more than one throughout
- * is when there were no RCU readers in the system. If there are no
- * RCU readers, the grace period by definition can be of zero length,
- * regardless of the number of online CPUs.
  */
 static inline int rcu_blocking_is_gp(void)
 {
+	int ret;
+
	might_sleep();  /* Check for RCU read-side critical section. */
-	return num_online_cpus() <= 1;
+	preempt_disable();
+	ret = num_online_cpus() <= 1;
+	preempt_enable();
+	return ret;
 }
 
 /**
@@ -2117,9 +2131,9 @@ void synchronize_sched_expedited(void)
		put_online_cpus();

		/* No joy, try again later. Or just synchronize_sched(). */
-		if (trycount++ < 10)
+		if (trycount++ < 10) {
			udelay(trycount * num_online_cpus());
-		else {
+		} else {
			synchronize_sched();
			return;
		}
@@ -2240,9 +2254,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static int rcu_pending(int cpu)
 {
-	return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) ||
-	       __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) ||
-	       rcu_preempt_pending(cpu);
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu)))
+			return 1;
+	return 0;
 }
 
 /*
@@ -2252,20 +2269,41 @@ static int rcu_pending(int cpu)
  */
 static int rcu_cpu_has_callbacks(int cpu)
 {
+	struct rcu_state *rsp;
+
	/* RCU callbacks either ready or pending? */
-	return per_cpu(rcu_sched_data, cpu).nxtlist ||
-	       per_cpu(rcu_bh_data, cpu).nxtlist ||
-	       rcu_preempt_cpu_has_callbacks(cpu);
+	for_each_rcu_flavor(rsp)
+		if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
+			return 1;
+	return 0;
+}
+
+/*
+ * Helper function for _rcu_barrier() tracing. If tracing is disabled,
+ * the compiler is expected to optimize this away.
+ */
+static void _rcu_barrier_trace(struct rcu_state *rsp, char *s,
+			       int cpu, unsigned long done)
+{
+	trace_rcu_barrier(rsp->name, s, cpu,
+			  atomic_read(&rsp->barrier_cpu_count), done);
 }
 
 /*
  * RCU callback function for _rcu_barrier(). If we are last, wake
  * up the task executing _rcu_barrier().
  */
-static void rcu_barrier_callback(struct rcu_head *notused)
+static void rcu_barrier_callback(struct rcu_head *rhp)
 {
-	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
-		complete(&rcu_barrier_completion);
+	struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
+	struct rcu_state *rsp = rdp->rsp;
+
+	if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
+		_rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
+		complete(&rsp->barrier_completion);
+	} else {
+		_rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
+	}
 }
 
 /*
@@ -2273,35 +2311,63 @@ static void rcu_barrier_callback(struct rcu_head *notused)
  */
 static void rcu_barrier_func(void *type)
 {
-	int cpu = smp_processor_id();
-	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
-	void (*call_rcu_func)(struct rcu_head *head,
-			      void (*func)(struct rcu_head *head));
+	struct rcu_state *rsp = type;
+	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);

-	atomic_inc(&rcu_barrier_cpu_count);
-	call_rcu_func = type;
-	call_rcu_func(head, rcu_barrier_callback);
+	_rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
+	atomic_inc(&rsp->barrier_cpu_count);
+	rsp->call(&rdp->barrier_head, rcu_barrier_callback);
 }
 
 /*
  * Orchestrate the specified type of RCU barrier, waiting for all
  * RCU callbacks of the specified type to complete.
  */
-static void _rcu_barrier(struct rcu_state *rsp,
-			 void (*call_rcu_func)(struct rcu_head *head,
-					       void (*func)(struct rcu_head *head)))
+static void _rcu_barrier(struct rcu_state *rsp)
 {
	int cpu;
	unsigned long flags;
	struct rcu_data *rdp;
-	struct rcu_head rh;
+	struct rcu_data rd;
+	unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
+	unsigned long snap_done;

-	init_rcu_head_on_stack(&rh);
+	init_rcu_head_on_stack(&rd.barrier_head);
+	_rcu_barrier_trace(rsp, "Begin", -1, snap);

	/* Take mutex to serialize concurrent rcu_barrier() requests. */
-	mutex_lock(&rcu_barrier_mutex);
+	mutex_lock(&rsp->barrier_mutex);
+
+	/*
+	 * Ensure that all prior references, including to ->n_barrier_done,
+	 * are ordered before the _rcu_barrier() machinery.
+	 */
+	smp_mb();  /* See above block comment. */
+
+	/*
+	 * Recheck ->n_barrier_done to see if others did our work for us.
+	 * This means checking ->n_barrier_done for an even-to-odd-to-even
+	 * transition. The "if" expression below therefore rounds the old
+	 * value up to the next even number and adds two before comparing.
+	 */
+	snap_done = ACCESS_ONCE(rsp->n_barrier_done);
+	_rcu_barrier_trace(rsp, "Check", -1, snap_done);
+	if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) {
+		_rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
+		smp_mb(); /* caller's subsequent code after above check. */
+		mutex_unlock(&rsp->barrier_mutex);
+		return;
+	}

-	smp_mb();  /* Prevent any prior operations from leaking in. */
+	/*
+	 * Increment ->n_barrier_done to avoid duplicate work. Use
+	 * ACCESS_ONCE() to prevent the compiler from speculating
+	 * the increment to precede the early-exit check.
+	 */
+	ACCESS_ONCE(rsp->n_barrier_done)++;
+	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
+	_rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
+	smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */

	/*
	 * Initialize the count to one rather than to zero in order to
@@ -2320,8 +2386,8 @@ static void _rcu_barrier(struct rcu_state *rsp,
	 * 6.	Both rcu_barrier_callback() callbacks are invoked, awakening
	 *	us -- but before CPU 1's orphaned callbacks are invoked!!!
	 */
-	init_completion(&rcu_barrier_completion);
-	atomic_set(&rcu_barrier_cpu_count, 1);
+	init_completion(&rsp->barrier_completion);
+	atomic_set(&rsp->barrier_cpu_count, 1);
	raw_spin_lock_irqsave(&rsp->onofflock, flags);
	rsp->rcu_barrier_in_progress = current;
	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
@@ -2337,14 +2403,19 @@ static void _rcu_barrier(struct rcu_state *rsp,
		preempt_disable();
		rdp = per_cpu_ptr(rsp->rda, cpu);
		if (cpu_is_offline(cpu)) {
+			_rcu_barrier_trace(rsp, "Offline", cpu,
+					   rsp->n_barrier_done);
			preempt_enable();
			while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen))
				schedule_timeout_interruptible(1);
		} else if (ACCESS_ONCE(rdp->qlen)) {
-			smp_call_function_single(cpu, rcu_barrier_func,
-						 (void *)call_rcu_func, 1);
+			_rcu_barrier_trace(rsp, "OnlineQ", cpu,
+					   rsp->n_barrier_done);
+			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
			preempt_enable();
		} else {
+			_rcu_barrier_trace(rsp, "OnlineNQ", cpu,
+					   rsp->n_barrier_done);
			preempt_enable();
		}
	}
@@ -2361,24 +2432,32 @@ static void _rcu_barrier(struct rcu_state *rsp,
	rcu_adopt_orphan_cbs(rsp);
	rsp->rcu_barrier_in_progress = NULL;
	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
-	atomic_inc(&rcu_barrier_cpu_count);
+	atomic_inc(&rsp->barrier_cpu_count);
	smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */
-	call_rcu_func(&rh, rcu_barrier_callback);
+	rd.rsp = rsp;
+	rsp->call(&rd.barrier_head, rcu_barrier_callback);

	/*
	 * Now that we have an rcu_barrier_callback() callback on each
	 * CPU, and thus each counted, remove the initial count.
	 */
-	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
-		complete(&rcu_barrier_completion);
+	if (atomic_dec_and_test(&rsp->barrier_cpu_count))
+		complete(&rsp->barrier_completion);
+
+	/* Increment ->n_barrier_done to prevent duplicate work. */
+	smp_mb(); /* Keep increment after above mechanism. */
+	ACCESS_ONCE(rsp->n_barrier_done)++;
+	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
+	_rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
+	smp_mb(); /* Keep increment before caller's subsequent code. */

	/* Wait for all rcu_barrier_callback() callbacks to be invoked. */
-	wait_for_completion(&rcu_barrier_completion);
+	wait_for_completion(&rsp->barrier_completion);

	/* Other rcu_barrier() invocations can now safely proceed. */
-	mutex_unlock(&rcu_barrier_mutex);
+	mutex_unlock(&rsp->barrier_mutex);

-	destroy_rcu_head_on_stack(&rh);
+	destroy_rcu_head_on_stack(&rd.barrier_head);
 }
 
 /**
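Taken together, the hunks above implement the ->n_barrier_done protocol: the counter is even while no rcu_barrier() is in flight, is bumped to an odd value when one starts ("Inc1"), and is bumped back to even when it completes ("Inc2"). The early-exit test rounds the pre-mutex snapshot up to the next even value and adds two, so it only fires when an entire start/finish cycle happened after the snapshot was taken. A small worked example of that arithmetic, using made-up counter values that are not taken from the diff:

	/*
	 * Illustrative values only.
	 * snap == 4 (even, no barrier running):  ((4 + 1) & ~0x1) + 2 == 6,
	 *     so seeing snap_done >= 6 means a barrier started (5) and finished (6)
	 *     after our snapshot, and its completion covers our caller: early exit.
	 * snap == 5 (odd, a barrier was in flight): ((5 + 1) & ~0x1) + 2 == 8,
	 *     so the in-flight barrier finishing (6) is not enough; a later full
	 *     cycle (7 -> 8) is required before the work may be skipped.
	 */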
@@ -2386,7 +2465,7 @@ static void _rcu_barrier(struct rcu_state *rsp,
  */
 void rcu_barrier_bh(void)
 {
-	_rcu_barrier(&rcu_bh_state, call_rcu_bh);
+	_rcu_barrier(&rcu_bh_state);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
 
@@ -2395,7 +2474,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh);
  */
 void rcu_barrier_sched(void)
 {
-	_rcu_barrier(&rcu_sched_state, call_rcu_sched);
+	_rcu_barrier(&rcu_sched_state);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
@@ -2406,18 +2485,15 @@ static void __init
 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 {
	unsigned long flags;
-	int i;
	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
	struct rcu_node *rnp = rcu_get_root(rsp);

	/* Set up local state, ensuring consistent view of global state. */
	raw_spin_lock_irqsave(&rnp->lock, flags);
	rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
+	init_callback_list(rdp);
	rdp->qlen_lazy = 0;
-	rdp->qlen = 0;
+	ACCESS_ONCE(rdp->qlen) = 0;
	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
@@ -2491,9 +2567,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 
 static void __cpuinit rcu_prepare_cpu(int cpu)
 {
-	rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
-	rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
-	rcu_preempt_init_percpu_data(cpu);
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		rcu_init_percpu_data(cpu, rsp,
+				     strcmp(rsp->name, "rcu_preempt") == 0);
 }
 
 /*
@@ -2505,6 +2583,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
	long cpu = (long)hcpu;
	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
	struct rcu_node *rnp = rdp->mynode;
+	struct rcu_state *rsp;

	trace_rcu_utilization("Start CPU hotplug");
	switch (action) {
@@ -2529,18 +2608,16 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
		 * touch any data without introducing corruption. We send the
		 * dying CPU's callbacks to an arbitrarily chosen online CPU.
		 */
-		rcu_cleanup_dying_cpu(&rcu_bh_state);
-		rcu_cleanup_dying_cpu(&rcu_sched_state);
-		rcu_preempt_cleanup_dying_cpu();
+		for_each_rcu_flavor(rsp)
+			rcu_cleanup_dying_cpu(rsp);
		rcu_cleanup_after_idle(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
-		rcu_cleanup_dead_cpu(cpu, &rcu_bh_state);
-		rcu_cleanup_dead_cpu(cpu, &rcu_sched_state);
-		rcu_preempt_cleanup_dead_cpu(cpu);
+		for_each_rcu_flavor(rsp)
+			rcu_cleanup_dead_cpu(cpu, rsp);
		break;
	default:
		break;
@@ -2573,9 +2650,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
	int i;

-	for (i = NUM_RCU_LVLS - 1; i > 0; i--)
+	for (i = rcu_num_lvls - 1; i > 0; i--)
		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-	rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF;
+	rsp->levelspread[0] = rcu_fanout_leaf;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
@@ -2585,7 +2662,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
	int i;

	cprv = NR_CPUS;
-	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+	for (i = rcu_num_lvls - 1; i >= 0; i--) {
		ccur = rsp->levelcnt[i];
		rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
		cprv = ccur;
@@ -2612,13 +2689,15 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 
	/* Initialize the level-tracking arrays. */

-	for (i = 1; i < NUM_RCU_LVLS; i++)
+	for (i = 0; i < rcu_num_lvls; i++)
+		rsp->levelcnt[i] = num_rcu_lvl[i];
+	for (i = 1; i < rcu_num_lvls; i++)
		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
	rcu_init_levelspread(rsp);

	/* Initialize the elements themselves, starting from the leaves. */

-	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+	for (i = rcu_num_lvls - 1; i >= 0; i--) {
		cpustride *= rsp->levelspread[i];
		rnp = rsp->level[i];
		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
@@ -2648,13 +2727,74 @@ static void __init rcu_init_one(struct rcu_state *rsp,
	}

	rsp->rda = rda;
-	rnp = rsp->level[NUM_RCU_LVLS - 1];
+	rnp = rsp->level[rcu_num_lvls - 1];
	for_each_possible_cpu(i) {
		while (i > rnp->grphi)
			rnp++;
		per_cpu_ptr(rsp->rda, i)->mynode = rnp;
		rcu_boot_init_percpu_data(i, rsp);
	}
+	list_add(&rsp->flavors, &rcu_struct_flavors);
+}
+
+/*
+ * Compute the rcu_node tree geometry from kernel parameters. This cannot
+ * replace the definitions in rcutree.h because those are needed to size
+ * the ->node array in the rcu_state structure.
+ */
+static void __init rcu_init_geometry(void)
+{
+	int i;
+	int j;
+	int n = nr_cpu_ids;
+	int rcu_capacity[MAX_RCU_LVLS + 1];
+
+	/* If the compile-time values are accurate, just leave. */
+	if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF)
+		return;
+
+	/*
+	 * Compute number of nodes that can be handled an rcu_node tree
+	 * with the given number of levels. Setting rcu_capacity[0] makes
+	 * some of the arithmetic easier.
+	 */
+	rcu_capacity[0] = 1;
+	rcu_capacity[1] = rcu_fanout_leaf;
+	for (i = 2; i <= MAX_RCU_LVLS; i++)
+		rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
+
+	/*
+	 * The boot-time rcu_fanout_leaf parameter is only permitted
+	 * to increase the leaf-level fanout, not decrease it. Of course,
+	 * the leaf-level fanout cannot exceed the number of bits in
+	 * the rcu_node masks. Finally, the tree must be able to accommodate
+	 * the configured number of CPUs. Complain and fall back to the
+	 * compile-time values if these limits are exceeded.
+	 */
+	if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
+	    rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
+	    n > rcu_capacity[MAX_RCU_LVLS]) {
+		WARN_ON(1);
+		return;
+	}
+
+	/* Calculate the number of rcu_nodes at each level of the tree. */
+	for (i = 1; i <= MAX_RCU_LVLS; i++)
+		if (n <= rcu_capacity[i]) {
+			for (j = 0; j <= i; j++)
+				num_rcu_lvl[j] =
+					DIV_ROUND_UP(n, rcu_capacity[i - j]);
+			rcu_num_lvls = i;
+			for (j = i + 1; j <= MAX_RCU_LVLS; j++)
+				num_rcu_lvl[j] = 0;
+			break;
+		}
+
+	/* Calculate the total number of rcu_node structures. */
+	rcu_num_nodes = 0;
+	for (i = 0; i <= MAX_RCU_LVLS; i++)
+		rcu_num_nodes += num_rcu_lvl[i];
+	rcu_num_nodes -= n;
 }
 
 void __init rcu_init(void)
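To make the geometry calculation concrete, here is a hedged worked example with hypothetical numbers (none of them come from this diff): assume CONFIG_RCU_FANOUT is 64, the rcu_fanout_leaf boot parameter was raised to 32, and nr_cpu_ids is 1024.

	/*
	 * Hypothetical run of rcu_init_geometry() (values are illustrative only):
	 *
	 *   rcu_capacity[] = { 1, 32, 2048, ... }      -> the loop settles on i = 2
	 *   num_rcu_lvl[0] = DIV_ROUND_UP(1024, 2048)  =    1   (root rcu_node)
	 *   num_rcu_lvl[1] = DIV_ROUND_UP(1024,   32)  =   32   (leaf rcu_nodes)
	 *   num_rcu_lvl[2] = DIV_ROUND_UP(1024,    1)  = 1024   (the CPUs themselves)
	 *   rcu_num_lvls   = 2
	 *   rcu_num_nodes  = (1 + 32 + 1024) - 1024    =   33 rcu_node structures,
	 *                    i.e. one root fanning out to 32 leaves of 32 CPUs each.
	 */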
@@ -2662,6 +2802,7 @@ void __init rcu_init(void)
	int cpu;

	rcu_bootup_announce();
+	rcu_init_geometry();
	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
	__rcu_init_preempt();