author    Grant Likely <grant.likely@secretlab.ca>  2012-07-25 00:31:09 -0400
committer Grant Likely <grant.likely@secretlab.ca>  2012-07-25 00:34:40 -0400
commit    6aeea3ecc33b1f36dbc3b80461d15a7052ae424f (patch)
tree      bbd273e3e0ca76094aed8e9c77e5adfe2b07f779 /kernel/rcutree.c
parent    9844a5524ec532aee826c35e3031637c7fc8287b (diff)
parent    bdc0077af574800d24318b6945cf2344e8dbb050 (diff)

Merge remote-tracking branch 'origin' into irqdomain/next
Diffstat (limited to 'kernel/rcutree.c')
 -rw-r--r--  kernel/rcutree.c | 479
 1 file changed, 310 insertions(+), 169 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 38ecdda3f55f..f280e542e3e9 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -60,36 +60,44 @@
 
 /* Data structures. */
 
-static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
+static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 
-#define RCU_STATE_INITIALIZER(structname) { \
-	.level = { &structname##_state.node[0] }, \
-	.levelcnt = { \
-		NUM_RCU_LVL_0,  /* root of hierarchy. */ \
-		NUM_RCU_LVL_1, \
-		NUM_RCU_LVL_2, \
-		NUM_RCU_LVL_3, \
-		NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
-	}, \
+#define RCU_STATE_INITIALIZER(sname, cr) { \
+	.level = { &sname##_state.node[0] }, \
+	.call = cr, \
 	.fqs_state = RCU_GP_IDLE, \
 	.gpnum = -300, \
 	.completed = -300, \
-	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
-	.orphan_nxttail = &structname##_state.orphan_nxtlist, \
-	.orphan_donetail = &structname##_state.orphan_donelist, \
-	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \
-	.n_force_qs = 0, \
-	.n_force_qs_ngp = 0, \
-	.name = #structname, \
+	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
+	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
+	.orphan_donetail = &sname##_state.orphan_donelist, \
+	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
+	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \
+	.name = #sname, \
 }
 
-struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched);
+struct rcu_state rcu_sched_state =
+	RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh);
+struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 static struct rcu_state *rcu_state;
+LIST_HEAD(rcu_struct_flavors);
+
+/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
+static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
+module_param(rcu_fanout_leaf, int, 0);
+int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
+static int num_rcu_lvl[] = {  /* Number of rcu_nodes at specified level. */
+	NUM_RCU_LVL_0,
+	NUM_RCU_LVL_1,
+	NUM_RCU_LVL_2,
+	NUM_RCU_LVL_3,
+	NUM_RCU_LVL_4,
+};
+int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
 
 /*
  * The rcu_scheduler_active variable transitions from zero to one just
@@ -147,13 +155,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
 unsigned long rcutorture_testseq;
 unsigned long rcutorture_vernum;
 
-/* State information for rcu_barrier() and friends. */
-
-static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
-static atomic_t rcu_barrier_cpu_count;
-static DEFINE_MUTEX(rcu_barrier_mutex);
-static struct completion rcu_barrier_completion;
-
 /*
  * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
  * permit this function to be invoked without holding the root rcu_node
@@ -201,6 +202,7 @@ void rcu_note_context_switch(int cpu)
 {
	trace_rcu_utilization("Start context switch");
	rcu_sched_qs(cpu);
+	rcu_preempt_note_context_switch(cpu);
	trace_rcu_utilization("End context switch");
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
@@ -357,7 +359,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
		struct task_struct *idle = idle_task(smp_processor_id());

		trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
-		ftrace_dump(DUMP_ALL);
+		ftrace_dump(DUMP_ORIG);
		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
			  current->pid, current->comm,
			  idle->pid, idle->comm); /* must be idle task! */
@@ -467,7 +469,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)

		trace_rcu_dyntick("Error on exit: not idle task",
				  oldval, rdtp->dynticks_nesting);
-		ftrace_dump(DUMP_ALL);
+		ftrace_dump(DUMP_ORIG);
		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
			  current->pid, current->comm,
			  idle->pid, idle->comm); /* must be idle task! */
@@ -584,8 +586,6 @@ void rcu_nmi_exit(void)
	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 }

-#ifdef CONFIG_PROVE_RCU
-
 /**
  * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
  *
@@ -603,7 +603,7 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);

-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)

 /*
  * Is the current CPU online? Disable preemption to avoid false positives
@@ -644,9 +644,7 @@ bool rcu_lockdep_current_cpu_online(void)
 }
 EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);

-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
-#endif /* #ifdef CONFIG_PROVE_RCU */
+#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */

 /**
  * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
@@ -732,7 +730,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
	int cpu;
	long delta;
	unsigned long flags;
-	int ndetected;
+	int ndetected = 0;
	struct rcu_node *rnp = rcu_get_root(rsp);

	/* Only let one CPU complain about others per time interval. */
@@ -773,7 +771,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
	 */
	rnp = rcu_get_root(rsp);
	raw_spin_lock_irqsave(&rnp->lock, flags);
-	ndetected = rcu_print_task_stall(rnp);
+	ndetected += rcu_print_task_stall(rnp);
	raw_spin_unlock_irqrestore(&rnp->lock, flags);

	print_cpu_stall_info_end();
@@ -859,9 +857,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
  */
 void rcu_cpu_stall_reset(void)
 {
-	rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
-	rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
-	rcu_preempt_stall_reset();
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
 }

 static struct notifier_block rcu_panic_block = {
@@ -893,8 +892,9 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct
		if (rnp->qsmask & rdp->grpmask) {
			rdp->qs_pending = 1;
			rdp->passed_quiesce = 0;
-		} else
+		} else {
			rdp->qs_pending = 0;
+		}
		zero_cpu_stall_ticks(rdp);
	}
 }
@@ -936,6 +936,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
 }

 /*
+ * Initialize the specified rcu_data structure's callback list to empty.
+ */
+static void init_callback_list(struct rcu_data *rdp)
+{
+	int i;
+
+	rdp->nxtlist = NULL;
+	for (i = 0; i < RCU_NEXT_SIZE; i++)
+		rdp->nxttail[i] = &rdp->nxtlist;
+}
+
+/*
  * Advance this CPU's callbacks, but only if the current grace period
  * has ended. This may be called only from the CPU to whom the rdp
  * belongs. In addition, the corresponding leaf rcu_node structure's
@@ -1327,8 +1339,6 @@ static void
 rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
			  struct rcu_node *rnp, struct rcu_data *rdp)
 {
-	int i;
-
	/*
	 * Orphan the callbacks. First adjust the counts. This is safe
	 * because ->onofflock excludes _rcu_barrier()'s adoption of
@@ -1339,7 +1349,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
		rsp->qlen += rdp->qlen;
		rdp->n_cbs_orphaned += rdp->qlen;
		rdp->qlen_lazy = 0;
-		rdp->qlen = 0;
+		ACCESS_ONCE(rdp->qlen) = 0;
	}

	/*
@@ -1368,9 +1378,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
	}

	/* Finally, initialize the rcu_data structure's list to empty. */
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
+	init_callback_list(rdp);
 }

 /*
@@ -1504,6 +1512,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
	if (need_report & RCU_OFL_TASKS_EXP_GP)
		rcu_report_exp_rnp(rsp, rnp, true);
+	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
+		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
+		  cpu, rdp->qlen, rdp->nxtlist);
 }

 #else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -1591,7 +1602,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
	}
	smp_mb(); /* List handling before counting for rcu_barrier(). */
	rdp->qlen_lazy -= count_lazy;
-	rdp->qlen -= count;
+	ACCESS_ONCE(rdp->qlen) -= count;
	rdp->n_cbs_invoked += count;

	/* Reinstate batch limit if we have worked down the excess. */
@@ -1604,6 +1615,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
		rdp->n_force_qs_snap = rsp->n_force_qs;
	} else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
		rdp->qlen_last_fqs_check = rdp->qlen;
+	WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));

	local_irq_restore(flags);

@@ -1744,8 +1756,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
		break; /* grace period idle or initializing, ignore. */

	case RCU_SAVE_DYNTICK:
-		if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
-			break; /* So gcc recognizes the dead code. */

		raw_spin_unlock(&rnp->lock); /* irqs remain disabled */

@@ -1787,9 +1797,10 @@ unlock_fqs_ret:
  * whom the rdp belongs.
  */
 static void
-__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
+__rcu_process_callbacks(struct rcu_state *rsp)
 {
	unsigned long flags;
+	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);

	WARN_ON_ONCE(rdp->beenonline == 0);

@@ -1825,11 +1836,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static void rcu_process_callbacks(struct softirq_action *unused)
 {
+	struct rcu_state *rsp;
+
	trace_rcu_utilization("Start RCU core");
-	__rcu_process_callbacks(&rcu_sched_state,
-				&__get_cpu_var(rcu_sched_data));
-	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
-	rcu_preempt_process_callbacks();
+	for_each_rcu_flavor(rsp)
+		__rcu_process_callbacks(rsp);
	trace_rcu_utilization("End RCU core");
 }

@@ -1856,6 +1867,56 @@ static void invoke_rcu_core(void)
	raise_softirq(RCU_SOFTIRQ);
 }

+/*
+ * Handle any core-RCU processing required by a call_rcu() invocation.
+ */
+static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
+			    struct rcu_head *head, unsigned long flags)
+{
+	/*
+	 * If called from an extended quiescent state, invoke the RCU
+	 * core in order to force a re-evaluation of RCU's idleness.
+	 */
+	if (rcu_is_cpu_idle() && cpu_online(smp_processor_id()))
+		invoke_rcu_core();
+
+	/* If interrupts were disabled or CPU offline, don't invoke RCU core. */
+	if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
+		return;
+
+	/*
+	 * Force the grace period if too many callbacks or too long waiting.
+	 * Enforce hysteresis, and don't invoke force_quiescent_state()
+	 * if some other CPU has recently done so. Also, don't bother
+	 * invoking force_quiescent_state() if the newly enqueued callback
+	 * is the only one waiting for a grace period to complete.
+	 */
+	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
+
+		/* Are we ignoring a completed grace period? */
+		rcu_process_gp_end(rsp, rdp);
+		check_for_new_grace_period(rsp, rdp);
+
+		/* Start a new grace period if one not already started. */
+		if (!rcu_gp_in_progress(rsp)) {
+			unsigned long nestflag;
+			struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+			raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
+			rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */
+		} else {
+			/* Give the grace period a kick. */
+			rdp->blimit = LONG_MAX;
+			if (rsp->n_force_qs == rdp->n_force_qs_snap &&
+			    *rdp->nxttail[RCU_DONE_TAIL] != head)
+				force_quiescent_state(rsp, 0);
+			rdp->n_force_qs_snap = rsp->n_force_qs;
+			rdp->qlen_last_fqs_check = rdp->qlen;
+		}
+	} else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
+		force_quiescent_state(rsp, 1);
+}
+
 static void
 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
	   struct rcu_state *rsp, bool lazy)
@@ -1880,7 +1941,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
	rdp = this_cpu_ptr(rsp->rda);

	/* Add the callback to our list. */
-	rdp->qlen++;
+	ACCESS_ONCE(rdp->qlen)++;
	if (lazy)
		rdp->qlen_lazy++;
	else
@@ -1895,43 +1956,8 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
	else
		trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);

-	/* If interrupts were disabled, don't dive into RCU core. */
-	if (irqs_disabled_flags(flags)) {
-		local_irq_restore(flags);
-		return;
-	}
-
-	/*
-	 * Force the grace period if too many callbacks or too long waiting.
-	 * Enforce hysteresis, and don't invoke force_quiescent_state()
-	 * if some other CPU has recently done so. Also, don't bother
-	 * invoking force_quiescent_state() if the newly enqueued callback
-	 * is the only one waiting for a grace period to complete.
-	 */
-	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
-
-		/* Are we ignoring a completed grace period? */
-		rcu_process_gp_end(rsp, rdp);
-		check_for_new_grace_period(rsp, rdp);
-
-		/* Start a new grace period if one not already started. */
-		if (!rcu_gp_in_progress(rsp)) {
-			unsigned long nestflag;
-			struct rcu_node *rnp_root = rcu_get_root(rsp);
-
-			raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
-			rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */
-		} else {
-			/* Give the grace period a kick. */
-			rdp->blimit = LONG_MAX;
-			if (rsp->n_force_qs == rdp->n_force_qs_snap &&
-			    *rdp->nxttail[RCU_DONE_TAIL] != head)
-				force_quiescent_state(rsp, 0);
-			rdp->n_force_qs_snap = rsp->n_force_qs;
-			rdp->qlen_last_fqs_check = rdp->qlen;
-		}
-	} else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
-		force_quiescent_state(rsp, 1);
+	/* Go handle any RCU core processing required. */
+	__call_rcu_core(rsp, rdp, head, flags);
	local_irq_restore(flags);
 }

@@ -1961,28 +1987,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
  * occasionally incorrectly indicate that there are multiple CPUs online
  * when there was in fact only one the whole time, as this just adds
  * some overhead: RCU still operates correctly.
- *
- * Of course, sampling num_online_cpus() with preemption enabled can
- * give erroneous results if there are concurrent CPU-hotplug operations.
- * For example, given a demonic sequence of preemptions in num_online_cpus()
- * and CPU-hotplug operations, there could be two or more CPUs online at
- * all times, but num_online_cpus() might well return one (or even zero).
- *
- * However, all such demonic sequences require at least one CPU-offline
- * operation.  Furthermore, rcu_blocking_is_gp() giving the wrong answer
- * is only a problem if there is an RCU read-side critical section executing
- * throughout.  But RCU-sched and RCU-bh read-side critical sections
- * disable either preemption or bh, which prevents a CPU from going offline.
- * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return
- * that there is only one CPU when in fact there was more than one throughout
- * is when there were no RCU readers in the system.  If there are no
- * RCU readers, the grace period by definition can be of zero length,
- * regardless of the number of online CPUs.
  */
 static inline int rcu_blocking_is_gp(void)
 {
+	int ret;
+
	might_sleep(); /* Check for RCU read-side critical section. */
-	return num_online_cpus() <= 1;
+	preempt_disable();
+	ret = num_online_cpus() <= 1;
+	preempt_enable();
+	return ret;
 }

 /**
@@ -2117,9 +2131,9 @@ void synchronize_sched_expedited(void)
		put_online_cpus();

		/* No joy, try again later.  Or just synchronize_sched(). */
-		if (trycount++ < 10)
+		if (trycount++ < 10) {
			udelay(trycount * num_online_cpus());
-		else {
+		} else {
			synchronize_sched();
			return;
		}
@@ -2240,9 +2254,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static int rcu_pending(int cpu)
 {
-	return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) ||
-	       __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) ||
-	       rcu_preempt_pending(cpu);
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu)))
+			return 1;
+	return 0;
 }

 /*
@@ -2252,20 +2269,41 @@ static int rcu_pending(int cpu)
  */
 static int rcu_cpu_has_callbacks(int cpu)
 {
+	struct rcu_state *rsp;
+
	/* RCU callbacks either ready or pending? */
-	return per_cpu(rcu_sched_data, cpu).nxtlist ||
-	       per_cpu(rcu_bh_data, cpu).nxtlist ||
-	       rcu_preempt_cpu_has_callbacks(cpu);
+	for_each_rcu_flavor(rsp)
+		if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
+			return 1;
+	return 0;
+}
+
+/*
+ * Helper function for _rcu_barrier() tracing.  If tracing is disabled,
+ * the compiler is expected to optimize this away.
+ */
+static void _rcu_barrier_trace(struct rcu_state *rsp, char *s,
+			       int cpu, unsigned long done)
+{
+	trace_rcu_barrier(rsp->name, s, cpu,
+			  atomic_read(&rsp->barrier_cpu_count), done);
 }

 /*
  * RCU callback function for _rcu_barrier().  If we are last, wake
  * up the task executing _rcu_barrier().
  */
-static void rcu_barrier_callback(struct rcu_head *notused)
+static void rcu_barrier_callback(struct rcu_head *rhp)
 {
-	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
-		complete(&rcu_barrier_completion);
+	struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
+	struct rcu_state *rsp = rdp->rsp;
+
+	if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
+		_rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
+		complete(&rsp->barrier_completion);
+	} else {
+		_rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
+	}
 }

 /*
@@ -2273,35 +2311,63 @@ static void rcu_barrier_callback(struct rcu_head *notused)
  */
 static void rcu_barrier_func(void *type)
 {
-	int cpu = smp_processor_id();
-	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
-	void (*call_rcu_func)(struct rcu_head *head,
-			      void (*func)(struct rcu_head *head));
+	struct rcu_state *rsp = type;
+	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);

-	atomic_inc(&rcu_barrier_cpu_count);
-	call_rcu_func = type;
-	call_rcu_func(head, rcu_barrier_callback);
+	_rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
+	atomic_inc(&rsp->barrier_cpu_count);
+	rsp->call(&rdp->barrier_head, rcu_barrier_callback);
 }

 /*
  * Orchestrate the specified type of RCU barrier, waiting for all
  * RCU callbacks of the specified type to complete.
  */
-static void _rcu_barrier(struct rcu_state *rsp,
-			 void (*call_rcu_func)(struct rcu_head *head,
-						void (*func)(struct rcu_head *head)))
+static void _rcu_barrier(struct rcu_state *rsp)
 {
	int cpu;
	unsigned long flags;
	struct rcu_data *rdp;
-	struct rcu_head rh;
+	struct rcu_data rd;
+	unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
+	unsigned long snap_done;

-	init_rcu_head_on_stack(&rh);
+	init_rcu_head_on_stack(&rd.barrier_head);
+	_rcu_barrier_trace(rsp, "Begin", -1, snap);

	/* Take mutex to serialize concurrent rcu_barrier() requests. */
-	mutex_lock(&rcu_barrier_mutex);
+	mutex_lock(&rsp->barrier_mutex);
+
+	/*
+	 * Ensure that all prior references, including to ->n_barrier_done,
+	 * are ordered before the _rcu_barrier() machinery.
+	 */
+	smp_mb();  /* See above block comment. */
+
+	/*
+	 * Recheck ->n_barrier_done to see if others did our work for us.
+	 * This means checking ->n_barrier_done for an even-to-odd-to-even
+	 * transition.  The "if" expression below therefore rounds the old
+	 * value up to the next even number and adds two before comparing.
+	 */
+	snap_done = ACCESS_ONCE(rsp->n_barrier_done);
+	_rcu_barrier_trace(rsp, "Check", -1, snap_done);
+	if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) {
+		_rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
+		smp_mb(); /* caller's subsequent code after above check. */
+		mutex_unlock(&rsp->barrier_mutex);
+		return;
+	}

-	smp_mb();  /* Prevent any prior operations from leaking in. */
+	/*
+	 * Increment ->n_barrier_done to avoid duplicate work.  Use
+	 * ACCESS_ONCE() to prevent the compiler from speculating
+	 * the increment to precede the early-exit check.
+	 */
+	ACCESS_ONCE(rsp->n_barrier_done)++;
+	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
+	_rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
+	smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */

	/*
	 * Initialize the count to one rather than to zero in order to
@@ -2320,8 +2386,8 @@ static void _rcu_barrier(struct rcu_state *rsp,
	 * 6.	Both rcu_barrier_callback() callbacks are invoked, awakening
	 *	us -- but before CPU 1's orphaned callbacks are invoked!!!
	 */
-	init_completion(&rcu_barrier_completion);
-	atomic_set(&rcu_barrier_cpu_count, 1);
+	init_completion(&rsp->barrier_completion);
+	atomic_set(&rsp->barrier_cpu_count, 1);
	raw_spin_lock_irqsave(&rsp->onofflock, flags);
	rsp->rcu_barrier_in_progress = current;
	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
@@ -2337,14 +2403,19 @@ static void _rcu_barrier(struct rcu_state *rsp,
		preempt_disable();
		rdp = per_cpu_ptr(rsp->rda, cpu);
		if (cpu_is_offline(cpu)) {
+			_rcu_barrier_trace(rsp, "Offline", cpu,
+					   rsp->n_barrier_done);
			preempt_enable();
			while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen))
				schedule_timeout_interruptible(1);
		} else if (ACCESS_ONCE(rdp->qlen)) {
-			smp_call_function_single(cpu, rcu_barrier_func,
-						 (void *)call_rcu_func, 1);
+			_rcu_barrier_trace(rsp, "OnlineQ", cpu,
+					   rsp->n_barrier_done);
+			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
			preempt_enable();
		} else {
+			_rcu_barrier_trace(rsp, "OnlineNQ", cpu,
+					   rsp->n_barrier_done);
			preempt_enable();
		}
	}
@@ -2361,24 +2432,32 @@ static void _rcu_barrier(struct rcu_state *rsp,
	rcu_adopt_orphan_cbs(rsp);
	rsp->rcu_barrier_in_progress = NULL;
	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
-	atomic_inc(&rcu_barrier_cpu_count);
+	atomic_inc(&rsp->barrier_cpu_count);
	smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */
-	call_rcu_func(&rh, rcu_barrier_callback);
+	rd.rsp = rsp;
+	rsp->call(&rd.barrier_head, rcu_barrier_callback);

	/*
	 * Now that we have an rcu_barrier_callback() callback on each
	 * CPU, and thus each counted, remove the initial count.
	 */
-	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
-		complete(&rcu_barrier_completion);
+	if (atomic_dec_and_test(&rsp->barrier_cpu_count))
+		complete(&rsp->barrier_completion);
+
+	/* Increment ->n_barrier_done to prevent duplicate work. */
+	smp_mb(); /* Keep increment after above mechanism. */
+	ACCESS_ONCE(rsp->n_barrier_done)++;
+	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
+	_rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
+	smp_mb(); /* Keep increment before caller's subsequent code. */

	/* Wait for all rcu_barrier_callback() callbacks to be invoked. */
-	wait_for_completion(&rcu_barrier_completion);
+	wait_for_completion(&rsp->barrier_completion);

	/* Other rcu_barrier() invocations can now safely proceed. */
-	mutex_unlock(&rcu_barrier_mutex);
+	mutex_unlock(&rsp->barrier_mutex);

-	destroy_rcu_head_on_stack(&rh);
+	destroy_rcu_head_on_stack(&rd.barrier_head);
 }

 /**
@@ -2386,7 +2465,7 @@ static void _rcu_barrier(struct rcu_state *rsp,
  */
 void rcu_barrier_bh(void)
 {
-	_rcu_barrier(&rcu_bh_state, call_rcu_bh);
+	_rcu_barrier(&rcu_bh_state);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_bh);

@@ -2395,7 +2474,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh);
  */
 void rcu_barrier_sched(void)
 {
-	_rcu_barrier(&rcu_sched_state, call_rcu_sched);
+	_rcu_barrier(&rcu_sched_state);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);

@@ -2406,18 +2485,15 @@ static void __init
 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 {
	unsigned long flags;
-	int i;
	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
	struct rcu_node *rnp = rcu_get_root(rsp);

	/* Set up local state, ensuring consistent view of global state. */
	raw_spin_lock_irqsave(&rnp->lock, flags);
	rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
+	init_callback_list(rdp);
	rdp->qlen_lazy = 0;
-	rdp->qlen = 0;
+	ACCESS_ONCE(rdp->qlen) = 0;
	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
@@ -2491,9 +2567,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)

 static void __cpuinit rcu_prepare_cpu(int cpu)
 {
-	rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
-	rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
-	rcu_preempt_init_percpu_data(cpu);
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		rcu_init_percpu_data(cpu, rsp,
+				     strcmp(rsp->name, "rcu_preempt") == 0);
 }

 /*
@@ -2505,6 +2583,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
	long cpu = (long)hcpu;
	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
	struct rcu_node *rnp = rdp->mynode;
+	struct rcu_state *rsp;

	trace_rcu_utilization("Start CPU hotplug");
	switch (action) {
@@ -2529,18 +2608,16 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
		 * touch any data without introducing corruption. We send the
		 * dying CPU's callbacks to an arbitrarily chosen online CPU.
		 */
-		rcu_cleanup_dying_cpu(&rcu_bh_state);
-		rcu_cleanup_dying_cpu(&rcu_sched_state);
-		rcu_preempt_cleanup_dying_cpu();
+		for_each_rcu_flavor(rsp)
+			rcu_cleanup_dying_cpu(rsp);
		rcu_cleanup_after_idle(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
-		rcu_cleanup_dead_cpu(cpu, &rcu_bh_state);
-		rcu_cleanup_dead_cpu(cpu, &rcu_sched_state);
-		rcu_preempt_cleanup_dead_cpu(cpu);
+		for_each_rcu_flavor(rsp)
+			rcu_cleanup_dead_cpu(cpu, rsp);
		break;
	default:
		break;
@@ -2573,9 +2650,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
	int i;

-	for (i = NUM_RCU_LVLS - 1; i > 0; i--)
+	for (i = rcu_num_lvls - 1; i > 0; i--)
		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-	rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF;
+	rsp->levelspread[0] = rcu_fanout_leaf;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
@@ -2585,7 +2662,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
	int i;

	cprv = NR_CPUS;
-	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+	for (i = rcu_num_lvls - 1; i >= 0; i--) {
		ccur = rsp->levelcnt[i];
		rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
		cprv = ccur;
@@ -2612,13 +2689,15 @@ static void __init rcu_init_one(struct rcu_state *rsp,

	/* Initialize the level-tracking arrays. */

-	for (i = 1; i < NUM_RCU_LVLS; i++)
+	for (i = 0; i < rcu_num_lvls; i++)
+		rsp->levelcnt[i] = num_rcu_lvl[i];
+	for (i = 1; i < rcu_num_lvls; i++)
		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
	rcu_init_levelspread(rsp);

	/* Initialize the elements themselves, starting from the leaves. */

-	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+	for (i = rcu_num_lvls - 1; i >= 0; i--) {
		cpustride *= rsp->levelspread[i];
		rnp = rsp->level[i];
		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
@@ -2648,13 +2727,74 @@ static void __init rcu_init_one(struct rcu_state *rsp,
	}

	rsp->rda = rda;
-	rnp = rsp->level[NUM_RCU_LVLS - 1];
+	rnp = rsp->level[rcu_num_lvls - 1];
	for_each_possible_cpu(i) {
		while (i > rnp->grphi)
			rnp++;
		per_cpu_ptr(rsp->rda, i)->mynode = rnp;
		rcu_boot_init_percpu_data(i, rsp);
	}
+	list_add(&rsp->flavors, &rcu_struct_flavors);
+}
+
+/*
+ * Compute the rcu_node tree geometry from kernel parameters.  This cannot
+ * replace the definitions in rcutree.h because those are needed to size
+ * the ->node array in the rcu_state structure.
+ */
+static void __init rcu_init_geometry(void)
+{
+	int i;
+	int j;
+	int n = nr_cpu_ids;
+	int rcu_capacity[MAX_RCU_LVLS + 1];
+
+	/* If the compile-time values are accurate, just leave. */
+	if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF)
+		return;
+
+	/*
+	 * Compute number of nodes that can be handled an rcu_node tree
+	 * with the given number of levels.  Setting rcu_capacity[0] makes
+	 * some of the arithmetic easier.
+	 */
+	rcu_capacity[0] = 1;
+	rcu_capacity[1] = rcu_fanout_leaf;
+	for (i = 2; i <= MAX_RCU_LVLS; i++)
+		rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
+
+	/*
+	 * The boot-time rcu_fanout_leaf parameter is only permitted
+	 * to increase the leaf-level fanout, not decrease it.  Of course,
+	 * the leaf-level fanout cannot exceed the number of bits in
+	 * the rcu_node masks.  Finally, the tree must be able to accommodate
+	 * the configured number of CPUs.  Complain and fall back to the
+	 * compile-time values if these limits are exceeded.
+	 */
+	if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
+	    rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
+	    n > rcu_capacity[MAX_RCU_LVLS]) {
+		WARN_ON(1);
+		return;
+	}
+
+	/* Calculate the number of rcu_nodes at each level of the tree. */
+	for (i = 1; i <= MAX_RCU_LVLS; i++)
+		if (n <= rcu_capacity[i]) {
+			for (j = 0; j <= i; j++)
+				num_rcu_lvl[j] =
+					DIV_ROUND_UP(n, rcu_capacity[i - j]);
+			rcu_num_lvls = i;
+			for (j = i + 1; j <= MAX_RCU_LVLS; j++)
+				num_rcu_lvl[j] = 0;
+			break;
+		}
+
+	/* Calculate the total number of rcu_node structures. */
+	rcu_num_nodes = 0;
+	for (i = 0; i <= MAX_RCU_LVLS; i++)
+		rcu_num_nodes += num_rcu_lvl[i];
+	rcu_num_nodes -= n;
 }

 void __init rcu_init(void)
@@ -2662,6 +2802,7 @@ void __init rcu_init(void)
	int cpu;

	rcu_bootup_announce();
+	rcu_init_geometry();
	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
	__rcu_init_preempt();