aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/rcutree.h
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcutree.h')
-rw-r--r--kernel/rcutree.h160
1 files changed, 136 insertions, 24 deletions
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 8e8287a983c2..d2a0046f63b2 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -34,10 +34,11 @@
34 * In practice, this has not been tested, so there is probably some 34 * In practice, this has not been tested, so there is probably some
35 * bug somewhere. 35 * bug somewhere.
36 */ 36 */
37#define MAX_RCU_LVLS 3 37#define MAX_RCU_LVLS 4
38#define RCU_FANOUT (CONFIG_RCU_FANOUT) 38#define RCU_FANOUT (CONFIG_RCU_FANOUT)
39#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) 39#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT)
40#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) 40#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT)
41#define RCU_FANOUT_FOURTH (RCU_FANOUT_CUBE * RCU_FANOUT)
41 42
42#if NR_CPUS <= RCU_FANOUT 43#if NR_CPUS <= RCU_FANOUT
43# define NUM_RCU_LVLS 1 44# define NUM_RCU_LVLS 1
@@ -45,23 +46,33 @@
45# define NUM_RCU_LVL_1 (NR_CPUS) 46# define NUM_RCU_LVL_1 (NR_CPUS)
46# define NUM_RCU_LVL_2 0 47# define NUM_RCU_LVL_2 0
47# define NUM_RCU_LVL_3 0 48# define NUM_RCU_LVL_3 0
49# define NUM_RCU_LVL_4 0
48#elif NR_CPUS <= RCU_FANOUT_SQ 50#elif NR_CPUS <= RCU_FANOUT_SQ
49# define NUM_RCU_LVLS 2 51# define NUM_RCU_LVLS 2
50# define NUM_RCU_LVL_0 1 52# define NUM_RCU_LVL_0 1
51# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT) 53# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
52# define NUM_RCU_LVL_2 (NR_CPUS) 54# define NUM_RCU_LVL_2 (NR_CPUS)
53# define NUM_RCU_LVL_3 0 55# define NUM_RCU_LVL_3 0
56# define NUM_RCU_LVL_4 0
54#elif NR_CPUS <= RCU_FANOUT_CUBE 57#elif NR_CPUS <= RCU_FANOUT_CUBE
55# define NUM_RCU_LVLS 3 58# define NUM_RCU_LVLS 3
56# define NUM_RCU_LVL_0 1 59# define NUM_RCU_LVL_0 1
57# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ) 60# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
58# define NUM_RCU_LVL_2 (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT)) 61# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
59# define NUM_RCU_LVL_3 NR_CPUS 62# define NUM_RCU_LVL_3 NR_CPUS
63# define NUM_RCU_LVL_4 0
64#elif NR_CPUS <= RCU_FANOUT_FOURTH
65# define NUM_RCU_LVLS 4
66# define NUM_RCU_LVL_0 1
67# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE)
68# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
69# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
70# define NUM_RCU_LVL_4 NR_CPUS
60#else 71#else
61# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" 72# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
62#endif /* #if (NR_CPUS) <= RCU_FANOUT */ 73#endif /* #if (NR_CPUS) <= RCU_FANOUT */
63 74
64#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) 75#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
65#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) 76#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
66 77
67/* 78/*
@@ -79,24 +90,67 @@ struct rcu_dynticks {
79 * Definition for node within the RCU grace-period-detection hierarchy. 90 * Definition for node within the RCU grace-period-detection hierarchy.
80 */ 91 */
81struct rcu_node { 92struct rcu_node {
82 spinlock_t lock; 93 spinlock_t lock; /* Root rcu_node's lock protects some */
94 /* rcu_state fields as well as following. */
83 long gpnum; /* Current grace period for this node. */ 95 long gpnum; /* Current grace period for this node. */
84 /* This will either be equal to or one */ 96 /* This will either be equal to or one */
85 /* behind the root rcu_node's gpnum. */ 97 /* behind the root rcu_node's gpnum. */
98 long completed; /* Last grace period completed for this node. */
99 /* This will either be equal to or one */
100 /* behind the root rcu_node's gpnum. */
86 unsigned long qsmask; /* CPUs or groups that need to switch in */ 101 unsigned long qsmask; /* CPUs or groups that need to switch in */
87 /* order for current grace period to proceed.*/ 102 /* order for current grace period to proceed.*/
103 /* In leaf rcu_node, each bit corresponds to */
104 /* an rcu_data structure, otherwise, each */
105 /* bit corresponds to a child rcu_node */
106 /* structure. */
107 unsigned long expmask; /* Groups that have ->blocked_tasks[] */
108 /* elements that need to drain to allow the */
109 /* current expedited grace period to */
110 /* complete (only for TREE_PREEMPT_RCU). */
88 unsigned long qsmaskinit; 111 unsigned long qsmaskinit;
89 /* Per-GP initialization for qsmask. */ 112 /* Per-GP initial value for qsmask & expmask. */
90 unsigned long grpmask; /* Mask to apply to parent qsmask. */ 113 unsigned long grpmask; /* Mask to apply to parent qsmask. */
114 /* Only one bit will be set in this mask. */
91 int grplo; /* lowest-numbered CPU or group here. */ 115 int grplo; /* lowest-numbered CPU or group here. */
92 int grphi; /* highest-numbered CPU or group here. */ 116 int grphi; /* highest-numbered CPU or group here. */
93 u8 grpnum; /* CPU/group number for next level up. */ 117 u8 grpnum; /* CPU/group number for next level up. */
94 u8 level; /* root is at level 0. */ 118 u8 level; /* root is at level 0. */
95 struct rcu_node *parent; 119 struct rcu_node *parent;
96 struct list_head blocked_tasks[2]; 120 struct list_head blocked_tasks[4];
97 /* Tasks blocked in RCU read-side critsect. */ 121 /* Tasks blocked in RCU read-side critsect. */
122 /* Grace period number (->gpnum) x blocked */
123 /* by tasks on the (x & 0x1) element of the */
124 /* blocked_tasks[] array. */
98} ____cacheline_internodealigned_in_smp; 125} ____cacheline_internodealigned_in_smp;
99 126
127/*
128 * Do a full breadth-first scan of the rcu_node structures for the
129 * specified rcu_state structure.
130 */
131#define rcu_for_each_node_breadth_first(rsp, rnp) \
132 for ((rnp) = &(rsp)->node[0]; \
133 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
134
135/*
136 * Do a breadth-first scan of the non-leaf rcu_node structures for the
137 * specified rcu_state structure. Note that if there is a singleton
138 * rcu_node tree with but one rcu_node structure, this loop is a no-op.
139 */
140#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
141 for ((rnp) = &(rsp)->node[0]; \
142 (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++)
143
144/*
145 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
146 * structure. Note that if there is a singleton rcu_node tree with but
147 * one rcu_node structure, this loop -will- visit the rcu_node structure.
148 * It is still a leaf node, even if it is also the root node.
149 */
150#define rcu_for_each_leaf_node(rsp, rnp) \
151 for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
152 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
153
100/* Index values for nxttail array in struct rcu_data. */ 154/* Index values for nxttail array in struct rcu_data. */
101#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ 155#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
102#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ 156#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */
@@ -126,23 +180,30 @@ struct rcu_data {
126 * Any of the partitions might be empty, in which case the 180 * Any of the partitions might be empty, in which case the
127 * pointer to that partition will be equal to the pointer for 181 * pointer to that partition will be equal to the pointer for
128 * the following partition. When the list is empty, all of 182 * the following partition. When the list is empty, all of
129 * the nxttail elements point to nxtlist, which is NULL. 183 * the nxttail elements point to the ->nxtlist pointer itself,
184 * which in that case is NULL.
130 * 185 *
131 * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]):
132 * Entries that might have arrived after current GP ended
133 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
134 * Entries known to have arrived before current GP ended
135 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
136 * Entries that batch # <= ->completed - 1: waiting for current GP
137 * [nxtlist, *nxttail[RCU_DONE_TAIL]): 186 * [nxtlist, *nxttail[RCU_DONE_TAIL]):
138 * Entries that batch # <= ->completed 187 * Entries that batch # <= ->completed
139 * The grace period for these entries has completed, and 188 * The grace period for these entries has completed, and
140 * the other grace-period-completed entries may be moved 189 * the other grace-period-completed entries may be moved
141 * here temporarily in rcu_process_callbacks(). 190 * here temporarily in rcu_process_callbacks().
191 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
192 * Entries that batch # <= ->completed - 1: waiting for current GP
193 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
194 * Entries known to have arrived before current GP ended
195 * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]):
196 * Entries that might have arrived after current GP ended
197 * Note that the value of *nxttail[RCU_NEXT_TAIL] will
198 * always be NULL, as this is the end of the list.
142 */ 199 */
143 struct rcu_head *nxtlist; 200 struct rcu_head *nxtlist;
144 struct rcu_head **nxttail[RCU_NEXT_SIZE]; 201 struct rcu_head **nxttail[RCU_NEXT_SIZE];
145 long qlen; /* # of queued callbacks */ 202 long qlen; /* # of queued callbacks */
203 long qlen_last_fqs_check;
204 /* qlen at last check for QS forcing */
205 unsigned long n_force_qs_snap;
206 /* did other CPU force QS recently? */
146 long blimit; /* Upper limit on a processed batch */ 207 long blimit; /* Upper limit on a processed batch */
147 208
148#ifdef CONFIG_NO_HZ 209#ifdef CONFIG_NO_HZ
@@ -173,13 +234,15 @@ struct rcu_data {
173}; 234};
174 235
175/* Values for signaled field in struct rcu_state. */ 236/* Values for signaled field in struct rcu_state. */
176#define RCU_GP_INIT 0 /* Grace period being initialized. */ 237#define RCU_GP_IDLE 0 /* No grace period in progress. */
177#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */ 238#define RCU_GP_INIT 1 /* Grace period being initialized. */
178#define RCU_FORCE_QS 2 /* Need to force quiescent state. */ 239#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
240#define RCU_SAVE_COMPLETED 3 /* Need to save rsp->completed. */
241#define RCU_FORCE_QS 4 /* Need to force quiescent state. */
179#ifdef CONFIG_NO_HZ 242#ifdef CONFIG_NO_HZ
180#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK 243#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
181#else /* #ifdef CONFIG_NO_HZ */ 244#else /* #ifdef CONFIG_NO_HZ */
182#define RCU_SIGNAL_INIT RCU_FORCE_QS 245#define RCU_SIGNAL_INIT RCU_SAVE_COMPLETED
183#endif /* #else #ifdef CONFIG_NO_HZ */ 246#endif /* #else #ifdef CONFIG_NO_HZ */
184 247
185#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ 248#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
@@ -216,10 +279,23 @@ struct rcu_state {
216 /* Force QS state. */ 279 /* Force QS state. */
217 long gpnum; /* Current gp number. */ 280 long gpnum; /* Current gp number. */
218 long completed; /* # of last completed gp. */ 281 long completed; /* # of last completed gp. */
282
283 /* End of fields guarded by root rcu_node's lock. */
284
219 spinlock_t onofflock; /* exclude on/offline and */ 285 spinlock_t onofflock; /* exclude on/offline and */
220 /* starting new GP. */ 286 /* starting new GP. Also */
287 /* protects the following */
288 /* orphan_cbs fields. */
289 struct rcu_head *orphan_cbs_list; /* list of rcu_head structs */
290 /* orphaned by all CPUs in */
291 /* a given leaf rcu_node */
292 /* going offline. */
293 struct rcu_head **orphan_cbs_tail; /* And tail pointer. */
294 long orphan_qlen; /* Number of orphaned cbs. */
221 spinlock_t fqslock; /* Only one task forcing */ 295 spinlock_t fqslock; /* Only one task forcing */
222 /* quiescent states. */ 296 /* quiescent states. */
297 long completed_fqs; /* Value of completed @ snap. */
298 /* Protected by fqslock. */
223 unsigned long jiffies_force_qs; /* Time at which to invoke */ 299 unsigned long jiffies_force_qs; /* Time at which to invoke */
224 /* force_quiescent_state(). */ 300 /* force_quiescent_state(). */
225 unsigned long n_force_qs; /* Number of calls to */ 301 unsigned long n_force_qs; /* Number of calls to */
@@ -234,11 +310,15 @@ struct rcu_state {
234 unsigned long jiffies_stall; /* Time at which to check */ 310 unsigned long jiffies_stall; /* Time at which to check */
235 /* for CPU stalls. */ 311 /* for CPU stalls. */
236#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 312#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
237#ifdef CONFIG_NO_HZ
238 long dynticks_completed; /* Value of completed @ snap. */
239#endif /* #ifdef CONFIG_NO_HZ */
240}; 313};
241 314
315/* Return values for rcu_preempt_offline_tasks(). */
316
317#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */
318 /* GP were moved to root. */
319#define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */
320 /* GP were moved to root. */
321
242#ifdef RCU_TREE_NONCORE 322#ifdef RCU_TREE_NONCORE
243 323
244/* 324/*
@@ -255,5 +335,37 @@ extern struct rcu_state rcu_preempt_state;
255DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); 335DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
256#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 336#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
257 337
258#endif /* #ifdef RCU_TREE_NONCORE */ 338#else /* #ifdef RCU_TREE_NONCORE */
339
340/* Forward declarations for rcutree_plugin.h */
341static void rcu_bootup_announce(void);
342long rcu_batches_completed(void);
343static void rcu_preempt_note_context_switch(int cpu);
344static int rcu_preempted_readers(struct rcu_node *rnp);
345#ifdef CONFIG_HOTPLUG_CPU
346static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
347 unsigned long flags);
348#endif /* #ifdef CONFIG_HOTPLUG_CPU */
349#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
350static void rcu_print_task_stall(struct rcu_node *rnp);
351#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
352static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
353#ifdef CONFIG_HOTPLUG_CPU
354static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
355 struct rcu_node *rnp,
356 struct rcu_data *rdp);
357static void rcu_preempt_offline_cpu(int cpu);
358#endif /* #ifdef CONFIG_HOTPLUG_CPU */
359static void rcu_preempt_check_callbacks(int cpu);
360static void rcu_preempt_process_callbacks(void);
361void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
362#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
363static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
364#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
365static int rcu_preempt_pending(int cpu);
366static int rcu_preempt_needs_cpu(int cpu);
367static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
368static void rcu_preempt_send_cbs_to_orphanage(void);
369static void __init __rcu_init_preempt(void);
259 370
371#endif /* #else #ifdef RCU_TREE_NONCORE */