diff options
-rw-r--r-- | Documentation/RCU/trace.txt | 29 | ||||
-rw-r--r-- | kernel/rcutree.c | 5 | ||||
-rw-r--r-- | kernel/rcutree.h | 21 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 163 | ||||
-rw-r--r-- | kernel/rcutree_trace.c | 11 |
5 files changed, 135 insertions, 94 deletions
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index e731ad20d166..5a704ffd0bbc 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt | |||
@@ -166,14 +166,14 @@ o "gpnum" is the number of grace periods that have started. It is | |||
166 | The output of "cat rcu/rcuhier" looks as follows, with very long lines: | 166 | The output of "cat rcu/rcuhier" looks as follows, with very long lines: |
167 | 167 | ||
168 | c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 | 168 | c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 |
169 | 1/1 .>. 0:127 ^0 | 169 | 1/1 ..>. 0:127 ^0 |
170 | 3/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 | 170 | 3/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3 |
171 | 3/3f .>. 0:5 ^0 2/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 | 171 | 3/3f ..>. 0:5 ^0 2/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3 |
172 | rcu_bh: | 172 | rcu_bh: |
173 | c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 | 173 | c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 |
174 | 0/1 .>. 0:127 ^0 | 174 | 0/1 ..>. 0:127 ^0 |
175 | 0/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 | 175 | 0/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3 |
176 | 0/3f .>. 0:5 ^0 0/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 | 176 | 0/3f ..>. 0:5 ^0 0/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3 |
177 | 177 | ||
178 | This is once again split into "rcu_sched" and "rcu_bh" portions, | 178 | This is once again split into "rcu_sched" and "rcu_bh" portions, |
179 | and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional | 179 | and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional |
@@ -232,13 +232,20 @@ o Each element of the form "1/1 0:127 ^0" represents one struct | |||
232 | current grace period. | 232 | current grace period. |
233 | 233 | ||
234 | o The characters separated by the ">" indicate the state | 234 | o The characters separated by the ">" indicate the state |
235 | of the blocked-tasks lists. A "T" preceding the ">" | 235 | of the blocked-tasks lists. A "G" preceding the ">" |
236 | indicates that at least one task blocked in an RCU | 236 | indicates that at least one task blocked in an RCU |
237 | read-side critical section blocks the current grace | 237 | read-side critical section blocks the current grace |
238 | period, while a "." preceding the ">" indicates otherwise. | 238 | period, while an "E" preceding the ">" indicates that |
239 | The character following the ">" indicates similarly for | 239 | at least one task blocked in an RCU read-side critical |
240 | the next grace period. A "T" should appear in this | 240 | section blocks the current expedited grace period. |
241 | field only for rcu-preempt. | 241 | A "T" character following the ">" indicates that at |
242 | least one task is blocked within an RCU read-side | ||
243 | critical section, regardless of whether any current | ||
244 | grace period (expedited or normal) is inconvenienced. | ||
245 | A "." character appears if the corresponding condition | ||
246 | does not hold, so that "..>." indicates that no tasks | ||
247 | are blocked. In contrast, "GE>T" indicates maximal | ||
248 | inconvenience from blocked tasks. | ||
242 | 249 | ||
243 | o The numbers separated by the ":" are the range of CPUs | 250 | o The numbers separated by the ":" are the range of CPUs |
244 | served by this struct rcu_node. This can be helpful | 251 | served by this struct rcu_node. This can be helpful |
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 90104a19c564..0ac1cc03f935 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -1901,10 +1901,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
1901 | j / rsp->levelspread[i - 1]; | 1901 | j / rsp->levelspread[i - 1]; |
1902 | } | 1902 | } |
1903 | rnp->level = i; | 1903 | rnp->level = i; |
1904 | INIT_LIST_HEAD(&rnp->blocked_tasks[0]); | 1904 | INIT_LIST_HEAD(&rnp->blkd_tasks); |
1905 | INIT_LIST_HEAD(&rnp->blocked_tasks[1]); | ||
1906 | INIT_LIST_HEAD(&rnp->blocked_tasks[2]); | ||
1907 | INIT_LIST_HEAD(&rnp->blocked_tasks[3]); | ||
1908 | } | 1905 | } |
1909 | } | 1906 | } |
1910 | 1907 | ||
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index bd891def3303..5a439c180e69 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -107,7 +107,7 @@ struct rcu_node { | |||
107 | /* an rcu_data structure, otherwise, each */ | 107 | /* an rcu_data structure, otherwise, each */ |
108 | /* bit corresponds to a child rcu_node */ | 108 | /* bit corresponds to a child rcu_node */ |
109 | /* structure. */ | 109 | /* structure. */ |
110 | unsigned long expmask; /* Groups that have ->blocked_tasks[] */ | 110 | unsigned long expmask; /* Groups that have ->blkd_tasks */ |
111 | /* elements that need to drain to allow the */ | 111 | /* elements that need to drain to allow the */ |
112 | /* current expedited grace period to */ | 112 | /* current expedited grace period to */ |
113 | /* complete (only for TREE_PREEMPT_RCU). */ | 113 | /* complete (only for TREE_PREEMPT_RCU). */ |
@@ -120,11 +120,20 @@ struct rcu_node { | |||
120 | u8 grpnum; /* CPU/group number for next level up. */ | 120 | u8 grpnum; /* CPU/group number for next level up. */ |
121 | u8 level; /* root is at level 0. */ | 121 | u8 level; /* root is at level 0. */ |
122 | struct rcu_node *parent; | 122 | struct rcu_node *parent; |
123 | struct list_head blocked_tasks[4]; | 123 | struct list_head blkd_tasks; |
124 | /* Tasks blocked in RCU read-side critsect. */ | 124 | /* Tasks blocked in RCU read-side critical */ |
125 | /* Grace period number (->gpnum) x blocked */ | 125 | /* section. Tasks are placed at the head */ |
126 | /* by tasks on the (x & 0x1) element of the */ | 126 | /* of this list and age towards the tail. */ |
127 | /* blocked_tasks[] array. */ | 127 | struct list_head *gp_tasks; |
128 | /* Pointer to the first task blocking the */ | ||
129 | /* current grace period, or NULL if there */ | ||
130 | /* is no such task. */ | ||
131 | struct list_head *exp_tasks; | ||
132 | /* Pointer to the first task blocking the */ | ||
133 | /* current expedited grace period, or NULL */ | ||
134 | /* if there is no such task. If there */ | ||
135 | /* is no current expedited grace period, */ | ||
136 | /* then there cannot be any such task. */ | ||
128 | } ____cacheline_internodealigned_in_smp; | 137 | } ____cacheline_internodealigned_in_smp; |
129 | 138 | ||
130 | /* | 139 | /* |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 764b5fcc7c56..774f010a4619 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -130,12 +130,12 @@ static void rcu_preempt_qs(int cpu) | |||
130 | * We have entered the scheduler, and the current task might soon be | 130 | * We have entered the scheduler, and the current task might soon be |
131 | * context-switched away from. If this task is in an RCU read-side | 131 | * context-switched away from. If this task is in an RCU read-side |
132 | * critical section, we will no longer be able to rely on the CPU to | 132 | * critical section, we will no longer be able to rely on the CPU to |
133 | * record that fact, so we enqueue the task on the appropriate entry | 133 | * record that fact, so we enqueue the task on the blkd_tasks list. |
134 | * of the blocked_tasks[] array. The task will dequeue itself when | 134 | * The task will dequeue itself when it exits the outermost enclosing |
135 | * it exits the outermost enclosing RCU read-side critical section. | 135 | * RCU read-side critical section. Therefore, the current grace period |
136 | * Therefore, the current grace period cannot be permitted to complete | 136 | * cannot be permitted to complete until the blkd_tasks list entries |
137 | * until the blocked_tasks[] entry indexed by the low-order bit of | 137 | * predating the current grace period drain, in other words, until |
138 | * rnp->gpnum empties. | 138 | * rnp->gp_tasks becomes NULL. |
139 | * | 139 | * |
140 | * Caller must disable preemption. | 140 | * Caller must disable preemption. |
141 | */ | 141 | */ |
@@ -143,7 +143,6 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
143 | { | 143 | { |
144 | struct task_struct *t = current; | 144 | struct task_struct *t = current; |
145 | unsigned long flags; | 145 | unsigned long flags; |
146 | int phase; | ||
147 | struct rcu_data *rdp; | 146 | struct rcu_data *rdp; |
148 | struct rcu_node *rnp; | 147 | struct rcu_node *rnp; |
149 | 148 | ||
@@ -165,15 +164,26 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
165 | * (i.e., this CPU has not yet passed through a quiescent | 164 | * (i.e., this CPU has not yet passed through a quiescent |
166 | * state for the current grace period), then as long | 165 | * state for the current grace period), then as long |
167 | * as that task remains queued, the current grace period | 166 | * as that task remains queued, the current grace period |
168 | * cannot end. | 167 | * cannot end. Note that there is some uncertainty as |
168 | * to exactly when the current grace period started. | ||
169 | * We take a conservative approach, which can result | ||
170 | * in unnecessarily waiting on tasks that started very | ||
171 | * slightly after the current grace period began. C'est | ||
172 | * la vie!!! | ||
169 | * | 173 | * |
170 | * But first, note that the current CPU must still be | 174 | * But first, note that the current CPU must still be |
171 | * on line! | 175 | * on line! |
172 | */ | 176 | */ |
173 | WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); | 177 | WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); |
174 | WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); | 178 | WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); |
175 | phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1; | 179 | if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { |
176 | list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); | 180 | list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); |
181 | rnp->gp_tasks = &t->rcu_node_entry; | ||
182 | } else { | ||
183 | list_add(&t->rcu_node_entry, &rnp->blkd_tasks); | ||
184 | if (rnp->qsmask & rdp->grpmask) | ||
185 | rnp->gp_tasks = &t->rcu_node_entry; | ||
186 | } | ||
177 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 187 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
178 | } | 188 | } |
179 | 189 | ||
@@ -210,10 +220,7 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock); | |||
210 | */ | 220 | */ |
211 | static int rcu_preempted_readers(struct rcu_node *rnp) | 221 | static int rcu_preempted_readers(struct rcu_node *rnp) |
212 | { | 222 | { |
213 | int phase = rnp->gpnum & 0x1; | 223 | return rnp->gp_tasks != NULL; |
214 | |||
215 | return !list_empty(&rnp->blocked_tasks[phase]) || | ||
216 | !list_empty(&rnp->blocked_tasks[phase + 2]); | ||
217 | } | 224 | } |
218 | 225 | ||
219 | /* | 226 | /* |
@@ -253,6 +260,21 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) | |||
253 | } | 260 | } |
254 | 261 | ||
255 | /* | 262 | /* |
263 | * Advance a ->blkd_tasks-list pointer to the next entry, or | ||
264 | * return NULL instead if already at the end of the list. | ||
265 | */ | ||
266 | static struct list_head *rcu_next_node_entry(struct task_struct *t, | ||
267 | struct rcu_node *rnp) | ||
268 | { | ||
269 | struct list_head *np; | ||
270 | |||
271 | np = t->rcu_node_entry.next; | ||
272 | if (np == &rnp->blkd_tasks) | ||
273 | np = NULL; | ||
274 | return np; | ||
275 | } | ||
276 | |||
277 | /* | ||
256 | * Handle special cases during rcu_read_unlock(), such as needing to | 278 | * Handle special cases during rcu_read_unlock(), such as needing to |
257 | * notify RCU core processing or task having blocked during the RCU | 279 | * notify RCU core processing or task having blocked during the RCU |
258 | * read-side critical section. | 280 | * read-side critical section. |
@@ -262,6 +284,7 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
262 | int empty; | 284 | int empty; |
263 | int empty_exp; | 285 | int empty_exp; |
264 | unsigned long flags; | 286 | unsigned long flags; |
287 | struct list_head *np; | ||
265 | struct rcu_node *rnp; | 288 | struct rcu_node *rnp; |
266 | int special; | 289 | int special; |
267 | 290 | ||
@@ -305,7 +328,12 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
305 | empty = !rcu_preempted_readers(rnp); | 328 | empty = !rcu_preempted_readers(rnp); |
306 | empty_exp = !rcu_preempted_readers_exp(rnp); | 329 | empty_exp = !rcu_preempted_readers_exp(rnp); |
307 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | 330 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ |
331 | np = rcu_next_node_entry(t, rnp); | ||
308 | list_del_init(&t->rcu_node_entry); | 332 | list_del_init(&t->rcu_node_entry); |
333 | if (&t->rcu_node_entry == rnp->gp_tasks) | ||
334 | rnp->gp_tasks = np; | ||
335 | if (&t->rcu_node_entry == rnp->exp_tasks) | ||
336 | rnp->exp_tasks = np; | ||
309 | t->rcu_blocked_node = NULL; | 337 | t->rcu_blocked_node = NULL; |
310 | 338 | ||
311 | /* | 339 | /* |
@@ -361,18 +389,16 @@ EXPORT_SYMBOL_GPL(__rcu_read_unlock); | |||
361 | static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) | 389 | static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) |
362 | { | 390 | { |
363 | unsigned long flags; | 391 | unsigned long flags; |
364 | struct list_head *lp; | ||
365 | int phase; | ||
366 | struct task_struct *t; | 392 | struct task_struct *t; |
367 | 393 | ||
368 | if (rcu_preempted_readers(rnp)) { | 394 | if (!rcu_preempted_readers(rnp)) |
369 | raw_spin_lock_irqsave(&rnp->lock, flags); | 395 | return; |
370 | phase = rnp->gpnum & 0x1; | 396 | raw_spin_lock_irqsave(&rnp->lock, flags); |
371 | lp = &rnp->blocked_tasks[phase]; | 397 | t = list_entry(rnp->gp_tasks, |
372 | list_for_each_entry(t, lp, rcu_node_entry) | 398 | struct task_struct, rcu_node_entry); |
373 | sched_show_task(t); | 399 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) |
374 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 400 | sched_show_task(t); |
375 | } | 401 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
376 | } | 402 | } |
377 | 403 | ||
378 | /* | 404 | /* |
@@ -402,16 +428,14 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | |||
402 | */ | 428 | */ |
403 | static void rcu_print_task_stall(struct rcu_node *rnp) | 429 | static void rcu_print_task_stall(struct rcu_node *rnp) |
404 | { | 430 | { |
405 | struct list_head *lp; | ||
406 | int phase; | ||
407 | struct task_struct *t; | 431 | struct task_struct *t; |
408 | 432 | ||
409 | if (rcu_preempted_readers(rnp)) { | 433 | if (!rcu_preempted_readers(rnp)) |
410 | phase = rnp->gpnum & 0x1; | 434 | return; |
411 | lp = &rnp->blocked_tasks[phase]; | 435 | t = list_entry(rnp->gp_tasks, |
412 | list_for_each_entry(t, lp, rcu_node_entry) | 436 | struct task_struct, rcu_node_entry); |
413 | printk(" P%d", t->pid); | 437 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) |
414 | } | 438 | printk(" P%d", t->pid); |
415 | } | 439 | } |
416 | 440 | ||
417 | /* | 441 | /* |
@@ -430,10 +454,15 @@ static void rcu_preempt_stall_reset(void) | |||
430 | * period that still has RCU readers blocked! This function must be | 454 | * period that still has RCU readers blocked! This function must be |
431 | * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock | 455 | * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock |
432 | * must be held by the caller. | 456 | * must be held by the caller. |
457 | * | ||
458 | * Also, if there are blocked tasks on the list, they automatically | ||
459 | * block the newly created grace period, so set up ->gp_tasks accordingly. | ||
433 | */ | 460 | */ |
434 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) | 461 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) |
435 | { | 462 | { |
436 | WARN_ON_ONCE(rcu_preempted_readers(rnp)); | 463 | WARN_ON_ONCE(rcu_preempted_readers(rnp)); |
464 | if (!list_empty(&rnp->blkd_tasks)) | ||
465 | rnp->gp_tasks = rnp->blkd_tasks.next; | ||
437 | WARN_ON_ONCE(rnp->qsmask); | 466 | WARN_ON_ONCE(rnp->qsmask); |
438 | } | 467 | } |
439 | 468 | ||
@@ -457,45 +486,49 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
457 | struct rcu_node *rnp, | 486 | struct rcu_node *rnp, |
458 | struct rcu_data *rdp) | 487 | struct rcu_data *rdp) |
459 | { | 488 | { |
460 | int i; | ||
461 | struct list_head *lp; | 489 | struct list_head *lp; |
462 | struct list_head *lp_root; | 490 | struct list_head *lp_root; |
463 | int retval = 0; | 491 | int retval = 0; |
464 | struct rcu_node *rnp_root = rcu_get_root(rsp); | 492 | struct rcu_node *rnp_root = rcu_get_root(rsp); |
465 | struct task_struct *tp; | 493 | struct task_struct *t; |
466 | 494 | ||
467 | if (rnp == rnp_root) { | 495 | if (rnp == rnp_root) { |
468 | WARN_ONCE(1, "Last CPU thought to be offlined?"); | 496 | WARN_ONCE(1, "Last CPU thought to be offlined?"); |
469 | return 0; /* Shouldn't happen: at least one CPU online. */ | 497 | return 0; /* Shouldn't happen: at least one CPU online. */ |
470 | } | 498 | } |
471 | WARN_ON_ONCE(rnp != rdp->mynode && | 499 | |
472 | (!list_empty(&rnp->blocked_tasks[0]) || | 500 | /* If we are on an internal node, complain bitterly. */ |
473 | !list_empty(&rnp->blocked_tasks[1]) || | 501 | WARN_ON_ONCE(rnp != rdp->mynode); |
474 | !list_empty(&rnp->blocked_tasks[2]) || | ||
475 | !list_empty(&rnp->blocked_tasks[3]))); | ||
476 | 502 | ||
477 | /* | 503 | /* |
478 | * Move tasks up to root rcu_node. Rely on the fact that the | 504 | * Move tasks up to root rcu_node. Don't try to get fancy for |
479 | * root rcu_node can be at most one ahead of the rest of the | 505 | * this corner-case operation -- just put this node's tasks |
480 | * rcu_nodes in terms of gp_num value. This fact allows us to | 506 | * at the head of the root node's list, and update the root node's |
481 | * move the blocked_tasks[] array directly, element by element. | 507 | * ->gp_tasks and ->exp_tasks pointers to those of this node's, |
508 | * if non-NULL. This might result in waiting for more tasks than | ||
509 | * absolutely necessary, but this is a good performance/complexity | ||
510 | * tradeoff. | ||
482 | */ | 511 | */ |
483 | if (rcu_preempted_readers(rnp)) | 512 | if (rcu_preempted_readers(rnp)) |
484 | retval |= RCU_OFL_TASKS_NORM_GP; | 513 | retval |= RCU_OFL_TASKS_NORM_GP; |
485 | if (rcu_preempted_readers_exp(rnp)) | 514 | if (rcu_preempted_readers_exp(rnp)) |
486 | retval |= RCU_OFL_TASKS_EXP_GP; | 515 | retval |= RCU_OFL_TASKS_EXP_GP; |
487 | for (i = 0; i < 4; i++) { | 516 | lp = &rnp->blkd_tasks; |
488 | lp = &rnp->blocked_tasks[i]; | 517 | lp_root = &rnp_root->blkd_tasks; |
489 | lp_root = &rnp_root->blocked_tasks[i]; | 518 | while (!list_empty(lp)) { |
490 | while (!list_empty(lp)) { | 519 | t = list_entry(lp->next, typeof(*t), rcu_node_entry); |
491 | tp = list_entry(lp->next, typeof(*tp), rcu_node_entry); | 520 | raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ |
492 | raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ | 521 | list_del(&t->rcu_node_entry); |
493 | list_del(&tp->rcu_node_entry); | 522 | t->rcu_blocked_node = rnp_root; |
494 | tp->rcu_blocked_node = rnp_root; | 523 | list_add(&t->rcu_node_entry, lp_root); |
495 | list_add(&tp->rcu_node_entry, lp_root); | 524 | if (&t->rcu_node_entry == rnp->gp_tasks) |
496 | raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */ | 525 | rnp_root->gp_tasks = rnp->gp_tasks; |
497 | } | 526 | if (&t->rcu_node_entry == rnp->exp_tasks) |
527 | rnp_root->exp_tasks = rnp->exp_tasks; | ||
528 | raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ | ||
498 | } | 529 | } |
530 | rnp->gp_tasks = NULL; | ||
531 | rnp->exp_tasks = NULL; | ||
499 | return retval; | 532 | return retval; |
500 | } | 533 | } |
501 | 534 | ||
@@ -586,8 +619,7 @@ static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); | |||
586 | */ | 619 | */ |
587 | static int rcu_preempted_readers_exp(struct rcu_node *rnp) | 620 | static int rcu_preempted_readers_exp(struct rcu_node *rnp) |
588 | { | 621 | { |
589 | return !list_empty(&rnp->blocked_tasks[2]) || | 622 | return rnp->exp_tasks != NULL; |
590 | !list_empty(&rnp->blocked_tasks[3]); | ||
591 | } | 623 | } |
592 | 624 | ||
593 | /* | 625 | /* |
@@ -647,12 +679,13 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | |||
647 | static void | 679 | static void |
648 | sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | 680 | sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) |
649 | { | 681 | { |
650 | int must_wait; | 682 | int must_wait = 0; |
651 | 683 | ||
652 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | 684 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ |
653 | list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); | 685 | if (!list_empty(&rnp->blkd_tasks)) { |
654 | list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); | 686 | rnp->exp_tasks = rnp->blkd_tasks.next; |
655 | must_wait = rcu_preempted_readers_exp(rnp); | 687 | must_wait = 1; |
688 | } | ||
656 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 689 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ |
657 | if (!must_wait) | 690 | if (!must_wait) |
658 | rcu_report_exp_rnp(rsp, rnp); | 691 | rcu_report_exp_rnp(rsp, rnp); |
@@ -661,9 +694,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | |||
661 | /* | 694 | /* |
662 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea | 695 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea |
663 | * is to invoke synchronize_sched_expedited() to push all the tasks to | 696 | * is to invoke synchronize_sched_expedited() to push all the tasks to |
664 | * the ->blocked_tasks[] lists, move all entries from the first set of | 697 | * the ->blkd_tasks lists and wait for this list to drain. |
665 | * ->blocked_tasks[] lists to the second set, and finally wait for this | ||
666 | * second set to drain. | ||
667 | */ | 698 | */ |
668 | void synchronize_rcu_expedited(void) | 699 | void synchronize_rcu_expedited(void) |
669 | { | 700 | { |
@@ -695,7 +726,7 @@ void synchronize_rcu_expedited(void) | |||
695 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) | 726 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) |
696 | goto unlock_mb_ret; /* Others did our work for us. */ | 727 | goto unlock_mb_ret; /* Others did our work for us. */ |
697 | 728 | ||
698 | /* force all RCU readers onto blocked_tasks[]. */ | 729 | /* force all RCU readers onto ->blkd_tasks lists. */ |
699 | synchronize_sched_expedited(); | 730 | synchronize_sched_expedited(); |
700 | 731 | ||
701 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 732 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
@@ -707,7 +738,7 @@ void synchronize_rcu_expedited(void) | |||
707 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 738 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
708 | } | 739 | } |
709 | 740 | ||
710 | /* Snapshot current state of ->blocked_tasks[] lists. */ | 741 | /* Snapshot current state of ->blkd_tasks lists. */ |
711 | rcu_for_each_leaf_node(rsp, rnp) | 742 | rcu_for_each_leaf_node(rsp, rnp) |
712 | sync_rcu_preempt_exp_init(rsp, rnp); | 743 | sync_rcu_preempt_exp_init(rsp, rnp); |
713 | if (NUM_RCU_NODES > 1) | 744 | if (NUM_RCU_NODES > 1) |
@@ -715,7 +746,7 @@ void synchronize_rcu_expedited(void) | |||
715 | 746 | ||
716 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 747 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
717 | 748 | ||
718 | /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ | 749 | /* Wait for snapshotted ->blkd_tasks lists to drain. */ |
719 | rnp = rcu_get_root(rsp); | 750 | rnp = rcu_get_root(rsp); |
720 | wait_event(sync_rcu_preempt_exp_wq, | 751 | wait_event(sync_rcu_preempt_exp_wq, |
721 | sync_rcu_preempt_exp_done(rnp)); | 752 | sync_rcu_preempt_exp_done(rnp)); |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 4a21ca55ef7c..1cedf94e2c4f 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -161,7 +161,6 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
161 | { | 161 | { |
162 | unsigned long gpnum; | 162 | unsigned long gpnum; |
163 | int level = 0; | 163 | int level = 0; |
164 | int phase; | ||
165 | struct rcu_node *rnp; | 164 | struct rcu_node *rnp; |
166 | 165 | ||
167 | gpnum = rsp->gpnum; | 166 | gpnum = rsp->gpnum; |
@@ -178,13 +177,11 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
178 | seq_puts(m, "\n"); | 177 | seq_puts(m, "\n"); |
179 | level = rnp->level; | 178 | level = rnp->level; |
180 | } | 179 | } |
181 | phase = gpnum & 0x1; | 180 | seq_printf(m, "%lx/%lx %c%c>%c %d:%d ^%d ", |
182 | seq_printf(m, "%lx/%lx %c%c>%c%c %d:%d ^%d ", | ||
183 | rnp->qsmask, rnp->qsmaskinit, | 181 | rnp->qsmask, rnp->qsmaskinit, |
184 | "T."[list_empty(&rnp->blocked_tasks[phase])], | 182 | ".G"[rnp->gp_tasks != NULL], |
185 | "E."[list_empty(&rnp->blocked_tasks[phase + 2])], | 183 | ".E"[rnp->exp_tasks != NULL], |
186 | "T."[list_empty(&rnp->blocked_tasks[!phase])], | 184 | ".T"[!list_empty(&rnp->blkd_tasks)], |
187 | "E."[list_empty(&rnp->blocked_tasks[!phase + 2])], | ||
188 | rnp->grplo, rnp->grphi, rnp->grpnum); | 185 | rnp->grplo, rnp->grphi, rnp->grpnum); |
189 | } | 186 | } |
190 | seq_puts(m, "\n"); | 187 | seq_puts(m, "\n"); |