diff options
author | Paul E. McKenney <paul.mckenney@linaro.org> | 2010-09-27 20:25:23 -0400 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2010-11-30 01:01:54 -0500 |
commit | 24278d148316d2180be6df40e06db013d8b232b8 (patch) | |
tree | 6a579d483f8f799b352e39b972a7e03cc6204fc1 /kernel/rcutiny_plugin.h | |
parent | b2c0710c464ede15e1fc52fb1e7ee9ba54cea186 (diff) |
rcu: priority boosting for TINY_PREEMPT_RCU
Add priority boosting, but only for TINY_PREEMPT_RCU. This is enabled
by the default-off RCU_BOOST kernel configuration option (CONFIG_RCU_BOOST).
The priority to which to boost preempted RCU readers is controlled by the
RCU_BOOST_PRIO kernel configuration option (defaulting to real-time
priority 1), and the time to wait before boosting the readers blocking a
given grace period is controlled by the RCU_BOOST_DELAY kernel
configuration option (defaulting to 500 milliseconds).
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel/rcutiny_plugin.h')
-rw-r--r-- | kernel/rcutiny_plugin.h | 208 |
1 files changed, 198 insertions, 10 deletions
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 95f9239df512..24f43165f222 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h | |||
@@ -24,6 +24,29 @@ | |||
24 | 24 | ||
25 | #include <linux/kthread.h> | 25 | #include <linux/kthread.h> |
26 | 26 | ||
/*
 * Bookkeeping for the rcupdate callback mechanism: one callback list
 * plus two tail pointers into that list.
 */
struct rcu_ctrlblk {
	struct rcu_head *rcucblist;	/* Pending callbacks (CBs), head of list. */
	struct rcu_head **donetail;	/* ->next pointer of the last "done" CB. */
	struct rcu_head **curtail;	/* ->next pointer of the last CB. */
};

/*
 * The rcu_sched control block.  The list starts out empty (->rcucblist
 * is NULL), so both tail pointers reference the list header itself.
 */
static struct rcu_ctrlblk rcu_sched_ctrlblk = {
	.curtail	= &rcu_sched_ctrlblk.rcucblist,
	.donetail	= &rcu_sched_ctrlblk.rcucblist,
};

/*
 * The rcu_bh control block, set up the same way as rcu_sched_ctrlblk:
 * an empty list with both tail pointers referencing the list header.
 */
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
	.curtail	= &rcu_bh_ctrlblk.rcucblist,
	.donetail	= &rcu_bh_ctrlblk.rcucblist,
};
44 | |||
45 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
46 | int rcu_scheduler_active __read_mostly; | ||
47 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | ||
48 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||
49 | |||
27 | #ifdef CONFIG_TINY_PREEMPT_RCU | 50 | #ifdef CONFIG_TINY_PREEMPT_RCU |
28 | 51 | ||
29 | #include <linux/delay.h> | 52 | #include <linux/delay.h> |
@@ -48,17 +71,27 @@ struct rcu_preempt_ctrlblk { | |||
48 | struct list_head *gp_tasks; | 71 | struct list_head *gp_tasks; |
49 | /* Pointer to the first task blocking the */ | 72 | /* Pointer to the first task blocking the */ |
50 | /* current grace period, or NULL if there */ | 73 | /* current grace period, or NULL if there */ |
51 | /* is not such task. */ | 74 | /* is no such task. */ |
52 | struct list_head *exp_tasks; | 75 | struct list_head *exp_tasks; |
53 | /* Pointer to first task blocking the */ | 76 | /* Pointer to first task blocking the */ |
54 | /* current expedited grace period, or NULL */ | 77 | /* current expedited grace period, or NULL */ |
55 | /* if there is no such task. If there */ | 78 | /* if there is no such task. If there */ |
56 | /* is no current expedited grace period, */ | 79 | /* is no current expedited grace period, */ |
57 | /* then there cannot be any such task. */ | 80 | /* then there cannot be any such task. */ |
81 | #ifdef CONFIG_RCU_BOOST | ||
82 | struct list_head *boost_tasks; | ||
83 | /* Pointer to first task that needs to be */ | ||
84 | /* priority-boosted, or NULL if no priority */ | ||
85 | /* boosting is needed. If there is no */ | ||
86 | /* current or expedited grace period, there */ | ||
87 | /* can be no such task. */ | ||
88 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
58 | u8 gpnum; /* Current grace period. */ | 89 | u8 gpnum; /* Current grace period. */ |
59 | u8 gpcpu; /* Last grace period blocked by the CPU. */ | 90 | u8 gpcpu; /* Last grace period blocked by the CPU. */ |
60 | u8 completed; /* Last grace period completed. */ | 91 | u8 completed; /* Last grace period completed. */ |
61 | /* If all three are equal, RCU is idle. */ | 92 | /* If all three are equal, RCU is idle. */ |
93 | s8 boosted_this_gp; /* Has boosting already happened? */ | ||
94 | unsigned long boost_time; /* When to start boosting (jiffies) */ | ||
62 | }; | 95 | }; |
63 | 96 | ||
64 | static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { | 97 | static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { |
@@ -124,6 +157,130 @@ static int rcu_preempt_gp_in_progress(void) | |||
124 | } | 157 | } |
125 | 158 | ||
126 | /* | 159 | /* |
160 | * Advance a ->blkd_tasks-list pointer to the next entry, instead | ||
161 | * returning NULL if at the end of the list. | ||
162 | */ | ||
163 | static struct list_head *rcu_next_node_entry(struct task_struct *t) | ||
164 | { | ||
165 | struct list_head *np; | ||
166 | |||
167 | np = t->rcu_node_entry.next; | ||
168 | if (np == &rcu_preempt_ctrlblk.blkd_tasks) | ||
169 | np = NULL; | ||
170 | return np; | ||
171 | } | ||
172 | |||
173 | #ifdef CONFIG_RCU_BOOST | ||
174 | |||
175 | #include "rtmutex_common.h" | ||
176 | |||
177 | /* | ||
178 | * Carry out RCU priority boosting on the task indicated by ->boost_tasks, | ||
179 | * and advance ->boost_tasks to the next task in the ->blkd_tasks list. | ||
180 | */ | ||
181 | static int rcu_boost(void) | ||
182 | { | ||
183 | unsigned long flags; | ||
184 | struct rt_mutex mtx; | ||
185 | struct list_head *np; | ||
186 | struct task_struct *t; | ||
187 | |||
188 | if (rcu_preempt_ctrlblk.boost_tasks == NULL) | ||
189 | return 0; /* Nothing to boost. */ | ||
190 | raw_local_irq_save(flags); | ||
191 | rcu_preempt_ctrlblk.boosted_this_gp++; | ||
192 | t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct, | ||
193 | rcu_node_entry); | ||
194 | np = rcu_next_node_entry(t); | ||
195 | rt_mutex_init_proxy_locked(&mtx, t); | ||
196 | t->rcu_boost_mutex = &mtx; | ||
197 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; | ||
198 | raw_local_irq_restore(flags); | ||
199 | rt_mutex_lock(&mtx); | ||
200 | rt_mutex_unlock(&mtx); | ||
201 | return rcu_preempt_ctrlblk.boost_tasks != NULL; | ||
202 | } | ||
203 | |||
204 | /* | ||
205 | * Check to see if it is now time to start boosting RCU readers blocking | ||
206 | * the current grace period, and, if so, tell the rcu_kthread_task to | ||
207 | * start boosting them. If there is an expedited boost in progress, | ||
208 | * we wait for it to complete. | ||
209 | */ | ||
210 | static void rcu_initiate_boost(void) | ||
211 | { | ||
212 | if (rcu_preempt_ctrlblk.gp_tasks != NULL && | ||
213 | rcu_preempt_ctrlblk.boost_tasks == NULL && | ||
214 | rcu_preempt_ctrlblk.boosted_this_gp == 0 && | ||
215 | ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) { | ||
216 | rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; | ||
217 | invoke_rcu_kthread(); | ||
218 | } | ||
219 | } | ||
220 | |||
221 | /* | ||
222 | * Initiate boosting for an expedited grace period. | ||
223 | */ | ||
224 | static void rcu_initiate_expedited_boost(void) | ||
225 | { | ||
226 | unsigned long flags; | ||
227 | |||
228 | raw_local_irq_save(flags); | ||
229 | if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) { | ||
230 | rcu_preempt_ctrlblk.boost_tasks = | ||
231 | rcu_preempt_ctrlblk.blkd_tasks.next; | ||
232 | rcu_preempt_ctrlblk.boosted_this_gp = -1; | ||
233 | invoke_rcu_kthread(); | ||
234 | } | ||
235 | raw_local_irq_restore(flags); | ||
236 | } | ||
237 | |||
/*
 * Boost delay converted from milliseconds (CONFIG_RCU_BOOST_DELAY) to
 * jiffies, rounded up.  No trailing semicolon, so that the macro can be
 * used inside larger expressions.
 */
#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
239 | |||
240 | /* | ||
241 | * Do priority-boost accounting for the start of a new grace period. | ||
242 | */ | ||
243 | static void rcu_preempt_boost_start_gp(void) | ||
244 | { | ||
245 | rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; | ||
246 | if (rcu_preempt_ctrlblk.boosted_this_gp > 0) | ||
247 | rcu_preempt_ctrlblk.boosted_this_gp = 0; | ||
248 | } | ||
249 | |||
250 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
251 | |||
/*
 * Stub used when CONFIG_RCU_BOOST is not set: there is never anything
 * to boost, so always report "no more boosting needed" (zero).
 */
static int rcu_boost(void)
{
	return 0;
}

/*
 * Stub used when CONFIG_RCU_BOOST is not set: boosting is never started.
 */
static void rcu_initiate_boost(void)
{
}

/*
 * Stub used when CONFIG_RCU_BOOST is not set: expedited boosting is
 * never started.
 */
static void rcu_initiate_expedited_boost(void)
{
}

/*
 * Stub used when CONFIG_RCU_BOOST is not set: no boost accounting is
 * needed at the start of a grace period.
 */
static void rcu_preempt_boost_start_gp(void)
{
}
280 | |||
281 | #endif /* else #ifdef CONFIG_RCU_BOOST */ | ||
282 | |||
283 | /* | ||
127 | * Record a preemptible-RCU quiescent state for the specified CPU. Note | 284 | * Record a preemptible-RCU quiescent state for the specified CPU. Note |
128 | * that this just means that the task currently running on the CPU is | 285 | * that this just means that the task currently running on the CPU is |
129 | * in a quiescent state. There might be any number of tasks blocked | 286 | * in a quiescent state. There might be any number of tasks blocked |
@@ -150,12 +307,14 @@ static void rcu_preempt_cpu_qs(void) | |||
150 | rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; | 307 | rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; |
151 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | 308 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; |
152 | 309 | ||
153 | /* | 310 | /* If there is no GP then there is nothing more to do. */ |
154 | * If there is no GP, or if blocked readers are still blocking GP, | ||
155 | * then there is nothing more to do. | ||
156 | */ | ||
157 | if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) | 311 | if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) |
158 | return; | 312 | return; |
313 | /* If there are blocked readers, go check up on boosting. */ | ||
314 | if (rcu_preempt_blocked_readers_cgp()) { | ||
315 | rcu_initiate_boost(); | ||
316 | return; | ||
317 | } | ||
159 | 318 | ||
160 | /* Advance callbacks. */ | 319 | /* Advance callbacks. */ |
161 | rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; | 320 | rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; |
@@ -168,7 +327,7 @@ static void rcu_preempt_cpu_qs(void) | |||
168 | 327 | ||
169 | /* If there are done callbacks, cause them to be invoked. */ | 328 | /* If there are done callbacks, cause them to be invoked. */ |
170 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) | 329 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) |
171 | invoke_rcu_cbs(); | 330 | invoke_rcu_kthread(); |
172 | } | 331 | } |
173 | 332 | ||
174 | /* | 333 | /* |
@@ -186,6 +345,9 @@ static void rcu_preempt_start_gp(void) | |||
186 | rcu_preempt_ctrlblk.gp_tasks = | 345 | rcu_preempt_ctrlblk.gp_tasks = |
187 | rcu_preempt_ctrlblk.blkd_tasks.next; | 346 | rcu_preempt_ctrlblk.blkd_tasks.next; |
188 | 347 | ||
348 | /* Set up for RCU priority boosting. */ | ||
349 | rcu_preempt_boost_start_gp(); | ||
350 | |||
189 | /* If there is no running reader, CPU is done with GP. */ | 351 | /* If there is no running reader, CPU is done with GP. */ |
190 | if (!rcu_preempt_running_reader()) | 352 | if (!rcu_preempt_running_reader()) |
191 | rcu_preempt_cpu_qs(); | 353 | rcu_preempt_cpu_qs(); |
@@ -306,14 +468,16 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
306 | */ | 468 | */ |
307 | empty = !rcu_preempt_blocked_readers_cgp(); | 469 | empty = !rcu_preempt_blocked_readers_cgp(); |
308 | empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; | 470 | empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; |
309 | np = t->rcu_node_entry.next; | 471 | np = rcu_next_node_entry(t); |
310 | if (np == &rcu_preempt_ctrlblk.blkd_tasks) | ||
311 | np = NULL; | ||
312 | list_del(&t->rcu_node_entry); | 472 | list_del(&t->rcu_node_entry); |
313 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) | 473 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) |
314 | rcu_preempt_ctrlblk.gp_tasks = np; | 474 | rcu_preempt_ctrlblk.gp_tasks = np; |
315 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) | 475 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) |
316 | rcu_preempt_ctrlblk.exp_tasks = np; | 476 | rcu_preempt_ctrlblk.exp_tasks = np; |
477 | #ifdef CONFIG_RCU_BOOST | ||
478 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks) | ||
479 | rcu_preempt_ctrlblk.boost_tasks = np; | ||
480 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
317 | INIT_LIST_HEAD(&t->rcu_node_entry); | 481 | INIT_LIST_HEAD(&t->rcu_node_entry); |
318 | 482 | ||
319 | /* | 483 | /* |
@@ -333,6 +497,14 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
333 | if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) | 497 | if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) |
334 | rcu_report_exp_done(); | 498 | rcu_report_exp_done(); |
335 | } | 499 | } |
500 | #ifdef CONFIG_RCU_BOOST | ||
501 | /* Unboost self if was boosted. */ | ||
502 | if (special & RCU_READ_UNLOCK_BOOSTED) { | ||
503 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; | ||
504 | rt_mutex_unlock(t->rcu_boost_mutex); | ||
505 | t->rcu_boost_mutex = NULL; | ||
506 | } | ||
507 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
336 | local_irq_restore(flags); | 508 | local_irq_restore(flags); |
337 | } | 509 | } |
338 | 510 | ||
@@ -376,7 +548,7 @@ static void rcu_preempt_check_callbacks(void) | |||
376 | rcu_preempt_cpu_qs(); | 548 | rcu_preempt_cpu_qs(); |
377 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != | 549 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != |
378 | rcu_preempt_ctrlblk.rcb.donetail) | 550 | rcu_preempt_ctrlblk.rcb.donetail) |
379 | invoke_rcu_cbs(); | 551 | invoke_rcu_kthread(); |
380 | if (rcu_preempt_gp_in_progress() && | 552 | if (rcu_preempt_gp_in_progress() && |
381 | rcu_cpu_blocking_cur_gp() && | 553 | rcu_cpu_blocking_cur_gp() && |
382 | rcu_preempt_running_reader()) | 554 | rcu_preempt_running_reader()) |
@@ -534,6 +706,7 @@ void synchronize_rcu_expedited(void) | |||
534 | 706 | ||
535 | /* Wait for tail of ->blkd_tasks list to drain. */ | 707 | /* Wait for tail of ->blkd_tasks list to drain. */ |
536 | if (rcu_preempted_readers_exp()) | 708 | if (rcu_preempted_readers_exp()) |
709 | rcu_initiate_expedited_boost(); | ||
537 | wait_event(sync_rcu_preempt_exp_wq, | 710 | wait_event(sync_rcu_preempt_exp_wq, |
538 | !rcu_preempted_readers_exp()); | 711 | !rcu_preempted_readers_exp()); |
539 | 712 | ||
@@ -575,6 +748,15 @@ void exit_rcu(void) | |||
575 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ | 748 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ |
576 | 749 | ||
577 | /* | 750 | /* |
751 | * Because preemptible RCU does not exist, it is never necessary to | ||
752 | * boost preempted RCU readers. | ||
753 | */ | ||
static int rcu_boost(void)
{
	/* No preemptible RCU in this configuration: nothing to boost. */
	return 0;
}
758 | |||
759 | /* | ||
578 | * Because preemptible RCU does not exist, it never has any callbacks | 760 | * Because preemptible RCU does not exist, it never has any callbacks |
579 | * to check. | 761 | * to check. |
580 | */ | 762 | */ |
@@ -614,3 +796,9 @@ void __init rcu_scheduler_starting(void) | |||
614 | } | 796 | } |
615 | 797 | ||
616 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 798 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
799 | |||
/*
 * Priority used for RCU boosting: taken from CONFIG_RCU_BOOST_PRIO when
 * CONFIG_RCU_BOOST is set, and 1 otherwise.
 */
#ifndef CONFIG_RCU_BOOST
#define RCU_BOOST_PRIO 1
#else /* #ifndef CONFIG_RCU_BOOST */
#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
#endif /* #else #ifndef CONFIG_RCU_BOOST */