aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/locking
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2013-11-07 08:43:43 -0500
committerIngo Molnar <mingo@kernel.org>2014-01-13 07:41:50 -0500
commitfb00aca474405f4fa8a8519c3179fed722eabd83 (patch)
tree8a779629a771dd3340d5a3ba0ba16b732b8de1c8 /kernel/locking
parentaf6ace764d03900524e9b1ac621a1c520ee49fc6 (diff)
rtmutex: Turn the plist into an rb-tree
Turn the pi-chains from plist to rb-tree, in the rt_mutex code, and provide a proper comparison function for -deadline and -priority tasks. This is done mainly because: - classical prio field of the plist is just an int, which might not be enough for representing a deadline; - manipulating such a list would become O(nr_deadline_tasks), which might be too much, as the number of -deadline tasks increases. Therefore, an rb-tree is used, and tasks are queued in it according to the following logic: - among two -priority (i.e., SCHED_BATCH/OTHER/RR/FIFO) tasks, the one with the higher (lower, actually!) prio wins; - among a -priority and a -deadline task, the latter always wins; - among two -deadline tasks, the one with the earliest deadline wins. Queueing and dequeueing functions are changed accordingly, for both the list of a task's pi-waiters and the list of tasks blocked on a pi-lock. Signed-off-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Dario Faggioli <raistlin@linux.it> Signed-off-by: Juri Lelli <juri.lelli@gmail.com> Signed-off-again-by: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1383831828-15501-10-git-send-email-juri.lelli@gmail.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/locking')
-rw-r--r--kernel/locking/rtmutex-debug.c8
-rw-r--r--kernel/locking/rtmutex.c151
-rw-r--r--kernel/locking/rtmutex_common.h22
3 files changed, 134 insertions, 47 deletions
diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c
index 13b243a323fa..49b2ed3dced8 100644
--- a/kernel/locking/rtmutex-debug.c
+++ b/kernel/locking/rtmutex-debug.c
@@ -24,7 +24,7 @@
24#include <linux/kallsyms.h> 24#include <linux/kallsyms.h>
25#include <linux/syscalls.h> 25#include <linux/syscalls.h>
26#include <linux/interrupt.h> 26#include <linux/interrupt.h>
27#include <linux/plist.h> 27#include <linux/rbtree.h>
28#include <linux/fs.h> 28#include <linux/fs.h>
29#include <linux/debug_locks.h> 29#include <linux/debug_locks.h>
30 30
@@ -57,7 +57,7 @@ static void printk_lock(struct rt_mutex *lock, int print_owner)
57 57
58void rt_mutex_debug_task_free(struct task_struct *task) 58void rt_mutex_debug_task_free(struct task_struct *task)
59{ 59{
60 DEBUG_LOCKS_WARN_ON(!plist_head_empty(&task->pi_waiters)); 60 DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters));
61 DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); 61 DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
62} 62}
63 63
@@ -154,16 +154,12 @@ void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
154void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) 154void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
155{ 155{
156 memset(waiter, 0x11, sizeof(*waiter)); 156 memset(waiter, 0x11, sizeof(*waiter));
157 plist_node_init(&waiter->list_entry, MAX_PRIO);
158 plist_node_init(&waiter->pi_list_entry, MAX_PRIO);
159 waiter->deadlock_task_pid = NULL; 157 waiter->deadlock_task_pid = NULL;
160} 158}
161 159
162void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) 160void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
163{ 161{
164 put_pid(waiter->deadlock_task_pid); 162 put_pid(waiter->deadlock_task_pid);
165 DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->list_entry));
166 DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
167 memset(waiter, 0x22, sizeof(*waiter)); 163 memset(waiter, 0x22, sizeof(*waiter));
168} 164}
169 165
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 0dd6aec1cb6a..3bf0aa68dd3f 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -14,6 +14,7 @@
14#include <linux/export.h> 14#include <linux/export.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/sched/rt.h> 16#include <linux/sched/rt.h>
17#include <linux/sched/deadline.h>
17#include <linux/timer.h> 18#include <linux/timer.h>
18 19
19#include "rtmutex_common.h" 20#include "rtmutex_common.h"
@@ -91,10 +92,104 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
91} 92}
92#endif 93#endif
93 94
95static inline int
96rt_mutex_waiter_less(struct rt_mutex_waiter *left,
97 struct rt_mutex_waiter *right)
98{
99 if (left->task->prio < right->task->prio)
100 return 1;
101
102 /*
103 * If both tasks are dl_task(), we check their deadlines.
104 */
105 if (dl_prio(left->task->prio) && dl_prio(right->task->prio))
106 return (left->task->dl.deadline < right->task->dl.deadline);
107
108 return 0;
109}
110
111static void
112rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
113{
114 struct rb_node **link = &lock->waiters.rb_node;
115 struct rb_node *parent = NULL;
116 struct rt_mutex_waiter *entry;
117 int leftmost = 1;
118
119 while (*link) {
120 parent = *link;
121 entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
122 if (rt_mutex_waiter_less(waiter, entry)) {
123 link = &parent->rb_left;
124 } else {
125 link = &parent->rb_right;
126 leftmost = 0;
127 }
128 }
129
130 if (leftmost)
131 lock->waiters_leftmost = &waiter->tree_entry;
132
133 rb_link_node(&waiter->tree_entry, parent, link);
134 rb_insert_color(&waiter->tree_entry, &lock->waiters);
135}
136
137static void
138rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
139{
140 if (RB_EMPTY_NODE(&waiter->tree_entry))
141 return;
142
143 if (lock->waiters_leftmost == &waiter->tree_entry)
144 lock->waiters_leftmost = rb_next(&waiter->tree_entry);
145
146 rb_erase(&waiter->tree_entry, &lock->waiters);
147 RB_CLEAR_NODE(&waiter->tree_entry);
148}
149
150static void
151rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
152{
153 struct rb_node **link = &task->pi_waiters.rb_node;
154 struct rb_node *parent = NULL;
155 struct rt_mutex_waiter *entry;
156 int leftmost = 1;
157
158 while (*link) {
159 parent = *link;
160 entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
161 if (rt_mutex_waiter_less(waiter, entry)) {
162 link = &parent->rb_left;
163 } else {
164 link = &parent->rb_right;
165 leftmost = 0;
166 }
167 }
168
169 if (leftmost)
170 task->pi_waiters_leftmost = &waiter->pi_tree_entry;
171
172 rb_link_node(&waiter->pi_tree_entry, parent, link);
173 rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
174}
175
176static void
177rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
178{
179 if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
180 return;
181
182 if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
183 task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);
184
185 rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
186 RB_CLEAR_NODE(&waiter->pi_tree_entry);
187}
188
94/* 189/*
95 * Calculate task priority from the waiter list priority 190 * Calculate task priority from the waiter tree priority
96 * 191 *
97 * Return task->normal_prio when the waiter list is empty or when 192 * Return task->normal_prio when the waiter tree is empty or when
98 * the waiter is not allowed to do priority boosting 193 * the waiter is not allowed to do priority boosting
99 */ 194 */
100int rt_mutex_getprio(struct task_struct *task) 195int rt_mutex_getprio(struct task_struct *task)
@@ -102,7 +197,7 @@ int rt_mutex_getprio(struct task_struct *task)
102 if (likely(!task_has_pi_waiters(task))) 197 if (likely(!task_has_pi_waiters(task)))
103 return task->normal_prio; 198 return task->normal_prio;
104 199
105 return min(task_top_pi_waiter(task)->pi_list_entry.prio, 200 return min(task_top_pi_waiter(task)->task->prio,
106 task->normal_prio); 201 task->normal_prio);
107} 202}
108 203
@@ -233,7 +328,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
233 * When deadlock detection is off then we check, if further 328 * When deadlock detection is off then we check, if further
234 * priority adjustment is necessary. 329 * priority adjustment is necessary.
235 */ 330 */
236 if (!detect_deadlock && waiter->list_entry.prio == task->prio) 331 if (!detect_deadlock && waiter->task->prio == task->prio)
237 goto out_unlock_pi; 332 goto out_unlock_pi;
238 333
239 lock = waiter->lock; 334 lock = waiter->lock;
@@ -254,9 +349,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
254 top_waiter = rt_mutex_top_waiter(lock); 349 top_waiter = rt_mutex_top_waiter(lock);
255 350
256 /* Requeue the waiter */ 351 /* Requeue the waiter */
257 plist_del(&waiter->list_entry, &lock->wait_list); 352 rt_mutex_dequeue(lock, waiter);
258 waiter->list_entry.prio = task->prio; 353 waiter->task->prio = task->prio;
259 plist_add(&waiter->list_entry, &lock->wait_list); 354 rt_mutex_enqueue(lock, waiter);
260 355
261 /* Release the task */ 356 /* Release the task */
262 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 357 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
@@ -280,17 +375,15 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
280 375
281 if (waiter == rt_mutex_top_waiter(lock)) { 376 if (waiter == rt_mutex_top_waiter(lock)) {
282 /* Boost the owner */ 377 /* Boost the owner */
283 plist_del(&top_waiter->pi_list_entry, &task->pi_waiters); 378 rt_mutex_dequeue_pi(task, top_waiter);
284 waiter->pi_list_entry.prio = waiter->list_entry.prio; 379 rt_mutex_enqueue_pi(task, waiter);
285 plist_add(&waiter->pi_list_entry, &task->pi_waiters);
286 __rt_mutex_adjust_prio(task); 380 __rt_mutex_adjust_prio(task);
287 381
288 } else if (top_waiter == waiter) { 382 } else if (top_waiter == waiter) {
289 /* Deboost the owner */ 383 /* Deboost the owner */
290 plist_del(&waiter->pi_list_entry, &task->pi_waiters); 384 rt_mutex_dequeue_pi(task, waiter);
291 waiter = rt_mutex_top_waiter(lock); 385 waiter = rt_mutex_top_waiter(lock);
292 waiter->pi_list_entry.prio = waiter->list_entry.prio; 386 rt_mutex_enqueue_pi(task, waiter);
293 plist_add(&waiter->pi_list_entry, &task->pi_waiters);
294 __rt_mutex_adjust_prio(task); 387 __rt_mutex_adjust_prio(task);
295 } 388 }
296 389
@@ -355,7 +448,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
355 * 3) it is top waiter 448 * 3) it is top waiter
356 */ 449 */
357 if (rt_mutex_has_waiters(lock)) { 450 if (rt_mutex_has_waiters(lock)) {
358 if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) { 451 if (task->prio >= rt_mutex_top_waiter(lock)->task->prio) {
359 if (!waiter || waiter != rt_mutex_top_waiter(lock)) 452 if (!waiter || waiter != rt_mutex_top_waiter(lock))
360 return 0; 453 return 0;
361 } 454 }
@@ -369,7 +462,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
369 462
370 /* remove the queued waiter. */ 463 /* remove the queued waiter. */
371 if (waiter) { 464 if (waiter) {
372 plist_del(&waiter->list_entry, &lock->wait_list); 465 rt_mutex_dequeue(lock, waiter);
373 task->pi_blocked_on = NULL; 466 task->pi_blocked_on = NULL;
374 } 467 }
375 468
@@ -379,8 +472,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
379 */ 472 */
380 if (rt_mutex_has_waiters(lock)) { 473 if (rt_mutex_has_waiters(lock)) {
381 top = rt_mutex_top_waiter(lock); 474 top = rt_mutex_top_waiter(lock);
382 top->pi_list_entry.prio = top->list_entry.prio; 475 rt_mutex_enqueue_pi(task, top);
383 plist_add(&top->pi_list_entry, &task->pi_waiters);
384 } 476 }
385 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 477 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
386 } 478 }
@@ -416,13 +508,11 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
416 __rt_mutex_adjust_prio(task); 508 __rt_mutex_adjust_prio(task);
417 waiter->task = task; 509 waiter->task = task;
418 waiter->lock = lock; 510 waiter->lock = lock;
419 plist_node_init(&waiter->list_entry, task->prio);
420 plist_node_init(&waiter->pi_list_entry, task->prio);
421 511
422 /* Get the top priority waiter on the lock */ 512 /* Get the top priority waiter on the lock */
423 if (rt_mutex_has_waiters(lock)) 513 if (rt_mutex_has_waiters(lock))
424 top_waiter = rt_mutex_top_waiter(lock); 514 top_waiter = rt_mutex_top_waiter(lock);
425 plist_add(&waiter->list_entry, &lock->wait_list); 515 rt_mutex_enqueue(lock, waiter);
426 516
427 task->pi_blocked_on = waiter; 517 task->pi_blocked_on = waiter;
428 518
@@ -433,8 +523,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
433 523
434 if (waiter == rt_mutex_top_waiter(lock)) { 524 if (waiter == rt_mutex_top_waiter(lock)) {
435 raw_spin_lock_irqsave(&owner->pi_lock, flags); 525 raw_spin_lock_irqsave(&owner->pi_lock, flags);
436 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); 526 rt_mutex_dequeue_pi(owner, top_waiter);
437 plist_add(&waiter->pi_list_entry, &owner->pi_waiters); 527 rt_mutex_enqueue_pi(owner, waiter);
438 528
439 __rt_mutex_adjust_prio(owner); 529 __rt_mutex_adjust_prio(owner);
440 if (owner->pi_blocked_on) 530 if (owner->pi_blocked_on)
@@ -486,7 +576,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
486 * boosted mode and go back to normal after releasing 576 * boosted mode and go back to normal after releasing
487 * lock->wait_lock. 577 * lock->wait_lock.
488 */ 578 */
489 plist_del(&waiter->pi_list_entry, &current->pi_waiters); 579 rt_mutex_dequeue_pi(current, waiter);
490 580
491 rt_mutex_set_owner(lock, NULL); 581 rt_mutex_set_owner(lock, NULL);
492 582
@@ -510,7 +600,7 @@ static void remove_waiter(struct rt_mutex *lock,
510 int chain_walk = 0; 600 int chain_walk = 0;
511 601
512 raw_spin_lock_irqsave(&current->pi_lock, flags); 602 raw_spin_lock_irqsave(&current->pi_lock, flags);
513 plist_del(&waiter->list_entry, &lock->wait_list); 603 rt_mutex_dequeue(lock, waiter);
514 current->pi_blocked_on = NULL; 604 current->pi_blocked_on = NULL;
515 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 605 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
516 606
@@ -521,13 +611,13 @@ static void remove_waiter(struct rt_mutex *lock,
521 611
522 raw_spin_lock_irqsave(&owner->pi_lock, flags); 612 raw_spin_lock_irqsave(&owner->pi_lock, flags);
523 613
524 plist_del(&waiter->pi_list_entry, &owner->pi_waiters); 614 rt_mutex_dequeue_pi(owner, waiter);
525 615
526 if (rt_mutex_has_waiters(lock)) { 616 if (rt_mutex_has_waiters(lock)) {
527 struct rt_mutex_waiter *next; 617 struct rt_mutex_waiter *next;
528 618
529 next = rt_mutex_top_waiter(lock); 619 next = rt_mutex_top_waiter(lock);
530 plist_add(&next->pi_list_entry, &owner->pi_waiters); 620 rt_mutex_enqueue_pi(owner, next);
531 } 621 }
532 __rt_mutex_adjust_prio(owner); 622 __rt_mutex_adjust_prio(owner);
533 623
@@ -537,8 +627,6 @@ static void remove_waiter(struct rt_mutex *lock,
537 raw_spin_unlock_irqrestore(&owner->pi_lock, flags); 627 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
538 } 628 }
539 629
540 WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
541
542 if (!chain_walk) 630 if (!chain_walk)
543 return; 631 return;
544 632
@@ -565,7 +653,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
565 raw_spin_lock_irqsave(&task->pi_lock, flags); 653 raw_spin_lock_irqsave(&task->pi_lock, flags);
566 654
567 waiter = task->pi_blocked_on; 655 waiter = task->pi_blocked_on;
568 if (!waiter || waiter->list_entry.prio == task->prio) { 656 if (!waiter || waiter->task->prio == task->prio) {
569 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 657 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
570 return; 658 return;
571 } 659 }
@@ -638,6 +726,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
638 int ret = 0; 726 int ret = 0;
639 727
640 debug_rt_mutex_init_waiter(&waiter); 728 debug_rt_mutex_init_waiter(&waiter);
729 RB_CLEAR_NODE(&waiter.pi_tree_entry);
730 RB_CLEAR_NODE(&waiter.tree_entry);
641 731
642 raw_spin_lock(&lock->wait_lock); 732 raw_spin_lock(&lock->wait_lock);
643 733
@@ -904,7 +994,8 @@ void __rt_mutex_init(struct rt_mutex *lock, const char *name)
904{ 994{
905 lock->owner = NULL; 995 lock->owner = NULL;
906 raw_spin_lock_init(&lock->wait_lock); 996 raw_spin_lock_init(&lock->wait_lock);
907 plist_head_init(&lock->wait_list); 997 lock->waiters = RB_ROOT;
998 lock->waiters_leftmost = NULL;
908 999
909 debug_rt_mutex_init(lock, name); 1000 debug_rt_mutex_init(lock, name);
910} 1001}
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 53a66c85261b..b65442fe5ade 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -40,13 +40,13 @@ extern void schedule_rt_mutex_test(struct rt_mutex *lock);
40 * This is the control structure for tasks blocked on a rt_mutex, 40 * This is the control structure for tasks blocked on a rt_mutex,
41 * which is allocated on the kernel stack on of the blocked task. 41 * which is allocated on the kernel stack on of the blocked task.
42 * 42 *
43 * @list_entry: pi node to enqueue into the mutex waiters list 43 * @tree_entry: pi node to enqueue into the mutex waiters tree
44 * @pi_list_entry: pi node to enqueue into the mutex owner waiters list 44 * @pi_tree_entry: pi node to enqueue into the mutex owner waiters tree
45 * @task: task reference to the blocked task 45 * @task: task reference to the blocked task
46 */ 46 */
47struct rt_mutex_waiter { 47struct rt_mutex_waiter {
48 struct plist_node list_entry; 48 struct rb_node tree_entry;
49 struct plist_node pi_list_entry; 49 struct rb_node pi_tree_entry;
50 struct task_struct *task; 50 struct task_struct *task;
51 struct rt_mutex *lock; 51 struct rt_mutex *lock;
52#ifdef CONFIG_DEBUG_RT_MUTEXES 52#ifdef CONFIG_DEBUG_RT_MUTEXES
@@ -57,11 +57,11 @@ struct rt_mutex_waiter {
57}; 57};
58 58
59/* 59/*
60 * Various helpers to access the waiters-plist: 60 * Various helpers to access the waiters-tree:
61 */ 61 */
62static inline int rt_mutex_has_waiters(struct rt_mutex *lock) 62static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
63{ 63{
64 return !plist_head_empty(&lock->wait_list); 64 return !RB_EMPTY_ROOT(&lock->waiters);
65} 65}
66 66
67static inline struct rt_mutex_waiter * 67static inline struct rt_mutex_waiter *
@@ -69,8 +69,8 @@ rt_mutex_top_waiter(struct rt_mutex *lock)
69{ 69{
70 struct rt_mutex_waiter *w; 70 struct rt_mutex_waiter *w;
71 71
72 w = plist_first_entry(&lock->wait_list, struct rt_mutex_waiter, 72 w = rb_entry(lock->waiters_leftmost, struct rt_mutex_waiter,
73 list_entry); 73 tree_entry);
74 BUG_ON(w->lock != lock); 74 BUG_ON(w->lock != lock);
75 75
76 return w; 76 return w;
@@ -78,14 +78,14 @@ rt_mutex_top_waiter(struct rt_mutex *lock)
78 78
79static inline int task_has_pi_waiters(struct task_struct *p) 79static inline int task_has_pi_waiters(struct task_struct *p)
80{ 80{
81 return !plist_head_empty(&p->pi_waiters); 81 return !RB_EMPTY_ROOT(&p->pi_waiters);
82} 82}
83 83
84static inline struct rt_mutex_waiter * 84static inline struct rt_mutex_waiter *
85task_top_pi_waiter(struct task_struct *p) 85task_top_pi_waiter(struct task_struct *p)
86{ 86{
87 return plist_first_entry(&p->pi_waiters, struct rt_mutex_waiter, 87 return rb_entry(p->pi_waiters_leftmost, struct rt_mutex_waiter,
88 pi_list_entry); 88 pi_tree_entry);
89} 89}
90 90
91/* 91/*