aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPierre Peiffer <pierre.peiffer@bull.net>2007-05-09 05:35:00 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-09 15:30:55 -0400
commitec92d08292d3e9b0823eba138a4564d2d39f25c7 (patch)
tree4dc94792e83d51b036d52e92e8d4f137a2efce97
parentf34c506b0385b43abd25c490335036ecbb173aed (diff)
futex priority based wakeup
Today, all threads waiting for a given futex are woken in FIFO order (first waiter woken first) instead of priority order. This patch makes use of plist (pirotity ordered lists) instead of simple list in futex_hash_bucket. All non-RT threads are stored with priority MAX_RT_PRIO, causing them to be woken last, in FIFO order (RT-threads are woken first, in priority order). Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net> Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net> Cc: Ingo Molnar <mingo@elte.hu> Cc: Ulrich Drepper <drepper@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--kernel/futex.c78
1 files changed, 49 insertions, 29 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 600bc9d801f2..685ee2362a5e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -81,12 +81,12 @@ struct futex_pi_state {
81 * we can wake only the relevant ones (hashed queues may be shared). 81 * we can wake only the relevant ones (hashed queues may be shared).
82 * 82 *
83 * A futex_q has a woken state, just like tasks have TASK_RUNNING. 83 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
84 * It is considered woken when list_empty(&q->list) || q->lock_ptr == 0. 84 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
85 * The order of wakup is always to make the first condition true, then 85 * The order of wakup is always to make the first condition true, then
86 * wake up q->waiters, then make the second condition true. 86 * wake up q->waiters, then make the second condition true.
87 */ 87 */
88struct futex_q { 88struct futex_q {
89 struct list_head list; 89 struct plist_node list;
90 wait_queue_head_t waiters; 90 wait_queue_head_t waiters;
91 91
92 /* Which hash list lock to use: */ 92 /* Which hash list lock to use: */
@@ -108,8 +108,8 @@ struct futex_q {
108 * Split the global futex_lock into every hash list lock. 108 * Split the global futex_lock into every hash list lock.
109 */ 109 */
110struct futex_hash_bucket { 110struct futex_hash_bucket {
111 spinlock_t lock; 111 spinlock_t lock;
112 struct list_head chain; 112 struct plist_head chain;
113}; 113};
114 114
115static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; 115static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
@@ -443,13 +443,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
443{ 443{
444 struct futex_pi_state *pi_state = NULL; 444 struct futex_pi_state *pi_state = NULL;
445 struct futex_q *this, *next; 445 struct futex_q *this, *next;
446 struct list_head *head; 446 struct plist_head *head;
447 struct task_struct *p; 447 struct task_struct *p;
448 pid_t pid; 448 pid_t pid;
449 449
450 head = &hb->chain; 450 head = &hb->chain;
451 451
452 list_for_each_entry_safe(this, next, head, list) { 452 plist_for_each_entry_safe(this, next, head, list) {
453 if (match_futex(&this->key, &me->key)) { 453 if (match_futex(&this->key, &me->key)) {
454 /* 454 /*
455 * Another waiter already exists - bump up 455 * Another waiter already exists - bump up
@@ -513,12 +513,12 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
513 */ 513 */
514static void wake_futex(struct futex_q *q) 514static void wake_futex(struct futex_q *q)
515{ 515{
516 list_del_init(&q->list); 516 plist_del(&q->list, &q->list.plist);
517 if (q->filp) 517 if (q->filp)
518 send_sigio(&q->filp->f_owner, q->fd, POLL_IN); 518 send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
519 /* 519 /*
520 * The lock in wake_up_all() is a crucial memory barrier after the 520 * The lock in wake_up_all() is a crucial memory barrier after the
521 * list_del_init() and also before assigning to q->lock_ptr. 521 * plist_del() and also before assigning to q->lock_ptr.
522 */ 522 */
523 wake_up_all(&q->waiters); 523 wake_up_all(&q->waiters);
524 /* 524 /*
@@ -633,7 +633,7 @@ static int futex_wake(u32 __user *uaddr, int nr_wake)
633{ 633{
634 struct futex_hash_bucket *hb; 634 struct futex_hash_bucket *hb;
635 struct futex_q *this, *next; 635 struct futex_q *this, *next;
636 struct list_head *head; 636 struct plist_head *head;
637 union futex_key key; 637 union futex_key key;
638 int ret; 638 int ret;
639 639
@@ -647,7 +647,7 @@ static int futex_wake(u32 __user *uaddr, int nr_wake)
647 spin_lock(&hb->lock); 647 spin_lock(&hb->lock);
648 head = &hb->chain; 648 head = &hb->chain;
649 649
650 list_for_each_entry_safe(this, next, head, list) { 650 plist_for_each_entry_safe(this, next, head, list) {
651 if (match_futex (&this->key, &key)) { 651 if (match_futex (&this->key, &key)) {
652 if (this->pi_state) { 652 if (this->pi_state) {
653 ret = -EINVAL; 653 ret = -EINVAL;
@@ -675,7 +675,7 @@ futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2,
675{ 675{
676 union futex_key key1, key2; 676 union futex_key key1, key2;
677 struct futex_hash_bucket *hb1, *hb2; 677 struct futex_hash_bucket *hb1, *hb2;
678 struct list_head *head; 678 struct plist_head *head;
679 struct futex_q *this, *next; 679 struct futex_q *this, *next;
680 int ret, op_ret, attempt = 0; 680 int ret, op_ret, attempt = 0;
681 681
@@ -748,7 +748,7 @@ retry:
748 748
749 head = &hb1->chain; 749 head = &hb1->chain;
750 750
751 list_for_each_entry_safe(this, next, head, list) { 751 plist_for_each_entry_safe(this, next, head, list) {
752 if (match_futex (&this->key, &key1)) { 752 if (match_futex (&this->key, &key1)) {
753 wake_futex(this); 753 wake_futex(this);
754 if (++ret >= nr_wake) 754 if (++ret >= nr_wake)
@@ -760,7 +760,7 @@ retry:
760 head = &hb2->chain; 760 head = &hb2->chain;
761 761
762 op_ret = 0; 762 op_ret = 0;
763 list_for_each_entry_safe(this, next, head, list) { 763 plist_for_each_entry_safe(this, next, head, list) {
764 if (match_futex (&this->key, &key2)) { 764 if (match_futex (&this->key, &key2)) {
765 wake_futex(this); 765 wake_futex(this);
766 if (++op_ret >= nr_wake2) 766 if (++op_ret >= nr_wake2)
@@ -787,7 +787,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
787{ 787{
788 union futex_key key1, key2; 788 union futex_key key1, key2;
789 struct futex_hash_bucket *hb1, *hb2; 789 struct futex_hash_bucket *hb1, *hb2;
790 struct list_head *head1; 790 struct plist_head *head1;
791 struct futex_q *this, *next; 791 struct futex_q *this, *next;
792 int ret, drop_count = 0; 792 int ret, drop_count = 0;
793 793
@@ -836,7 +836,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
836 } 836 }
837 837
838 head1 = &hb1->chain; 838 head1 = &hb1->chain;
839 list_for_each_entry_safe(this, next, head1, list) { 839 plist_for_each_entry_safe(this, next, head1, list) {
840 if (!match_futex (&this->key, &key1)) 840 if (!match_futex (&this->key, &key1))
841 continue; 841 continue;
842 if (++ret <= nr_wake) { 842 if (++ret <= nr_wake) {
@@ -847,9 +847,13 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
847 * requeue. 847 * requeue.
848 */ 848 */
849 if (likely(head1 != &hb2->chain)) { 849 if (likely(head1 != &hb2->chain)) {
850 list_move_tail(&this->list, &hb2->chain); 850 plist_del(&this->list, &hb1->chain);
851 plist_add(&this->list, &hb2->chain);
851 this->lock_ptr = &hb2->lock; 852 this->lock_ptr = &hb2->lock;
852 } 853#ifdef CONFIG_DEBUG_PI_LIST
854 this->list.plist.lock = &hb2->lock;
855#endif
856 }
853 this->key = key2; 857 this->key = key2;
854 get_futex_key_refs(&key2); 858 get_futex_key_refs(&key2);
855 drop_count++; 859 drop_count++;
@@ -894,7 +898,23 @@ queue_lock(struct futex_q *q, int fd, struct file *filp)
894 898
895static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) 899static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
896{ 900{
897 list_add_tail(&q->list, &hb->chain); 901 int prio;
902
903 /*
904 * The priority used to register this element is
905 * - either the real thread-priority for the real-time threads
906 * (i.e. threads with a priority lower than MAX_RT_PRIO)
907 * - or MAX_RT_PRIO for non-RT threads.
908 * Thus, all RT-threads are woken first in priority order, and
909 * the others are woken last, in FIFO order.
910 */
911 prio = min(current->normal_prio, MAX_RT_PRIO);
912
913 plist_node_init(&q->list, prio);
914#ifdef CONFIG_DEBUG_PI_LIST
915 q->list.plist.lock = &hb->lock;
916#endif
917 plist_add(&q->list, &hb->chain);
898 q->task = current; 918 q->task = current;
899 spin_unlock(&hb->lock); 919 spin_unlock(&hb->lock);
900} 920}
@@ -949,8 +969,8 @@ static int unqueue_me(struct futex_q *q)
949 spin_unlock(lock_ptr); 969 spin_unlock(lock_ptr);
950 goto retry; 970 goto retry;
951 } 971 }
952 WARN_ON(list_empty(&q->list)); 972 WARN_ON(plist_node_empty(&q->list));
953 list_del(&q->list); 973 plist_del(&q->list, &q->list.plist);
954 974
955 BUG_ON(q->pi_state); 975 BUG_ON(q->pi_state);
956 976
@@ -968,8 +988,8 @@ static int unqueue_me(struct futex_q *q)
968 */ 988 */
969static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb) 989static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb)
970{ 990{
971 WARN_ON(list_empty(&q->list)); 991 WARN_ON(plist_node_empty(&q->list));
972 list_del(&q->list); 992 plist_del(&q->list, &q->list.plist);
973 993
974 BUG_ON(!q->pi_state); 994 BUG_ON(!q->pi_state);
975 free_pi_state(q->pi_state); 995 free_pi_state(q->pi_state);
@@ -1065,11 +1085,11 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val,
1065 __set_current_state(TASK_INTERRUPTIBLE); 1085 __set_current_state(TASK_INTERRUPTIBLE);
1066 add_wait_queue(&q.waiters, &wait); 1086 add_wait_queue(&q.waiters, &wait);
1067 /* 1087 /*
1068 * !list_empty() is safe here without any lock. 1088 * !plist_node_empty() is safe here without any lock.
1069 * q.lock_ptr != 0 is not safe, because of ordering against wakeup. 1089 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
1070 */ 1090 */
1071 time_left = 0; 1091 time_left = 0;
1072 if (likely(!list_empty(&q.list))) { 1092 if (likely(!plist_node_empty(&q.list))) {
1073 unsigned long rel_time; 1093 unsigned long rel_time;
1074 1094
1075 if (timed) { 1095 if (timed) {
@@ -1384,7 +1404,7 @@ static int futex_unlock_pi(u32 __user *uaddr)
1384 struct futex_hash_bucket *hb; 1404 struct futex_hash_bucket *hb;
1385 struct futex_q *this, *next; 1405 struct futex_q *this, *next;
1386 u32 uval; 1406 u32 uval;
1387 struct list_head *head; 1407 struct plist_head *head;
1388 union futex_key key; 1408 union futex_key key;
1389 int ret, attempt = 0; 1409 int ret, attempt = 0;
1390 1410
@@ -1435,7 +1455,7 @@ retry_locked:
1435 */ 1455 */
1436 head = &hb->chain; 1456 head = &hb->chain;
1437 1457
1438 list_for_each_entry_safe(this, next, head, list) { 1458 plist_for_each_entry_safe(this, next, head, list) {
1439 if (!match_futex (&this->key, &key)) 1459 if (!match_futex (&this->key, &key))
1440 continue; 1460 continue;
1441 ret = wake_futex_pi(uaddr, uval, this); 1461 ret = wake_futex_pi(uaddr, uval, this);
@@ -1509,10 +1529,10 @@ static unsigned int futex_poll(struct file *filp,
1509 poll_wait(filp, &q->waiters, wait); 1529 poll_wait(filp, &q->waiters, wait);
1510 1530
1511 /* 1531 /*
1512 * list_empty() is safe here without any lock. 1532 * plist_node_empty() is safe here without any lock.
1513 * q->lock_ptr != 0 is not safe, because of ordering against wakeup. 1533 * q->lock_ptr != 0 is not safe, because of ordering against wakeup.
1514 */ 1534 */
1515 if (list_empty(&q->list)) 1535 if (plist_node_empty(&q->list))
1516 ret = POLLIN | POLLRDNORM; 1536 ret = POLLIN | POLLRDNORM;
1517 1537
1518 return ret; 1538 return ret;
@@ -1895,7 +1915,7 @@ static int __init init(void)
1895 } 1915 }
1896 1916
1897 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { 1917 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
1898 INIT_LIST_HEAD(&futex_queues[i].chain); 1918 plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
1899 spin_lock_init(&futex_queues[i].lock); 1919 spin_lock_init(&futex_queues[i].lock);
1900 } 1920 }
1901 return 0; 1921 return 0;