author     Pierre Peiffer <pierre.peiffer@bull.net>               2007-05-09 05:35:00 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>   2007-05-09 15:30:55 -0400
commit     ec92d08292d3e9b0823eba138a4564d2d39f25c7
tree       4dc94792e83d51b036d52e92e8d4f137a2efce97
parent     f34c506b0385b43abd25c490335036ecbb173aed
futex priority based wakeup
Today, all threads waiting for a given futex are woken in FIFO order (first
waiter woken first) instead of priority order.
This patch makes use of a plist (priority-ordered list) instead of a simple
list in futex_hash_bucket.
All non-RT threads are stored with priority MAX_RT_PRIO, causing them to be
woken last, in FIFO order (RT-threads are woken first, in priority order).
Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--   kernel/futex.c | 78
1 file changed, 49 insertions(+), 29 deletions(-)
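Before the diff itself, here is a small user-space model of the ordering described above. It is an illustration only, not kernel code and not part of the patch: the struct waiter type, the enqueue()/wake_one() helpers and the hard-coded MAX_RT_PRIO value are assumptions made for the sketch. It shows RT waiters leaving the queue in priority order while waiters clamped to MAX_RT_PRIO leave in FIFO order.

/*
 * Toy model of the wakeup order this patch implements -- NOT kernel code.
 * MAX_RT_PRIO and the helper names are made up for illustration.
 */
#include <stdio.h>

#define MAX_RT_PRIO 100          /* non-RT waiters are clamped to this value */

struct waiter {
        const char *name;
        int prio;                /* lower value == higher priority */
        struct waiter *next;
};

static struct waiter *queue;

/* Insert behind all waiters of equal or higher priority: priority order for
 * RT waiters, FIFO order among waiters sharing the same (clamped) priority. */
static void enqueue(struct waiter *w)
{
        struct waiter **p = &queue;

        while (*p && (*p)->prio <= w->prio)
                p = &(*p)->next;
        w->next = *p;
        *p = w;
}

static struct waiter *wake_one(void)
{
        struct waiter *w = queue;

        if (w)
                queue = w->next;
        return w;
}

int main(void)
{
        struct waiter w[] = {
                { "non-RT A",   MAX_RT_PRIO },  /* arrives first  */
                { "RT prio 49", 49 },           /* arrives second */
                { "non-RT B",   MAX_RT_PRIO },  /* arrives third  */
                { "RT prio 98", 98 },           /* arrives fourth */
        };

        for (unsigned int i = 0; i < sizeof(w) / sizeof(w[0]); i++)
                enqueue(&w[i]);

        /* Expected order: RT prio 49, RT prio 98, non-RT A, non-RT B */
        for (struct waiter *q; (q = wake_one()); )
                printf("woken: %s\n", q->name);
        return 0;
}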
diff --git a/kernel/futex.c b/kernel/futex.c
index 600bc9d801f2..685ee2362a5e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -81,12 +81,12 @@ struct futex_pi_state {
  * we can wake only the relevant ones (hashed queues may be shared).
  *
  * A futex_q has a woken state, just like tasks have TASK_RUNNING.
- * It is considered woken when list_empty(&q->list) || q->lock_ptr == 0.
+ * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
  * The order of wakup is always to make the first condition true, then
  * wake up q->waiters, then make the second condition true.
  */
 struct futex_q {
-        struct list_head list;
+        struct plist_node list;
         wait_queue_head_t waiters;
 
         /* Which hash list lock to use: */
@@ -108,8 +108,8 @@ struct futex_q {
  * Split the global futex_lock into every hash list lock.
  */
 struct futex_hash_bucket {
         spinlock_t lock;
-        struct list_head chain;
+        struct plist_head chain;
 };
 
 static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
@@ -443,13 +443,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 {
         struct futex_pi_state *pi_state = NULL;
         struct futex_q *this, *next;
-        struct list_head *head;
+        struct plist_head *head;
         struct task_struct *p;
         pid_t pid;
 
         head = &hb->chain;
 
-        list_for_each_entry_safe(this, next, head, list) {
+        plist_for_each_entry_safe(this, next, head, list) {
                 if (match_futex(&this->key, &me->key)) {
                         /*
                          * Another waiter already exists - bump up
@@ -513,12 +513,12 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
  */
 static void wake_futex(struct futex_q *q)
 {
-        list_del_init(&q->list);
+        plist_del(&q->list, &q->list.plist);
         if (q->filp)
                 send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
         /*
          * The lock in wake_up_all() is a crucial memory barrier after the
-         * list_del_init() and also before assigning to q->lock_ptr.
+         * plist_del() and also before assigning to q->lock_ptr.
          */
         wake_up_all(&q->waiters);
         /*
@@ -633,7 +633,7 @@ static int futex_wake(u32 __user *uaddr, int nr_wake)
 {
         struct futex_hash_bucket *hb;
         struct futex_q *this, *next;
-        struct list_head *head;
+        struct plist_head *head;
         union futex_key key;
         int ret;
 
@@ -647,7 +647,7 @@ static int futex_wake(u32 __user *uaddr, int nr_wake)
         spin_lock(&hb->lock);
         head = &hb->chain;
 
-        list_for_each_entry_safe(this, next, head, list) {
+        plist_for_each_entry_safe(this, next, head, list) {
                 if (match_futex (&this->key, &key)) {
                         if (this->pi_state) {
                                 ret = -EINVAL;
@@ -675,7 +675,7 @@ futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2,
 {
         union futex_key key1, key2;
         struct futex_hash_bucket *hb1, *hb2;
-        struct list_head *head;
+        struct plist_head *head;
         struct futex_q *this, *next;
         int ret, op_ret, attempt = 0;
 
@@ -748,7 +748,7 @@ retry:
 
         head = &hb1->chain;
 
-        list_for_each_entry_safe(this, next, head, list) {
+        plist_for_each_entry_safe(this, next, head, list) {
                 if (match_futex (&this->key, &key1)) {
                         wake_futex(this);
                         if (++ret >= nr_wake)
@@ -760,7 +760,7 @@ retry:
                 head = &hb2->chain;
 
                 op_ret = 0;
-                list_for_each_entry_safe(this, next, head, list) {
+                plist_for_each_entry_safe(this, next, head, list) {
                         if (match_futex (&this->key, &key2)) {
                                 wake_futex(this);
                                 if (++op_ret >= nr_wake2)
@@ -787,7 +787,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
 {
         union futex_key key1, key2;
         struct futex_hash_bucket *hb1, *hb2;
-        struct list_head *head1;
+        struct plist_head *head1;
         struct futex_q *this, *next;
         int ret, drop_count = 0;
 
@@ -836,7 +836,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
         }
 
         head1 = &hb1->chain;
-        list_for_each_entry_safe(this, next, head1, list) {
+        plist_for_each_entry_safe(this, next, head1, list) {
                 if (!match_futex (&this->key, &key1))
                         continue;
                 if (++ret <= nr_wake) {
@@ -847,9 +847,13 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
                          * requeue.
                          */
                         if (likely(head1 != &hb2->chain)) {
-                                list_move_tail(&this->list, &hb2->chain);
+                                plist_del(&this->list, &hb1->chain);
+                                plist_add(&this->list, &hb2->chain);
                                 this->lock_ptr = &hb2->lock;
-                        }
+#ifdef CONFIG_DEBUG_PI_LIST
+                                this->list.plist.lock = &hb2->lock;
+#endif
+                        }
                         this->key = key2;
                         get_futex_key_refs(&key2);
                         drop_count++;
@@ -894,7 +898,23 @@ queue_lock(struct futex_q *q, int fd, struct file *filp)
 
 static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 {
-        list_add_tail(&q->list, &hb->chain);
+        int prio;
+
+        /*
+         * The priority used to register this element is
+         * - either the real thread-priority for the real-time threads
+         * (i.e. threads with a priority lower than MAX_RT_PRIO)
+         * - or MAX_RT_PRIO for non-RT threads.
+         * Thus, all RT-threads are woken first in priority order, and
+         * the others are woken last, in FIFO order.
+         */
+        prio = min(current->normal_prio, MAX_RT_PRIO);
+
+        plist_node_init(&q->list, prio);
+#ifdef CONFIG_DEBUG_PI_LIST
+        q->list.plist.lock = &hb->lock;
+#endif
+        plist_add(&q->list, &hb->chain);
         q->task = current;
         spin_unlock(&hb->lock);
 }
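A worked example of the prio value chosen by __queue_me() above, using scheduler constants that are not part of this patch (assuming the defaults of that era, MAX_RT_PRIO == 100 and a nice-0 normal_prio of 120): a SCHED_FIFO thread with rt_priority 50 has normal_prio 99 - 50 = 49 and is queued at priority 49; one with rt_priority 1 is queued at 99 - 1 = 98; a SCHED_OTHER thread at nice 0 has normal_prio 120, which min() clamps to 100, so every non-RT waiter shares priority MAX_RT_PRIO and is woken in FIFO order behind all RT waiters.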
@@ -949,8 +969,8 @@ static int unqueue_me(struct futex_q *q)
                         spin_unlock(lock_ptr);
                         goto retry;
                 }
-                WARN_ON(list_empty(&q->list));
-                list_del(&q->list);
+                WARN_ON(plist_node_empty(&q->list));
+                plist_del(&q->list, &q->list.plist);
 
                 BUG_ON(q->pi_state);
 
@@ -968,8 +988,8 @@ static int unqueue_me(struct futex_q *q)
  */
 static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb)
 {
-        WARN_ON(list_empty(&q->list));
-        list_del(&q->list);
+        WARN_ON(plist_node_empty(&q->list));
+        plist_del(&q->list, &q->list.plist);
 
         BUG_ON(!q->pi_state);
         free_pi_state(q->pi_state);
@@ -1065,11 +1085,11 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val,
         __set_current_state(TASK_INTERRUPTIBLE);
         add_wait_queue(&q.waiters, &wait);
         /*
-         * !list_empty() is safe here without any lock.
+         * !plist_node_empty() is safe here without any lock.
          * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
          */
         time_left = 0;
-        if (likely(!list_empty(&q.list))) {
+        if (likely(!plist_node_empty(&q.list))) {
                 unsigned long rel_time;
 
                 if (timed) {
@@ -1384,7 +1404,7 @@ static int futex_unlock_pi(u32 __user *uaddr)
         struct futex_hash_bucket *hb;
         struct futex_q *this, *next;
         u32 uval;
-        struct list_head *head;
+        struct plist_head *head;
         union futex_key key;
         int ret, attempt = 0;
 
@@ -1435,7 +1455,7 @@ retry_locked:
          */
         head = &hb->chain;
 
-        list_for_each_entry_safe(this, next, head, list) {
+        plist_for_each_entry_safe(this, next, head, list) {
                 if (!match_futex (&this->key, &key))
                         continue;
                 ret = wake_futex_pi(uaddr, uval, this);
@@ -1509,10 +1529,10 @@ static unsigned int futex_poll(struct file *filp,
         poll_wait(filp, &q->waiters, wait);
 
         /*
-         * list_empty() is safe here without any lock.
+         * plist_node_empty() is safe here without any lock.
          * q->lock_ptr != 0 is not safe, because of ordering against wakeup.
          */
-        if (list_empty(&q->list))
+        if (plist_node_empty(&q->list))
                 ret = POLLIN | POLLRDNORM;
 
         return ret;
@@ -1895,7 +1915,7 @@ static int __init init(void)
         }
 
         for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
-                INIT_LIST_HEAD(&futex_queues[i].chain);
+                plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
                 spin_lock_init(&futex_queues[i].lock);
         }
         return 0;
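The change can be exercised from user space. The program below is a hedged sketch rather than a definitive test: it assumes a kernel carrying this patch, privilege to set SCHED_FIFO, and a crude sleep() to let the waiters queue up; the futex() wrapper and the chosen rt_priority values are illustrative. With priority-based wakeup, waking one waiter at a time should report the rt_priority-50 thread first, then the rt_priority-10 thread, then the SCHED_OTHER thread.

/*
 * Illustrative user-space check of priority-based futex wakeup -- a sketch,
 * not part of this patch. Build with: gcc demo.c -lpthread (run as root so
 * pthread_setschedparam(SCHED_FIFO) succeeds).
 */
#define _GNU_SOURCE
#include <linux/futex.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static int futex_word;                  /* all waiters block on this word */

static long futex(int *uaddr, int op, int val)
{
        return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

static void *waiter(void *arg)
{
        int rtprio = (int)(long)arg;

        if (rtprio > 0) {               /* 0 means: stay SCHED_OTHER */
                struct sched_param sp = { .sched_priority = rtprio };
                pthread_setschedparam(pthread_self(), SCHED_FIFO, &sp);
        }
        futex(&futex_word, FUTEX_WAIT, 0);      /* block until woken */
        printf("woken: rt_priority %d\n", rtprio);
        return NULL;
}

int main(void)
{
        int rtprio[] = { 0, 10, 50 };   /* a non-RT, an RT-10 and an RT-50 waiter */
        pthread_t t[3];

        for (int i = 0; i < 3; i++)
                pthread_create(&t[i], NULL, waiter, (void *)(long)rtprio[i]);
        sleep(1);                       /* crude: let all three reach FUTEX_WAIT */

        /* Wake one waiter per iteration; with this patch the expected output
         * order is rt_priority 50, then 10, then 0 (the non-RT waiter). */
        for (int i = 0; i < 3; i++) {
                futex(&futex_word, FUTEX_WAKE, 1);
                sleep(1);
        }
        for (int i = 0; i < 3; i++)
                pthread_join(t[i], NULL);
        return 0;
}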