diff options
| author | Pierre Peiffer <pierre.peiffer@bull.net> | 2007-05-09 05:35:00 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-09 15:30:55 -0400 |
| commit | ec92d08292d3e9b0823eba138a4564d2d39f25c7 (patch) | |
| tree | 4dc94792e83d51b036d52e92e8d4f137a2efce97 /kernel | |
| parent | f34c506b0385b43abd25c490335036ecbb173aed (diff) | |
futex priority based wakeup
Today, all threads waiting for a given futex are woken in FIFO order (first
waiter woken first) instead of priority order.
This patch makes use of plist (pirotity ordered lists) instead of simple list
in futex_hash_bucket.
All non-RT threads are stored with priority MAX_RT_PRIO, causing them to be
woken last, in FIFO order (RT-threads are woken first, in priority order).
Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/futex.c | 78 |
1 files changed, 49 insertions, 29 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index 600bc9d801f2..685ee2362a5e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -81,12 +81,12 @@ struct futex_pi_state { | |||
| 81 | * we can wake only the relevant ones (hashed queues may be shared). | 81 | * we can wake only the relevant ones (hashed queues may be shared). |
| 82 | * | 82 | * |
| 83 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. | 83 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. |
| 84 | * It is considered woken when list_empty(&q->list) || q->lock_ptr == 0. | 84 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. |
| 85 | * The order of wakup is always to make the first condition true, then | 85 | * The order of wakup is always to make the first condition true, then |
| 86 | * wake up q->waiters, then make the second condition true. | 86 | * wake up q->waiters, then make the second condition true. |
| 87 | */ | 87 | */ |
| 88 | struct futex_q { | 88 | struct futex_q { |
| 89 | struct list_head list; | 89 | struct plist_node list; |
| 90 | wait_queue_head_t waiters; | 90 | wait_queue_head_t waiters; |
| 91 | 91 | ||
| 92 | /* Which hash list lock to use: */ | 92 | /* Which hash list lock to use: */ |
| @@ -108,8 +108,8 @@ struct futex_q { | |||
| 108 | * Split the global futex_lock into every hash list lock. | 108 | * Split the global futex_lock into every hash list lock. |
| 109 | */ | 109 | */ |
| 110 | struct futex_hash_bucket { | 110 | struct futex_hash_bucket { |
| 111 | spinlock_t lock; | 111 | spinlock_t lock; |
| 112 | struct list_head chain; | 112 | struct plist_head chain; |
| 113 | }; | 113 | }; |
| 114 | 114 | ||
| 115 | static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; | 115 | static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; |
| @@ -443,13 +443,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) | |||
| 443 | { | 443 | { |
| 444 | struct futex_pi_state *pi_state = NULL; | 444 | struct futex_pi_state *pi_state = NULL; |
| 445 | struct futex_q *this, *next; | 445 | struct futex_q *this, *next; |
| 446 | struct list_head *head; | 446 | struct plist_head *head; |
| 447 | struct task_struct *p; | 447 | struct task_struct *p; |
| 448 | pid_t pid; | 448 | pid_t pid; |
| 449 | 449 | ||
| 450 | head = &hb->chain; | 450 | head = &hb->chain; |
| 451 | 451 | ||
| 452 | list_for_each_entry_safe(this, next, head, list) { | 452 | plist_for_each_entry_safe(this, next, head, list) { |
| 453 | if (match_futex(&this->key, &me->key)) { | 453 | if (match_futex(&this->key, &me->key)) { |
| 454 | /* | 454 | /* |
| 455 | * Another waiter already exists - bump up | 455 | * Another waiter already exists - bump up |
| @@ -513,12 +513,12 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) | |||
| 513 | */ | 513 | */ |
| 514 | static void wake_futex(struct futex_q *q) | 514 | static void wake_futex(struct futex_q *q) |
| 515 | { | 515 | { |
| 516 | list_del_init(&q->list); | 516 | plist_del(&q->list, &q->list.plist); |
| 517 | if (q->filp) | 517 | if (q->filp) |
| 518 | send_sigio(&q->filp->f_owner, q->fd, POLL_IN); | 518 | send_sigio(&q->filp->f_owner, q->fd, POLL_IN); |
| 519 | /* | 519 | /* |
| 520 | * The lock in wake_up_all() is a crucial memory barrier after the | 520 | * The lock in wake_up_all() is a crucial memory barrier after the |
| 521 | * list_del_init() and also before assigning to q->lock_ptr. | 521 | * plist_del() and also before assigning to q->lock_ptr. |
| 522 | */ | 522 | */ |
| 523 | wake_up_all(&q->waiters); | 523 | wake_up_all(&q->waiters); |
| 524 | /* | 524 | /* |
| @@ -633,7 +633,7 @@ static int futex_wake(u32 __user *uaddr, int nr_wake) | |||
| 633 | { | 633 | { |
| 634 | struct futex_hash_bucket *hb; | 634 | struct futex_hash_bucket *hb; |
| 635 | struct futex_q *this, *next; | 635 | struct futex_q *this, *next; |
| 636 | struct list_head *head; | 636 | struct plist_head *head; |
| 637 | union futex_key key; | 637 | union futex_key key; |
| 638 | int ret; | 638 | int ret; |
| 639 | 639 | ||
| @@ -647,7 +647,7 @@ static int futex_wake(u32 __user *uaddr, int nr_wake) | |||
| 647 | spin_lock(&hb->lock); | 647 | spin_lock(&hb->lock); |
| 648 | head = &hb->chain; | 648 | head = &hb->chain; |
| 649 | 649 | ||
| 650 | list_for_each_entry_safe(this, next, head, list) { | 650 | plist_for_each_entry_safe(this, next, head, list) { |
| 651 | if (match_futex (&this->key, &key)) { | 651 | if (match_futex (&this->key, &key)) { |
| 652 | if (this->pi_state) { | 652 | if (this->pi_state) { |
| 653 | ret = -EINVAL; | 653 | ret = -EINVAL; |
| @@ -675,7 +675,7 @@ futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2, | |||
| 675 | { | 675 | { |
| 676 | union futex_key key1, key2; | 676 | union futex_key key1, key2; |
| 677 | struct futex_hash_bucket *hb1, *hb2; | 677 | struct futex_hash_bucket *hb1, *hb2; |
| 678 | struct list_head *head; | 678 | struct plist_head *head; |
| 679 | struct futex_q *this, *next; | 679 | struct futex_q *this, *next; |
| 680 | int ret, op_ret, attempt = 0; | 680 | int ret, op_ret, attempt = 0; |
| 681 | 681 | ||
| @@ -748,7 +748,7 @@ retry: | |||
| 748 | 748 | ||
| 749 | head = &hb1->chain; | 749 | head = &hb1->chain; |
| 750 | 750 | ||
| 751 | list_for_each_entry_safe(this, next, head, list) { | 751 | plist_for_each_entry_safe(this, next, head, list) { |
| 752 | if (match_futex (&this->key, &key1)) { | 752 | if (match_futex (&this->key, &key1)) { |
| 753 | wake_futex(this); | 753 | wake_futex(this); |
| 754 | if (++ret >= nr_wake) | 754 | if (++ret >= nr_wake) |
| @@ -760,7 +760,7 @@ retry: | |||
| 760 | head = &hb2->chain; | 760 | head = &hb2->chain; |
| 761 | 761 | ||
| 762 | op_ret = 0; | 762 | op_ret = 0; |
| 763 | list_for_each_entry_safe(this, next, head, list) { | 763 | plist_for_each_entry_safe(this, next, head, list) { |
| 764 | if (match_futex (&this->key, &key2)) { | 764 | if (match_futex (&this->key, &key2)) { |
| 765 | wake_futex(this); | 765 | wake_futex(this); |
| 766 | if (++op_ret >= nr_wake2) | 766 | if (++op_ret >= nr_wake2) |
| @@ -787,7 +787,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, | |||
| 787 | { | 787 | { |
| 788 | union futex_key key1, key2; | 788 | union futex_key key1, key2; |
| 789 | struct futex_hash_bucket *hb1, *hb2; | 789 | struct futex_hash_bucket *hb1, *hb2; |
| 790 | struct list_head *head1; | 790 | struct plist_head *head1; |
| 791 | struct futex_q *this, *next; | 791 | struct futex_q *this, *next; |
| 792 | int ret, drop_count = 0; | 792 | int ret, drop_count = 0; |
| 793 | 793 | ||
| @@ -836,7 +836,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, | |||
| 836 | } | 836 | } |
| 837 | 837 | ||
| 838 | head1 = &hb1->chain; | 838 | head1 = &hb1->chain; |
| 839 | list_for_each_entry_safe(this, next, head1, list) { | 839 | plist_for_each_entry_safe(this, next, head1, list) { |
| 840 | if (!match_futex (&this->key, &key1)) | 840 | if (!match_futex (&this->key, &key1)) |
| 841 | continue; | 841 | continue; |
| 842 | if (++ret <= nr_wake) { | 842 | if (++ret <= nr_wake) { |
| @@ -847,9 +847,13 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, | |||
| 847 | * requeue. | 847 | * requeue. |
| 848 | */ | 848 | */ |
| 849 | if (likely(head1 != &hb2->chain)) { | 849 | if (likely(head1 != &hb2->chain)) { |
| 850 | list_move_tail(&this->list, &hb2->chain); | 850 | plist_del(&this->list, &hb1->chain); |
| 851 | plist_add(&this->list, &hb2->chain); | ||
| 851 | this->lock_ptr = &hb2->lock; | 852 | this->lock_ptr = &hb2->lock; |
| 852 | } | 853 | #ifdef CONFIG_DEBUG_PI_LIST |
| 854 | this->list.plist.lock = &hb2->lock; | ||
| 855 | #endif | ||
| 856 | } | ||
| 853 | this->key = key2; | 857 | this->key = key2; |
| 854 | get_futex_key_refs(&key2); | 858 | get_futex_key_refs(&key2); |
| 855 | drop_count++; | 859 | drop_count++; |
| @@ -894,7 +898,23 @@ queue_lock(struct futex_q *q, int fd, struct file *filp) | |||
| 894 | 898 | ||
| 895 | static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | 899 | static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) |
| 896 | { | 900 | { |
| 897 | list_add_tail(&q->list, &hb->chain); | 901 | int prio; |
| 902 | |||
| 903 | /* | ||
| 904 | * The priority used to register this element is | ||
| 905 | * - either the real thread-priority for the real-time threads | ||
| 906 | * (i.e. threads with a priority lower than MAX_RT_PRIO) | ||
| 907 | * - or MAX_RT_PRIO for non-RT threads. | ||
| 908 | * Thus, all RT-threads are woken first in priority order, and | ||
| 909 | * the others are woken last, in FIFO order. | ||
| 910 | */ | ||
| 911 | prio = min(current->normal_prio, MAX_RT_PRIO); | ||
| 912 | |||
| 913 | plist_node_init(&q->list, prio); | ||
| 914 | #ifdef CONFIG_DEBUG_PI_LIST | ||
| 915 | q->list.plist.lock = &hb->lock; | ||
| 916 | #endif | ||
| 917 | plist_add(&q->list, &hb->chain); | ||
| 898 | q->task = current; | 918 | q->task = current; |
| 899 | spin_unlock(&hb->lock); | 919 | spin_unlock(&hb->lock); |
| 900 | } | 920 | } |
| @@ -949,8 +969,8 @@ static int unqueue_me(struct futex_q *q) | |||
| 949 | spin_unlock(lock_ptr); | 969 | spin_unlock(lock_ptr); |
| 950 | goto retry; | 970 | goto retry; |
| 951 | } | 971 | } |
| 952 | WARN_ON(list_empty(&q->list)); | 972 | WARN_ON(plist_node_empty(&q->list)); |
| 953 | list_del(&q->list); | 973 | plist_del(&q->list, &q->list.plist); |
| 954 | 974 | ||
| 955 | BUG_ON(q->pi_state); | 975 | BUG_ON(q->pi_state); |
| 956 | 976 | ||
| @@ -968,8 +988,8 @@ static int unqueue_me(struct futex_q *q) | |||
| 968 | */ | 988 | */ |
| 969 | static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb) | 989 | static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb) |
| 970 | { | 990 | { |
| 971 | WARN_ON(list_empty(&q->list)); | 991 | WARN_ON(plist_node_empty(&q->list)); |
| 972 | list_del(&q->list); | 992 | plist_del(&q->list, &q->list.plist); |
| 973 | 993 | ||
| 974 | BUG_ON(!q->pi_state); | 994 | BUG_ON(!q->pi_state); |
| 975 | free_pi_state(q->pi_state); | 995 | free_pi_state(q->pi_state); |
| @@ -1065,11 +1085,11 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |||
| 1065 | __set_current_state(TASK_INTERRUPTIBLE); | 1085 | __set_current_state(TASK_INTERRUPTIBLE); |
| 1066 | add_wait_queue(&q.waiters, &wait); | 1086 | add_wait_queue(&q.waiters, &wait); |
| 1067 | /* | 1087 | /* |
| 1068 | * !list_empty() is safe here without any lock. | 1088 | * !plist_node_empty() is safe here without any lock. |
| 1069 | * q.lock_ptr != 0 is not safe, because of ordering against wakeup. | 1089 | * q.lock_ptr != 0 is not safe, because of ordering against wakeup. |
| 1070 | */ | 1090 | */ |
| 1071 | time_left = 0; | 1091 | time_left = 0; |
| 1072 | if (likely(!list_empty(&q.list))) { | 1092 | if (likely(!plist_node_empty(&q.list))) { |
| 1073 | unsigned long rel_time; | 1093 | unsigned long rel_time; |
| 1074 | 1094 | ||
| 1075 | if (timed) { | 1095 | if (timed) { |
| @@ -1384,7 +1404,7 @@ static int futex_unlock_pi(u32 __user *uaddr) | |||
| 1384 | struct futex_hash_bucket *hb; | 1404 | struct futex_hash_bucket *hb; |
| 1385 | struct futex_q *this, *next; | 1405 | struct futex_q *this, *next; |
| 1386 | u32 uval; | 1406 | u32 uval; |
| 1387 | struct list_head *head; | 1407 | struct plist_head *head; |
| 1388 | union futex_key key; | 1408 | union futex_key key; |
| 1389 | int ret, attempt = 0; | 1409 | int ret, attempt = 0; |
| 1390 | 1410 | ||
| @@ -1435,7 +1455,7 @@ retry_locked: | |||
| 1435 | */ | 1455 | */ |
| 1436 | head = &hb->chain; | 1456 | head = &hb->chain; |
| 1437 | 1457 | ||
| 1438 | list_for_each_entry_safe(this, next, head, list) { | 1458 | plist_for_each_entry_safe(this, next, head, list) { |
| 1439 | if (!match_futex (&this->key, &key)) | 1459 | if (!match_futex (&this->key, &key)) |
| 1440 | continue; | 1460 | continue; |
| 1441 | ret = wake_futex_pi(uaddr, uval, this); | 1461 | ret = wake_futex_pi(uaddr, uval, this); |
| @@ -1509,10 +1529,10 @@ static unsigned int futex_poll(struct file *filp, | |||
| 1509 | poll_wait(filp, &q->waiters, wait); | 1529 | poll_wait(filp, &q->waiters, wait); |
| 1510 | 1530 | ||
| 1511 | /* | 1531 | /* |
| 1512 | * list_empty() is safe here without any lock. | 1532 | * plist_node_empty() is safe here without any lock. |
| 1513 | * q->lock_ptr != 0 is not safe, because of ordering against wakeup. | 1533 | * q->lock_ptr != 0 is not safe, because of ordering against wakeup. |
| 1514 | */ | 1534 | */ |
| 1515 | if (list_empty(&q->list)) | 1535 | if (plist_node_empty(&q->list)) |
| 1516 | ret = POLLIN | POLLRDNORM; | 1536 | ret = POLLIN | POLLRDNORM; |
| 1517 | 1537 | ||
| 1518 | return ret; | 1538 | return ret; |
| @@ -1895,7 +1915,7 @@ static int __init init(void) | |||
| 1895 | } | 1915 | } |
| 1896 | 1916 | ||
| 1897 | for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { | 1917 | for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { |
| 1898 | INIT_LIST_HEAD(&futex_queues[i].chain); | 1918 | plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock); |
| 1899 | spin_lock_init(&futex_queues[i].lock); | 1919 | spin_lock_init(&futex_queues[i].lock); |
| 1900 | } | 1920 | } |
| 1901 | return 0; | 1921 | return 0; |
