diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/rwsem.c | 64 |
1 files changed, 32 insertions, 32 deletions
diff --git a/lib/rwsem.c b/lib/rwsem.c index 9a675fa9d78e..bbe48c04f363 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c | |||
@@ -4,6 +4,7 @@ | |||
4 | * Derived from arch/i386/kernel/semaphore.c | 4 | * Derived from arch/i386/kernel/semaphore.c |
5 | * | 5 | * |
6 | * Writer lock-stealing by Alex Shi <alex.shi@intel.com> | 6 | * Writer lock-stealing by Alex Shi <alex.shi@intel.com> |
7 | * and Michel Lespinasse <walken@google.com> | ||
7 | */ | 8 | */ |
8 | #include <linux/rwsem.h> | 9 | #include <linux/rwsem.h> |
9 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
@@ -41,13 +42,11 @@ struct rwsem_waiter { | |||
41 | enum rwsem_waiter_type type; | 42 | enum rwsem_waiter_type type; |
42 | }; | 43 | }; |
43 | 44 | ||
44 | /* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and | 45 | enum rwsem_wake_type { |
45 | * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held | 46 | RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */ |
46 | * since the rwsem value was observed. | 47 | RWSEM_WAKE_READERS, /* Wake readers only */ |
47 | */ | 48 | RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */ |
48 | #define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */ | 49 | }; |
49 | #define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */ | ||
50 | #define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */ | ||
51 | 50 | ||
52 | /* | 51 | /* |
53 | * handle the lock release when processes blocked on it that can now run | 52 | * handle the lock release when processes blocked on it that can now run |
@@ -60,16 +59,16 @@ struct rwsem_waiter { | |||
60 | * - writers are only woken if downgrading is false | 59 | * - writers are only woken if downgrading is false |
61 | */ | 60 | */ |
62 | static struct rw_semaphore * | 61 | static struct rw_semaphore * |
63 | __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) | 62 | __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) |
64 | { | 63 | { |
65 | struct rwsem_waiter *waiter; | 64 | struct rwsem_waiter *waiter; |
66 | struct task_struct *tsk; | 65 | struct task_struct *tsk; |
67 | struct list_head *next; | 66 | struct list_head *next; |
68 | signed long woken, loop, adjustment; | 67 | signed long oldcount, woken, loop, adjustment; |
69 | 68 | ||
70 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | 69 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); |
71 | if (waiter->type == RWSEM_WAITING_FOR_WRITE) { | 70 | if (waiter->type == RWSEM_WAITING_FOR_WRITE) { |
72 | if (wake_type != RWSEM_WAKE_READ_OWNED) | 71 | if (wake_type == RWSEM_WAKE_ANY) |
73 | /* Wake writer at the front of the queue, but do not | 72 | /* Wake writer at the front of the queue, but do not |
74 | * grant it the lock yet as we want other writers | 73 | * grant it the lock yet as we want other writers |
75 | * to be able to steal it. Readers, on the other hand, | 74 | * to be able to steal it. Readers, on the other hand, |
@@ -79,24 +78,24 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) | |||
79 | goto out; | 78 | goto out; |
80 | } | 79 | } |
81 | 80 | ||
82 | /* If we come here from up_xxxx(), another thread might have reached | 81 | /* Writers might steal the lock before we grant it to the next reader. |
83 | * rwsem_down_failed_common() before we acquired the spinlock and | 82 | * We prefer to do the first reader grant before counting readers |
84 | * woken up a waiter, making it now active. We prefer to check for | 83 | * so we can bail out early if a writer stole the lock. |
85 | * this first in order to not spend too much time with the spinlock | ||
86 | * held if we're not going to be able to wake up readers in the end. | ||
87 | * | ||
88 | * Note that we do not need to update the rwsem count: any writer | ||
89 | * trying to acquire rwsem will run rwsem_down_write_failed() due | ||
90 | * to the waiting threads and block trying to acquire the spinlock. | ||
91 | * | ||
92 | * We use a dummy atomic update in order to acquire the cache line | ||
93 | * exclusively since we expect to succeed and run the final rwsem | ||
94 | * count adjustment pretty soon. | ||
95 | */ | 84 | */ |
96 | if (wake_type == RWSEM_WAKE_ANY && | 85 | adjustment = 0; |
97 | rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS) | 86 | if (wake_type != RWSEM_WAKE_READ_OWNED) { |
98 | /* Someone grabbed the sem for write already */ | 87 | adjustment = RWSEM_ACTIVE_READ_BIAS; |
99 | goto out; | 88 | try_reader_grant: |
89 | oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; | ||
90 | if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { | ||
91 | /* A writer stole the lock. Undo our reader grant. */ | ||
92 | if (rwsem_atomic_update(-adjustment, sem) & | ||
93 | RWSEM_ACTIVE_MASK) | ||
94 | goto out; | ||
95 | /* Last active locker left. Retry waking readers. */ | ||
96 | goto try_reader_grant; | ||
97 | } | ||
98 | } | ||
100 | 99 | ||
101 | /* Grant an infinite number of read locks to the readers at the front | 100 | /* Grant an infinite number of read locks to the readers at the front |
102 | * of the queue. Note we increment the 'active part' of the count by | 101 | * of the queue. Note we increment the 'active part' of the count by |
@@ -114,12 +113,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) | |||
114 | 113 | ||
115 | } while (waiter->type != RWSEM_WAITING_FOR_WRITE); | 114 | } while (waiter->type != RWSEM_WAITING_FOR_WRITE); |
116 | 115 | ||
117 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS; | 116 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; |
118 | if (waiter->type != RWSEM_WAITING_FOR_WRITE) | 117 | if (waiter->type != RWSEM_WAITING_FOR_WRITE) |
119 | /* hit end of list above */ | 118 | /* hit end of list above */ |
120 | adjustment -= RWSEM_WAITING_BIAS; | 119 | adjustment -= RWSEM_WAITING_BIAS; |
121 | 120 | ||
122 | rwsem_atomic_add(adjustment, sem); | 121 | if (adjustment) |
122 | rwsem_atomic_add(adjustment, sem); | ||
123 | 123 | ||
124 | next = sem->wait_list.next; | 124 | next = sem->wait_list.next; |
125 | loop = woken; | 125 | loop = woken; |
@@ -164,8 +164,8 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | |||
164 | count = rwsem_atomic_update(adjustment, sem); | 164 | count = rwsem_atomic_update(adjustment, sem); |
165 | 165 | ||
166 | /* If there are no active locks, wake the front queued process(es). */ | 166 | /* If there are no active locks, wake the front queued process(es). */ |
167 | if (count == RWSEM_WAITING_BIAS) | 167 | if (!(count & RWSEM_ACTIVE_MASK)) |
168 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); | 168 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); |
169 | 169 | ||
170 | raw_spin_unlock_irq(&sem->wait_lock); | 170 | raw_spin_unlock_irq(&sem->wait_lock); |
171 | 171 | ||
@@ -209,7 +209,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) | |||
209 | * any read locks that were queued ahead of us. */ | 209 | * any read locks that were queued ahead of us. */ |
210 | if (count > RWSEM_WAITING_BIAS && | 210 | if (count > RWSEM_WAITING_BIAS && |
211 | adjustment == -RWSEM_ACTIVE_WRITE_BIAS) | 211 | adjustment == -RWSEM_ACTIVE_WRITE_BIAS) |
212 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); | 212 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); |
213 | 213 | ||
214 | /* wait until we successfully acquire the lock */ | 214 | /* wait until we successfully acquire the lock */ |
215 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | 215 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); |