author    Matthew Wilcox <matthew@wil.cx>          2008-03-07 21:55:58 -0500
committer Matthew Wilcox <willy@linux.intel.com>   2008-04-17 10:42:34 -0400
commit    64ac24e738823161693bf791f87adc802cf529ff (patch)
tree      19c0b0cf314d4394ca580c05b86cdf874ce0a167 /kernel
parent    e48b3deee475134585eed03e7afebe4bf9e0dba9 (diff)
Generic semaphore implementation
Semaphores are no longer performance-critical, so a generic C
implementation is better for maintainability, debuggability and
extensibility. Thanks to Peter Zijlstra for fixing the lockdep
warning. Thanks to Harvey Harrison for pointing out that the
unlikely() was unnecessary.
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
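[Editor's note: a minimal usage sketch of the interface this patch backs, to make the API concrete. This is hypothetical caller code, not part of the patch, and it assumes the generic <linux/semaphore.h> header introduced by the companion patch in this series, including its sema_init() initializer.]

    #include <linux/errno.h>
    #include <linux/semaphore.h>

    static struct semaphore demo_sem;

    static int demo_setup(void)
    {
            sema_init(&demo_sem, 1);    /* count of 1: mutual exclusion */
            return 0;
    }

    static int demo_work(void)
    {
            /* Sleeps until the semaphore is available; fails with
             * -EINTR if a signal arrives first. */
            if (down_interruptible(&demo_sem))
                    return -EINTR;

            /* ... exclusive section ... */

            up(&demo_sem);              /* safe from any context */
            return 0;
    }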
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/Makefile      2
-rw-r--r--   kernel/semaphore.c   187
2 files changed, 188 insertions(+), 1 deletion(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c584c55a6e9..f45c69e69688 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-	    hrtimer.o rwsem.o nsproxy.o srcu.o \
+	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_check.o
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
new file mode 100644
index 000000000000..d5a72702f261
--- /dev/null
+++ b/kernel/semaphore.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2008 Intel Corporation
+ * Author: Matthew Wilcox <willy@linux.intel.com>
+ *
+ * Distributed under the terms of the GNU GPL, version 2
+ */
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+
+/*
+ * Some notes on the implementation:
+ *
+ * down_trylock() and up() can be called from interrupt context.
+ * So we have to disable interrupts when taking the lock.
+ *
+ * The ->count variable, if positive, defines how many more tasks can
+ * acquire the semaphore.  If negative, it represents how many tasks are
+ * waiting on the semaphore (*).  If zero, no tasks are waiting, and no more
+ * tasks can acquire the semaphore.
+ *
+ * (*) Except for the window between one task calling up() and the task
+ * sleeping in a __down_common() waking up.  In order to avoid a third task
+ * coming in and stealing the second task's wakeup, we leave the ->count
+ * negative.  If we have a more complex situation, the ->count may become
+ * zero or negative (eg a semaphore with count = 2, three tasks attempt to
+ * acquire it, one sleeps, two finish and call up(), the second task to call
+ * up() notices that the list is empty and just increments count).
+ */
+
+static noinline void __down(struct semaphore *sem);
+static noinline int __down_interruptible(struct semaphore *sem);
+static noinline void __up(struct semaphore *sem);
+
+void down(struct semaphore *sem)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	if (unlikely(sem->count-- <= 0))
+		__down(sem);
+	spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(down);
+
+int down_interruptible(struct semaphore *sem)
+{
+	unsigned long flags;
+	int result = 0;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	if (unlikely(sem->count-- <= 0))
+		result = __down_interruptible(sem);
+	spin_unlock_irqrestore(&sem->lock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL(down_interruptible);
+
+/**
+ * down_trylock - try to acquire the semaphore, without waiting
+ * @sem: the semaphore to be acquired
+ *
+ * Try to acquire the semaphore atomically.  Returns 0 if the semaphore has
+ * been acquired successfully and 1 if it is contended.
+ *
+ * NOTE: This return value is inverted from both spin_trylock and
+ * mutex_trylock!  Be careful about this when converting code.
+ *
+ * Unlike mutex_trylock, this function can be used from interrupt context,
+ * and the semaphore can be released by any task or interrupt.
+ */
+int down_trylock(struct semaphore *sem)
+{
+	unsigned long flags;
+	int count;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	count = sem->count - 1;
+	if (likely(count >= 0))
+		sem->count = count;
+	spin_unlock_irqrestore(&sem->lock, flags);
+
+	return (count < 0);
+}
+EXPORT_SYMBOL(down_trylock);
+
+void up(struct semaphore *sem)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	if (likely(sem->count >= 0))
+		sem->count++;
+	else
+		__up(sem);
+	spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(up);
+
+/* Functions for the contended case */
+
+struct semaphore_waiter {
+	struct list_head list;
+	struct task_struct *task;
+	int up;
+};
+
+/*
+ * Wake up a process waiting on a semaphore.  We need to call this from both
+ * __up and __down_common as it's possible to race a task into the semaphore
+ * if it comes in at just the right time between two tasks calling up() and
+ * a third task waking up.  This function assumes the wait_list is already
+ * checked for being non-empty.
+ */
+static noinline void __sched __up_down_common(struct semaphore *sem)
+{
+	struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
+					struct semaphore_waiter, list);
+	list_del(&waiter->list);
+	waiter->up = 1;
+	wake_up_process(waiter->task);
+}
+
+/*
+ * Because this function is inlined, the 'state' parameter will be constant,
+ * and thus optimised away by the compiler.
+ */
+static inline int __sched __down_common(struct semaphore *sem, long state)
+{
+	int result = 0;
+	struct task_struct *task = current;
+	struct semaphore_waiter waiter;
+
+	list_add_tail(&waiter.list, &sem->wait_list);
+	waiter.task = task;
+	waiter.up = 0;
+
+	for (;;) {
+		if (state == TASK_INTERRUPTIBLE && signal_pending(task))
+			goto interrupted;
+		__set_task_state(task, state);
+		spin_unlock_irq(&sem->lock);
+		schedule();
+		spin_lock_irq(&sem->lock);
+		if (waiter.up)
+			goto woken;
+	}
+
+ interrupted:
+	list_del(&waiter.list);
+	result = -EINTR;
+ woken:
+	/*
+	 * Account for the process which woke us up.  For the case where
+	 * we're interrupted, we need to increment the count on our own
+	 * behalf.  I don't believe we can hit the case where the
+	 * sem->count hits zero, *and* there's a second task sleeping,
+	 * but it doesn't hurt, that's not a commonly exercised path and
+	 * it's not a performance path either.
+	 */
+	if (unlikely((++sem->count >= 0) && !list_empty(&sem->wait_list)))
+		__up_down_common(sem);
+	return result;
+}
+
+static noinline void __sched __down(struct semaphore *sem)
+{
+	__down_common(sem, TASK_UNINTERRUPTIBLE);
+}
+
+static noinline int __sched __down_interruptible(struct semaphore *sem)
+{
+	return __down_common(sem, TASK_INTERRUPTIBLE);
+}
+
+static noinline void __sched __up(struct semaphore *sem)
+{
+	if (unlikely(list_empty(&sem->wait_list)))
+		sem->count++;
+	else
+		__up_down_common(sem);
+}
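
[Editor's note: to make the ->count bookkeeping in the header comment concrete, here is a hypothetical single-threaded userspace model (not part of the patch) that traces the "count = 2, three tasks" scenario. The spinlock is omitted, and the increment a woken waiter performs in __down_common() is folded into up().]

    #include <stdio.h>

    static int count = 2;      /* models sem->count              */
    static int waiters = 0;    /* models the wait_list length    */

    static void model_down(const char *who)
    {
            if (count-- <= 0)          /* same post-decrement test as down() */
                    waiters++;         /* task sleeps                        */
            printf("%s down: count=%d waiters=%d\n", who, count, waiters);
    }

    static void model_up(const char *who)
    {
            if (count >= 0) {
                    count++;           /* nobody waiting: just increment     */
            } else {
                    waiters--;         /* wake one waiter; the waiter's own  */
                    count++;           /* ++sem->count is folded in here     */
            }
            printf("%s up:   count=%d waiters=%d\n", who, count, waiters);
    }

    int main(void)
    {
            model_down("T1");   /* count 1                          */
            model_down("T2");   /* count 0                          */
            model_down("T3");   /* count -1, T3 sleeps              */
            model_up("T1");     /* wakes T3: count back to 0        */
            model_up("T2");     /* list empty: just increments to 1 */
            return 0;
    }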
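
[Editor's note: the NOTE in the down_trylock() kernel-doc deserves a concrete contrast. A hypothetical fragment (not part of the patch) showing the inverted success convention relative to mutex_trylock(), which returns nonzero on success:]

    #include <linux/mutex.h>
    #include <linux/semaphore.h>

    static struct semaphore demo_sem;  /* assume sema_init(&demo_sem, 1) ran */
    static DEFINE_MUTEX(demo_mutex);

    static void demo_trylock_contrast(void)
    {
            if (down_trylock(&demo_sem) == 0) {     /* 0 means acquired */
                    /* ... critical section ... */
                    up(&demo_sem);
            }

            if (mutex_trylock(&demo_mutex)) {       /* nonzero means acquired */
                    /* ... critical section ... */
                    mutex_unlock(&demo_mutex);
            }
    }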