aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sys.c
diff options
context:
space:
mode:
authorAlan Stern <stern@rowland.harvard.edu>2006-03-27 04:16:30 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-27 11:44:50 -0500
commite041c683412d5bf44dc2b109053e3b837b71742d (patch)
tree9d271066ef379da0c0fb3b8cb4137abd5d2ebba0 /kernel/sys.c
parent76b81e2b0e2241accebcc68e126bc5ab958661b9 (diff)
[PATCH] Notifier chain update: API changes
The kernel's implementation of notifier chains is unsafe. There is no protection against entries being added to or removed from a chain while the chain is in use. The issues were discussed in this thread: http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2 We noticed that notifier chains in the kernel fall into two basic usage classes: "Blocking" chains are always called from a process context and the callout routines are allowed to sleep; "Atomic" chains can be called from an atomic context and the callout routines are not allowed to sleep. We decided to codify this distinction and make it part of the API. Therefore this set of patches introduces three new, parallel APIs: one for blocking notifiers, one for atomic notifiers, and one for "raw" notifiers (which is really just the old API under a new name). New kinds of data structures are used for the heads of the chains, and new routines are defined for registration, unregistration, and calling a chain. The three APIs are explained in include/linux/notifier.h and their implementation is in kernel/sys.c. With atomic and blocking chains, the implementation guarantees that the chain links will not be corrupted and that chain callers will not get messed up by entries being added or removed. For raw chains the implementation provides no guarantees at all; users of this API must provide their own protections. (The idea was that situations may come up where the assumptions of the atomic and blocking APIs are not appropriate, so it should be possible for users to handle these things in their own way.) There are some limitations, which should not be too hard to live with. For atomic/blocking chains, registration and unregistration must always be done in a process context since the chain is protected by a mutex/rwsem. Also, a callout routine for a non-raw chain must not try to register or unregister entries on its own chain. (This did happen in a couple of places and the code had to be changed to avoid it.) Since atomic chains may be called from within an NMI handler, they cannot use spinlocks for synchronization. Instead we use RCU. The overhead falls almost entirely in the unregister routine, which is okay since unregistration is much less frequent that calling a chain. Here is the list of chains that we adjusted and their classifications. None of them use the raw API, so for the moment it is only a placeholder. ATOMIC CHAINS ------------- arch/i386/kernel/traps.c: i386die_chain arch/ia64/kernel/traps.c: ia64die_chain arch/powerpc/kernel/traps.c: powerpc_die_chain arch/sparc64/kernel/traps.c: sparc64die_chain arch/x86_64/kernel/traps.c: die_chain drivers/char/ipmi/ipmi_si_intf.c: xaction_notifier_list kernel/panic.c: panic_notifier_list kernel/profile.c: task_free_notifier net/bluetooth/hci_core.c: hci_notifier net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_chain net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_expect_chain net/ipv6/addrconf.c: inet6addr_chain net/netfilter/nf_conntrack_core.c: nf_conntrack_chain net/netfilter/nf_conntrack_core.c: nf_conntrack_expect_chain net/netlink/af_netlink.c: netlink_chain BLOCKING CHAINS --------------- arch/powerpc/platforms/pseries/reconfig.c: pSeries_reconfig_chain arch/s390/kernel/process.c: idle_chain arch/x86_64/kernel/process.c idle_notifier drivers/base/memory.c: memory_chain drivers/cpufreq/cpufreq.c cpufreq_policy_notifier_list drivers/cpufreq/cpufreq.c cpufreq_transition_notifier_list drivers/macintosh/adb.c: adb_client_list drivers/macintosh/via-pmu.c sleep_notifier_list drivers/macintosh/via-pmu68k.c sleep_notifier_list drivers/macintosh/windfarm_core.c wf_client_list drivers/usb/core/notify.c usb_notifier_list drivers/video/fbmem.c fb_notifier_list kernel/cpu.c cpu_chain kernel/module.c module_notify_list kernel/profile.c munmap_notifier kernel/profile.c task_exit_notifier kernel/sys.c reboot_notifier_list net/core/dev.c netdev_chain net/decnet/dn_dev.c: dnaddr_chain net/ipv4/devinet.c: inetaddr_chain It's possible that some of these classifications are wrong. If they are, please let us know or submit a patch to fix them. Note that any chain that gets called very frequently should be atomic, because the rwsem read-locking used for blocking chains is very likely to incur cache misses on SMP systems. (However, if the chain's callout routines may sleep then the chain cannot be atomic.) The patch set was written by Alan Stern and Chandra Seetharaman, incorporating material written by Keith Owens and suggestions from Paul McKenney and Andrew Morton. [jes@sgi.com: restructure the notifier chain initialization macros] Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com> Signed-off-by: Jes Sorensen <jes@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel/sys.c')
-rw-r--r--kernel/sys.c327
1 files changed, 266 insertions, 61 deletions
diff --git a/kernel/sys.c b/kernel/sys.c
index 38bc73ede2ba..c93d37f71aef 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -95,99 +95,304 @@ int cad_pid = 1;
95 * and the like. 95 * and the like.
96 */ 96 */
97 97
98static struct notifier_block *reboot_notifier_list; 98static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
99static DEFINE_RWLOCK(notifier_lock); 99
100/*
101 * Notifier chain core routines. The exported routines below
102 * are layered on top of these, with appropriate locking added.
103 */
104
105static int notifier_chain_register(struct notifier_block **nl,
106 struct notifier_block *n)
107{
108 while ((*nl) != NULL) {
109 if (n->priority > (*nl)->priority)
110 break;
111 nl = &((*nl)->next);
112 }
113 n->next = *nl;
114 rcu_assign_pointer(*nl, n);
115 return 0;
116}
117
118static int notifier_chain_unregister(struct notifier_block **nl,
119 struct notifier_block *n)
120{
121 while ((*nl) != NULL) {
122 if ((*nl) == n) {
123 rcu_assign_pointer(*nl, n->next);
124 return 0;
125 }
126 nl = &((*nl)->next);
127 }
128 return -ENOENT;
129}
130
131static int __kprobes notifier_call_chain(struct notifier_block **nl,
132 unsigned long val, void *v)
133{
134 int ret = NOTIFY_DONE;
135 struct notifier_block *nb;
136
137 nb = rcu_dereference(*nl);
138 while (nb) {
139 ret = nb->notifier_call(nb, val, v);
140 if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
141 break;
142 nb = rcu_dereference(nb->next);
143 }
144 return ret;
145}
146
147/*
148 * Atomic notifier chain routines. Registration and unregistration
149 * use a mutex, and call_chain is synchronized by RCU (no locks).
150 */
100 151
101/** 152/**
102 * notifier_chain_register - Add notifier to a notifier chain 153 * atomic_notifier_chain_register - Add notifier to an atomic notifier chain
103 * @list: Pointer to root list pointer 154 * @nh: Pointer to head of the atomic notifier chain
104 * @n: New entry in notifier chain 155 * @n: New entry in notifier chain
105 * 156 *
106 * Adds a notifier to a notifier chain. 157 * Adds a notifier to an atomic notifier chain.
107 * 158 *
108 * Currently always returns zero. 159 * Currently always returns zero.
109 */ 160 */
161
162int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
163 struct notifier_block *n)
164{
165 unsigned long flags;
166 int ret;
167
168 spin_lock_irqsave(&nh->lock, flags);
169 ret = notifier_chain_register(&nh->head, n);
170 spin_unlock_irqrestore(&nh->lock, flags);
171 return ret;
172}
173
174EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
175
176/**
177 * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
178 * @nh: Pointer to head of the atomic notifier chain
179 * @n: Entry to remove from notifier chain
180 *
181 * Removes a notifier from an atomic notifier chain.
182 *
183 * Returns zero on success or %-ENOENT on failure.
184 */
185int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
186 struct notifier_block *n)
187{
188 unsigned long flags;
189 int ret;
190
191 spin_lock_irqsave(&nh->lock, flags);
192 ret = notifier_chain_unregister(&nh->head, n);
193 spin_unlock_irqrestore(&nh->lock, flags);
194 synchronize_rcu();
195 return ret;
196}
197
198EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
199
200/**
201 * atomic_notifier_call_chain - Call functions in an atomic notifier chain
202 * @nh: Pointer to head of the atomic notifier chain
203 * @val: Value passed unmodified to notifier function
204 * @v: Pointer passed unmodified to notifier function
205 *
206 * Calls each function in a notifier chain in turn. The functions
207 * run in an atomic context, so they must not block.
208 * This routine uses RCU to synchronize with changes to the chain.
209 *
210 * If the return value of the notifier can be and'ed
211 * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain
212 * will return immediately, with the return value of
213 * the notifier function which halted execution.
214 * Otherwise the return value is the return value
215 * of the last notifier function called.
216 */
110 217
111int notifier_chain_register(struct notifier_block **list, struct notifier_block *n) 218int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
219 unsigned long val, void *v)
112{ 220{
113 write_lock(&notifier_lock); 221 int ret;
114 while(*list) 222
115 { 223 rcu_read_lock();
116 if(n->priority > (*list)->priority) 224 ret = notifier_call_chain(&nh->head, val, v);
117 break; 225 rcu_read_unlock();
118 list= &((*list)->next); 226 return ret;
119 }
120 n->next = *list;
121 *list=n;
122 write_unlock(&notifier_lock);
123 return 0;
124} 227}
125 228
126EXPORT_SYMBOL(notifier_chain_register); 229EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
230
231/*
232 * Blocking notifier chain routines. All access to the chain is
233 * synchronized by an rwsem.
234 */
127 235
128/** 236/**
129 * notifier_chain_unregister - Remove notifier from a notifier chain 237 * blocking_notifier_chain_register - Add notifier to a blocking notifier chain
130 * @nl: Pointer to root list pointer 238 * @nh: Pointer to head of the blocking notifier chain
131 * @n: New entry in notifier chain 239 * @n: New entry in notifier chain
132 * 240 *
133 * Removes a notifier from a notifier chain. 241 * Adds a notifier to a blocking notifier chain.
242 * Must be called in process context.
134 * 243 *
135 * Returns zero on success, or %-ENOENT on failure. 244 * Currently always returns zero.
136 */ 245 */
137 246
138int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n) 247int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
248 struct notifier_block *n)
139{ 249{
140 write_lock(&notifier_lock); 250 int ret;
141 while((*nl)!=NULL) 251
142 { 252 /*
143 if((*nl)==n) 253 * This code gets used during boot-up, when task switching is
144 { 254 * not yet working and interrupts must remain disabled. At
145 *nl=n->next; 255 * such times we must not call down_write().
146 write_unlock(&notifier_lock); 256 */
147 return 0; 257 if (unlikely(system_state == SYSTEM_BOOTING))
148 } 258 return notifier_chain_register(&nh->head, n);
149 nl=&((*nl)->next); 259
150 } 260 down_write(&nh->rwsem);
151 write_unlock(&notifier_lock); 261 ret = notifier_chain_register(&nh->head, n);
152 return -ENOENT; 262 up_write(&nh->rwsem);
263 return ret;
153} 264}
154 265
155EXPORT_SYMBOL(notifier_chain_unregister); 266EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
156 267
157/** 268/**
158 * notifier_call_chain - Call functions in a notifier chain 269 * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
159 * @n: Pointer to root pointer of notifier chain 270 * @nh: Pointer to head of the blocking notifier chain
271 * @n: Entry to remove from notifier chain
272 *
273 * Removes a notifier from a blocking notifier chain.
274 * Must be called from process context.
275 *
276 * Returns zero on success or %-ENOENT on failure.
277 */
278int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
279 struct notifier_block *n)
280{
281 int ret;
282
283 /*
284 * This code gets used during boot-up, when task switching is
285 * not yet working and interrupts must remain disabled. At
286 * such times we must not call down_write().
287 */
288 if (unlikely(system_state == SYSTEM_BOOTING))
289 return notifier_chain_unregister(&nh->head, n);
290
291 down_write(&nh->rwsem);
292 ret = notifier_chain_unregister(&nh->head, n);
293 up_write(&nh->rwsem);
294 return ret;
295}
296
297EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
298
299/**
300 * blocking_notifier_call_chain - Call functions in a blocking notifier chain
301 * @nh: Pointer to head of the blocking notifier chain
160 * @val: Value passed unmodified to notifier function 302 * @val: Value passed unmodified to notifier function
161 * @v: Pointer passed unmodified to notifier function 303 * @v: Pointer passed unmodified to notifier function
162 * 304 *
163 * Calls each function in a notifier chain in turn. 305 * Calls each function in a notifier chain in turn. The functions
306 * run in a process context, so they are allowed to block.
164 * 307 *
165 * If the return value of the notifier can be and'd 308 * If the return value of the notifier can be and'ed
166 * with %NOTIFY_STOP_MASK, then notifier_call_chain 309 * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain
167 * will return immediately, with the return value of 310 * will return immediately, with the return value of
168 * the notifier function which halted execution. 311 * the notifier function which halted execution.
169 * Otherwise, the return value is the return value 312 * Otherwise the return value is the return value
170 * of the last notifier function called. 313 * of the last notifier function called.
171 */ 314 */
172 315
173int __kprobes notifier_call_chain(struct notifier_block **n, unsigned long val, void *v) 316int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
317 unsigned long val, void *v)
174{ 318{
175 int ret=NOTIFY_DONE; 319 int ret;
176 struct notifier_block *nb = *n;
177 320
178 while(nb) 321 down_read(&nh->rwsem);
179 { 322 ret = notifier_call_chain(&nh->head, val, v);
180 ret=nb->notifier_call(nb,val,v); 323 up_read(&nh->rwsem);
181 if(ret&NOTIFY_STOP_MASK)
182 {
183 return ret;
184 }
185 nb=nb->next;
186 }
187 return ret; 324 return ret;
188} 325}
189 326
190EXPORT_SYMBOL(notifier_call_chain); 327EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
328
329/*
330 * Raw notifier chain routines. There is no protection;
331 * the caller must provide it. Use at your own risk!
332 */
333
334/**
335 * raw_notifier_chain_register - Add notifier to a raw notifier chain
336 * @nh: Pointer to head of the raw notifier chain
337 * @n: New entry in notifier chain
338 *
339 * Adds a notifier to a raw notifier chain.
340 * All locking must be provided by the caller.
341 *
342 * Currently always returns zero.
343 */
344
345int raw_notifier_chain_register(struct raw_notifier_head *nh,
346 struct notifier_block *n)
347{
348 return notifier_chain_register(&nh->head, n);
349}
350
351EXPORT_SYMBOL_GPL(raw_notifier_chain_register);
352
353/**
354 * raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
355 * @nh: Pointer to head of the raw notifier chain
356 * @n: Entry to remove from notifier chain
357 *
358 * Removes a notifier from a raw notifier chain.
359 * All locking must be provided by the caller.
360 *
361 * Returns zero on success or %-ENOENT on failure.
362 */
363int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
364 struct notifier_block *n)
365{
366 return notifier_chain_unregister(&nh->head, n);
367}
368
369EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
370
371/**
372 * raw_notifier_call_chain - Call functions in a raw notifier chain
373 * @nh: Pointer to head of the raw notifier chain
374 * @val: Value passed unmodified to notifier function
375 * @v: Pointer passed unmodified to notifier function
376 *
377 * Calls each function in a notifier chain in turn. The functions
378 * run in an undefined context.
379 * All locking must be provided by the caller.
380 *
381 * If the return value of the notifier can be and'ed
382 * with %NOTIFY_STOP_MASK then raw_notifier_call_chain
383 * will return immediately, with the return value of
384 * the notifier function which halted execution.
385 * Otherwise the return value is the return value
386 * of the last notifier function called.
387 */
388
389int raw_notifier_call_chain(struct raw_notifier_head *nh,
390 unsigned long val, void *v)
391{
392 return notifier_call_chain(&nh->head, val, v);
393}
394
395EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
191 396
192/** 397/**
193 * register_reboot_notifier - Register function to be called at reboot time 398 * register_reboot_notifier - Register function to be called at reboot time
@@ -196,13 +401,13 @@ EXPORT_SYMBOL(notifier_call_chain);
196 * Registers a function with the list of functions 401 * Registers a function with the list of functions
197 * to be called at reboot time. 402 * to be called at reboot time.
198 * 403 *
199 * Currently always returns zero, as notifier_chain_register 404 * Currently always returns zero, as blocking_notifier_chain_register
200 * always returns zero. 405 * always returns zero.
201 */ 406 */
202 407
203int register_reboot_notifier(struct notifier_block * nb) 408int register_reboot_notifier(struct notifier_block * nb)
204{ 409{
205 return notifier_chain_register(&reboot_notifier_list, nb); 410 return blocking_notifier_chain_register(&reboot_notifier_list, nb);
206} 411}
207 412
208EXPORT_SYMBOL(register_reboot_notifier); 413EXPORT_SYMBOL(register_reboot_notifier);
@@ -219,7 +424,7 @@ EXPORT_SYMBOL(register_reboot_notifier);
219 424
220int unregister_reboot_notifier(struct notifier_block * nb) 425int unregister_reboot_notifier(struct notifier_block * nb)
221{ 426{
222 return notifier_chain_unregister(&reboot_notifier_list, nb); 427 return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
223} 428}
224 429
225EXPORT_SYMBOL(unregister_reboot_notifier); 430EXPORT_SYMBOL(unregister_reboot_notifier);
@@ -380,7 +585,7 @@ EXPORT_SYMBOL_GPL(emergency_restart);
380 585
381void kernel_restart_prepare(char *cmd) 586void kernel_restart_prepare(char *cmd)
382{ 587{
383 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); 588 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
384 system_state = SYSTEM_RESTART; 589 system_state = SYSTEM_RESTART;
385 device_shutdown(); 590 device_shutdown();
386} 591}
@@ -430,7 +635,7 @@ EXPORT_SYMBOL_GPL(kernel_kexec);
430 635
431void kernel_shutdown_prepare(enum system_states state) 636void kernel_shutdown_prepare(enum system_states state)
432{ 637{
433 notifier_call_chain(&reboot_notifier_list, 638 blocking_notifier_call_chain(&reboot_notifier_list,
434 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); 639 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
435 system_state = state; 640 system_state = state;
436 device_shutdown(); 641 device_shutdown();