diff options
author | Alexey Dobriyan <adobriyan@sw.ru> | 2007-07-16 02:39:00 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-16 12:05:39 -0400 |
commit | 786d7e1612f0b0adb6046f19b906609e4fe8b1ba (patch) | |
tree | 9d5f1623c19c9d3f84606ea160d57cd3c8c97ea9 | |
parent | 5568b0e8028d966ddb16f0be44a9df1fcbd1dc8d (diff) |
Fix rmmod/read/write races in /proc entries
Fix following races:
===========================================
1. Write via ->write_proc sleeps in copy_from_user(). Module disappears
meanwhile. Or, more generically, system call done on /proc file, method
supplied by module is called, module dissapeares meanwhile.
pde = create_proc_entry()
if (!pde)
return -ENOMEM;
pde->write_proc = ...
open
write
copy_from_user
pde = create_proc_entry();
if (!pde) {
remove_proc_entry();
return -ENOMEM;
/* module unloaded */
}
*boom*
==========================================
2. bogo-revoke aka proc_kill_inodes()
remove_proc_entry vfs_read
proc_kill_inodes [check ->f_op validness]
[check ->f_op->read validness]
[verify_area, security permissions checks]
->f_op = NULL;
if (file->f_op->read)
/* ->f_op dereference, boom */
NOTE, NOTE, NOTE: file_operations are proxied for regular files only. Let's
see how this scheme behaves, then extend if needed for directories.
Directories creators in /proc only set ->owner for them, so proxying for
directories may be unneeded.
NOTE, NOTE, NOTE: methods being proxied are ->llseek, ->read, ->write,
->poll, ->unlocked_ioctl, ->ioctl, ->compat_ioctl, ->open, ->release.
If your in-tree module uses something else, yell on me. Full audit pending.
[akpm@linux-foundation.org: build fix]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/proc/generic.c | 32 | ||||
-rw-r--r-- | fs/proc/inode.c | 254 | ||||
-rw-r--r-- | include/linux/proc_fs.h | 13 |
3 files changed, 296 insertions, 3 deletions
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 8a40e15f5ecb..4f8e53568b22 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/namei.h> | 20 | #include <linux/namei.h> |
21 | #include <linux/bitops.h> | 21 | #include <linux/bitops.h> |
22 | #include <linux/spinlock.h> | 22 | #include <linux/spinlock.h> |
23 | #include <linux/completion.h> | ||
23 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
24 | 25 | ||
25 | #include "internal.h" | 26 | #include "internal.h" |
@@ -613,6 +614,9 @@ static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, | |||
613 | ent->namelen = len; | 614 | ent->namelen = len; |
614 | ent->mode = mode; | 615 | ent->mode = mode; |
615 | ent->nlink = nlink; | 616 | ent->nlink = nlink; |
617 | ent->pde_users = 0; | ||
618 | spin_lock_init(&ent->pde_unload_lock); | ||
619 | ent->pde_unload_completion = NULL; | ||
616 | out: | 620 | out: |
617 | return ent; | 621 | return ent; |
618 | } | 622 | } |
@@ -734,9 +738,35 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) | |||
734 | de = *p; | 738 | de = *p; |
735 | *p = de->next; | 739 | *p = de->next; |
736 | de->next = NULL; | 740 | de->next = NULL; |
741 | |||
742 | spin_lock(&de->pde_unload_lock); | ||
743 | /* | ||
744 | * Stop accepting new callers into module. If you're | ||
745 | * dynamically allocating ->proc_fops, save a pointer somewhere. | ||
746 | */ | ||
747 | de->proc_fops = NULL; | ||
748 | /* Wait until all existing callers into module are done. */ | ||
749 | if (de->pde_users > 0) { | ||
750 | DECLARE_COMPLETION_ONSTACK(c); | ||
751 | |||
752 | if (!de->pde_unload_completion) | ||
753 | de->pde_unload_completion = &c; | ||
754 | |||
755 | spin_unlock(&de->pde_unload_lock); | ||
756 | spin_unlock(&proc_subdir_lock); | ||
757 | |||
758 | wait_for_completion(de->pde_unload_completion); | ||
759 | |||
760 | spin_lock(&proc_subdir_lock); | ||
761 | goto continue_removing; | ||
762 | } | ||
763 | spin_unlock(&de->pde_unload_lock); | ||
764 | |||
765 | continue_removing: | ||
737 | if (S_ISDIR(de->mode)) | 766 | if (S_ISDIR(de->mode)) |
738 | parent->nlink--; | 767 | parent->nlink--; |
739 | proc_kill_inodes(de); | 768 | if (!S_ISREG(de->mode)) |
769 | proc_kill_inodes(de); | ||
740 | de->nlink = 0; | 770 | de->nlink = 0; |
741 | WARN_ON(de->subdir); | 771 | WARN_ON(de->subdir); |
742 | if (!atomic_read(&de->count)) | 772 | if (!atomic_read(&de->count)) |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d5ce65c68d7b..dd28e86ab422 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <linux/stat.h> | 12 | #include <linux/stat.h> |
13 | #include <linux/completion.h> | ||
13 | #include <linux/file.h> | 14 | #include <linux/file.h> |
14 | #include <linux/limits.h> | 15 | #include <linux/limits.h> |
15 | #include <linux/init.h> | 16 | #include <linux/init.h> |
@@ -140,6 +141,251 @@ static const struct super_operations proc_sops = { | |||
140 | .remount_fs = proc_remount, | 141 | .remount_fs = proc_remount, |
141 | }; | 142 | }; |
142 | 143 | ||
144 | static void pde_users_dec(struct proc_dir_entry *pde) | ||
145 | { | ||
146 | spin_lock(&pde->pde_unload_lock); | ||
147 | pde->pde_users--; | ||
148 | if (pde->pde_unload_completion && pde->pde_users == 0) | ||
149 | complete(pde->pde_unload_completion); | ||
150 | spin_unlock(&pde->pde_unload_lock); | ||
151 | } | ||
152 | |||
153 | static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) | ||
154 | { | ||
155 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
156 | loff_t rv = -EINVAL; | ||
157 | loff_t (*llseek)(struct file *, loff_t, int); | ||
158 | |||
159 | spin_lock(&pde->pde_unload_lock); | ||
160 | /* | ||
161 | * remove_proc_entry() is going to delete PDE (as part of module | ||
162 | * cleanup sequence). No new callers into module allowed. | ||
163 | */ | ||
164 | if (!pde->proc_fops) { | ||
165 | spin_unlock(&pde->pde_unload_lock); | ||
166 | return rv; | ||
167 | } | ||
168 | /* | ||
169 | * Bump refcount so that remove_proc_entry will wail for ->llseek to | ||
170 | * complete. | ||
171 | */ | ||
172 | pde->pde_users++; | ||
173 | /* | ||
174 | * Save function pointer under lock, to protect against ->proc_fops | ||
175 | * NULL'ifying right after ->pde_unload_lock is dropped. | ||
176 | */ | ||
177 | llseek = pde->proc_fops->llseek; | ||
178 | spin_unlock(&pde->pde_unload_lock); | ||
179 | |||
180 | if (!llseek) | ||
181 | llseek = default_llseek; | ||
182 | rv = llseek(file, offset, whence); | ||
183 | |||
184 | pde_users_dec(pde); | ||
185 | return rv; | ||
186 | } | ||
187 | |||
188 | static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | ||
189 | { | ||
190 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
191 | ssize_t rv = -EIO; | ||
192 | ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); | ||
193 | |||
194 | spin_lock(&pde->pde_unload_lock); | ||
195 | if (!pde->proc_fops) { | ||
196 | spin_unlock(&pde->pde_unload_lock); | ||
197 | return rv; | ||
198 | } | ||
199 | pde->pde_users++; | ||
200 | read = pde->proc_fops->read; | ||
201 | spin_unlock(&pde->pde_unload_lock); | ||
202 | |||
203 | if (read) | ||
204 | rv = read(file, buf, count, ppos); | ||
205 | |||
206 | pde_users_dec(pde); | ||
207 | return rv; | ||
208 | } | ||
209 | |||
210 | static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) | ||
211 | { | ||
212 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
213 | ssize_t rv = -EIO; | ||
214 | ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); | ||
215 | |||
216 | spin_lock(&pde->pde_unload_lock); | ||
217 | if (!pde->proc_fops) { | ||
218 | spin_unlock(&pde->pde_unload_lock); | ||
219 | return rv; | ||
220 | } | ||
221 | pde->pde_users++; | ||
222 | write = pde->proc_fops->write; | ||
223 | spin_unlock(&pde->pde_unload_lock); | ||
224 | |||
225 | if (write) | ||
226 | rv = write(file, buf, count, ppos); | ||
227 | |||
228 | pde_users_dec(pde); | ||
229 | return rv; | ||
230 | } | ||
231 | |||
232 | static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts) | ||
233 | { | ||
234 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
235 | unsigned int rv = 0; | ||
236 | unsigned int (*poll)(struct file *, struct poll_table_struct *); | ||
237 | |||
238 | spin_lock(&pde->pde_unload_lock); | ||
239 | if (!pde->proc_fops) { | ||
240 | spin_unlock(&pde->pde_unload_lock); | ||
241 | return rv; | ||
242 | } | ||
243 | pde->pde_users++; | ||
244 | poll = pde->proc_fops->poll; | ||
245 | spin_unlock(&pde->pde_unload_lock); | ||
246 | |||
247 | if (poll) | ||
248 | rv = poll(file, pts); | ||
249 | |||
250 | pde_users_dec(pde); | ||
251 | return rv; | ||
252 | } | ||
253 | |||
254 | static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
255 | { | ||
256 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
257 | long rv = -ENOTTY; | ||
258 | long (*unlocked_ioctl)(struct file *, unsigned int, unsigned long); | ||
259 | int (*ioctl)(struct inode *, struct file *, unsigned int, unsigned long); | ||
260 | |||
261 | spin_lock(&pde->pde_unload_lock); | ||
262 | if (!pde->proc_fops) { | ||
263 | spin_unlock(&pde->pde_unload_lock); | ||
264 | return rv; | ||
265 | } | ||
266 | pde->pde_users++; | ||
267 | unlocked_ioctl = pde->proc_fops->unlocked_ioctl; | ||
268 | ioctl = pde->proc_fops->ioctl; | ||
269 | spin_unlock(&pde->pde_unload_lock); | ||
270 | |||
271 | if (unlocked_ioctl) { | ||
272 | rv = unlocked_ioctl(file, cmd, arg); | ||
273 | if (rv == -ENOIOCTLCMD) | ||
274 | rv = -EINVAL; | ||
275 | } else if (ioctl) { | ||
276 | lock_kernel(); | ||
277 | rv = ioctl(file->f_path.dentry->d_inode, file, cmd, arg); | ||
278 | unlock_kernel(); | ||
279 | } | ||
280 | |||
281 | pde_users_dec(pde); | ||
282 | return rv; | ||
283 | } | ||
284 | |||
285 | #ifdef CONFIG_COMPAT | ||
286 | static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
287 | { | ||
288 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
289 | long rv = -ENOTTY; | ||
290 | long (*compat_ioctl)(struct file *, unsigned int, unsigned long); | ||
291 | |||
292 | spin_lock(&pde->pde_unload_lock); | ||
293 | if (!pde->proc_fops) { | ||
294 | spin_unlock(&pde->pde_unload_lock); | ||
295 | return rv; | ||
296 | } | ||
297 | pde->pde_users++; | ||
298 | compat_ioctl = pde->proc_fops->compat_ioctl; | ||
299 | spin_unlock(&pde->pde_unload_lock); | ||
300 | |||
301 | if (compat_ioctl) | ||
302 | rv = compat_ioctl(file, cmd, arg); | ||
303 | |||
304 | pde_users_dec(pde); | ||
305 | return rv; | ||
306 | } | ||
307 | #endif | ||
308 | |||
309 | static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) | ||
310 | { | ||
311 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
312 | int rv = -EIO; | ||
313 | int (*mmap)(struct file *, struct vm_area_struct *); | ||
314 | |||
315 | spin_lock(&pde->pde_unload_lock); | ||
316 | if (!pde->proc_fops) { | ||
317 | spin_unlock(&pde->pde_unload_lock); | ||
318 | return rv; | ||
319 | } | ||
320 | pde->pde_users++; | ||
321 | mmap = pde->proc_fops->mmap; | ||
322 | spin_unlock(&pde->pde_unload_lock); | ||
323 | |||
324 | if (mmap) | ||
325 | rv = mmap(file, vma); | ||
326 | |||
327 | pde_users_dec(pde); | ||
328 | return rv; | ||
329 | } | ||
330 | |||
331 | static int proc_reg_open(struct inode *inode, struct file *file) | ||
332 | { | ||
333 | struct proc_dir_entry *pde = PDE(inode); | ||
334 | int rv = 0; | ||
335 | int (*open)(struct inode *, struct file *); | ||
336 | |||
337 | spin_lock(&pde->pde_unload_lock); | ||
338 | if (!pde->proc_fops) { | ||
339 | spin_unlock(&pde->pde_unload_lock); | ||
340 | return rv; | ||
341 | } | ||
342 | pde->pde_users++; | ||
343 | open = pde->proc_fops->open; | ||
344 | spin_unlock(&pde->pde_unload_lock); | ||
345 | |||
346 | if (open) | ||
347 | rv = open(inode, file); | ||
348 | |||
349 | pde_users_dec(pde); | ||
350 | return rv; | ||
351 | } | ||
352 | |||
353 | static int proc_reg_release(struct inode *inode, struct file *file) | ||
354 | { | ||
355 | struct proc_dir_entry *pde = PDE(inode); | ||
356 | int rv = 0; | ||
357 | int (*release)(struct inode *, struct file *); | ||
358 | |||
359 | spin_lock(&pde->pde_unload_lock); | ||
360 | if (!pde->proc_fops) { | ||
361 | spin_unlock(&pde->pde_unload_lock); | ||
362 | return rv; | ||
363 | } | ||
364 | pde->pde_users++; | ||
365 | release = pde->proc_fops->release; | ||
366 | spin_unlock(&pde->pde_unload_lock); | ||
367 | |||
368 | if (release) | ||
369 | rv = release(inode, file); | ||
370 | |||
371 | pde_users_dec(pde); | ||
372 | return rv; | ||
373 | } | ||
374 | |||
375 | static const struct file_operations proc_reg_file_ops = { | ||
376 | .llseek = proc_reg_llseek, | ||
377 | .read = proc_reg_read, | ||
378 | .write = proc_reg_write, | ||
379 | .poll = proc_reg_poll, | ||
380 | .unlocked_ioctl = proc_reg_unlocked_ioctl, | ||
381 | #ifdef CONFIG_COMPAT | ||
382 | .compat_ioctl = proc_reg_compat_ioctl, | ||
383 | #endif | ||
384 | .mmap = proc_reg_mmap, | ||
385 | .open = proc_reg_open, | ||
386 | .release = proc_reg_release, | ||
387 | }; | ||
388 | |||
143 | struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, | 389 | struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, |
144 | struct proc_dir_entry *de) | 390 | struct proc_dir_entry *de) |
145 | { | 391 | { |
@@ -166,8 +412,12 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, | |||
166 | inode->i_nlink = de->nlink; | 412 | inode->i_nlink = de->nlink; |
167 | if (de->proc_iops) | 413 | if (de->proc_iops) |
168 | inode->i_op = de->proc_iops; | 414 | inode->i_op = de->proc_iops; |
169 | if (de->proc_fops) | 415 | if (de->proc_fops) { |
170 | inode->i_fop = de->proc_fops; | 416 | if (S_ISREG(inode->i_mode)) |
417 | inode->i_fop = &proc_reg_file_ops; | ||
418 | else | ||
419 | inode->i_fop = de->proc_fops; | ||
420 | } | ||
171 | } | 421 | } |
172 | 422 | ||
173 | return inode; | 423 | return inode; |
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 3469f96bc8b2..28e3664fdf1b 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h | |||
@@ -7,6 +7,8 @@ | |||
7 | #include <linux/magic.h> | 7 | #include <linux/magic.h> |
8 | #include <asm/atomic.h> | 8 | #include <asm/atomic.h> |
9 | 9 | ||
10 | struct completion; | ||
11 | |||
10 | /* | 12 | /* |
11 | * The proc filesystem constants/structures | 13 | * The proc filesystem constants/structures |
12 | */ | 14 | */ |
@@ -56,6 +58,14 @@ struct proc_dir_entry { | |||
56 | gid_t gid; | 58 | gid_t gid; |
57 | loff_t size; | 59 | loff_t size; |
58 | const struct inode_operations *proc_iops; | 60 | const struct inode_operations *proc_iops; |
61 | /* | ||
62 | * NULL ->proc_fops means "PDE is going away RSN" or | ||
63 | * "PDE is just created". In either case, e.g. ->read_proc won't be | ||
64 | * called because it's too late or too early, respectively. | ||
65 | * | ||
66 | * If you're allocating ->proc_fops dynamically, save a pointer | ||
67 | * somewhere. | ||
68 | */ | ||
59 | const struct file_operations *proc_fops; | 69 | const struct file_operations *proc_fops; |
60 | get_info_t *get_info; | 70 | get_info_t *get_info; |
61 | struct module *owner; | 71 | struct module *owner; |
@@ -66,6 +76,9 @@ struct proc_dir_entry { | |||
66 | atomic_t count; /* use count */ | 76 | atomic_t count; /* use count */ |
67 | int deleted; /* delete flag */ | 77 | int deleted; /* delete flag */ |
68 | void *set; | 78 | void *set; |
79 | int pde_users; /* number of callers into module in progress */ | ||
80 | spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ | ||
81 | struct completion *pde_unload_completion; | ||
69 | }; | 82 | }; |
70 | 83 | ||
71 | struct kcore_list { | 84 | struct kcore_list { |