diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-02 23:25:04 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-02 23:25:04 -0400 |
commit | aab174f0df5d72d31caccf281af5f614fa254578 (patch) | |
tree | 2a172c5009c4ac8755e858593154c258ce7709a0 /fs | |
parent | ca41cc96b2813221b05af57d0355157924de5a07 (diff) | |
parent | 2bd2c1941f141ad780135ccc1cd08ca71a24f10a (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs update from Al Viro:
"- big one - consolidation of descriptor-related logic; almost all of
that is moved to fs/file.c
(BTW, I'm seriously tempted to rename the result to fd.c. As it is,
we have a situation when file_table.c is about handling of struct
file and file.c is about handling of descriptor tables; the reasons
are historical - file_table.c used to be about a static array of
struct file we used to have way back).
A lot of stray ends got cleaned up and converted to saner primitives,
disgusting mess in android/binder.c is still disgusting, but at least
doesn't poke so much in descriptor table guts anymore. A bunch of
relatively minor races got fixed in process, plus an ext4 struct file
leak.
- related thing - fget_light() partially unuglified; see fdget() in
there (and yes, it generates the code as good as we used to have).
- also related - bits of Cyrill's procfs stuff that got entangled into
that work; _not_ all of it, just the initial move to fs/proc/fd.c and
switch of fdinfo to seq_file.
- Alex's fs/coredump.c splitoff - the same story, had been easier to
take that commit than mess with conflicts. The rest is a separate
pile, this was just a mechanical code movement.
- a few misc patches all over the place. Not all for this cycle,
there'll be more (and quite a few currently sit in akpm's tree)."
Fix up trivial conflicts in the android binder driver, and some fairly
simple conflicts due to two different changes to the sock_alloc_file()
interface ("take descriptor handling from sock_alloc_file() to callers"
vs "net: Providing protocol type via system.sockprotoname xattr of
/proc/PID/fd entries" adding a dentry name to the socket).
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (72 commits)
MAX_LFS_FILESIZE should be a loff_t
compat: fs: Generic compat_sys_sendfile implementation
fs: push rcu_barrier() from deactivate_locked_super() to filesystems
btrfs: reada_extent doesn't need kref for refcount
coredump: move core dump functionality into its own file
coredump: prevent double-free on an error path in core dumper
usb/gadget: fix misannotations
fcntl: fix misannotations
ceph: don't abuse d_delete() on failure exits
hypfs: ->d_parent is never NULL or negative
vfs: delete surplus inode NULL check
switch simple cases of fget_light to fdget
new helpers: fdget()/fdput()
switch o2hb_region_dev_write() to fget_light()
proc_map_files_readdir(): don't bother with grabbing files
make get_file() return its argument
vhost_set_vring(): turn pollstart/pollstop into bool
switch prctl_set_mm_exe_file() to fget_light()
switch xfs_find_handle() to fget_light()
switch xfs_swapext() to fget_light()
...
Diffstat (limited to 'fs')
94 files changed, 2486 insertions, 2092 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index b85efa773949..392c5dac1981 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c | |||
@@ -560,6 +560,11 @@ static int v9fs_init_inode_cache(void) | |||
560 | */ | 560 | */ |
561 | static void v9fs_destroy_inode_cache(void) | 561 | static void v9fs_destroy_inode_cache(void) |
562 | { | 562 | { |
563 | /* | ||
564 | * Make sure all delayed rcu free inodes are flushed before we | ||
565 | * destroy cache. | ||
566 | */ | ||
567 | rcu_barrier(); | ||
563 | kmem_cache_destroy(v9fs_inode_cache); | 568 | kmem_cache_destroy(v9fs_inode_cache); |
564 | } | 569 | } |
565 | 570 | ||
diff --git a/fs/Makefile b/fs/Makefile index 2fb977934673..8938f8250320 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ | |||
11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ |
12 | seq_file.o xattr.o libfs.o fs-writeback.o \ | 12 | seq_file.o xattr.o libfs.o fs-writeback.o \ |
13 | pnode.o drop_caches.o splice.o sync.o utimes.o \ | 13 | pnode.o drop_caches.o splice.o sync.o utimes.o \ |
14 | stack.o fs_struct.o statfs.o | 14 | stack.o fs_struct.o statfs.o coredump.o |
15 | 15 | ||
16 | ifeq ($(CONFIG_BLOCK),y) | 16 | ifeq ($(CONFIG_BLOCK),y) |
17 | obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o | 17 | obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o |
diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 22a0d7ed5fa1..d57122935793 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c | |||
@@ -280,6 +280,11 @@ static int init_inodecache(void) | |||
280 | 280 | ||
281 | static void destroy_inodecache(void) | 281 | static void destroy_inodecache(void) |
282 | { | 282 | { |
283 | /* | ||
284 | * Make sure all delayed rcu free inodes are flushed before we | ||
285 | * destroy cache. | ||
286 | */ | ||
287 | rcu_barrier(); | ||
283 | kmem_cache_destroy(adfs_inode_cachep); | 288 | kmem_cache_destroy(adfs_inode_cachep); |
284 | } | 289 | } |
285 | 290 | ||
diff --git a/fs/affs/super.c b/fs/affs/super.c index 1f030825cd3a..b84dc7352502 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c | |||
@@ -147,6 +147,11 @@ static int init_inodecache(void) | |||
147 | 147 | ||
148 | static void destroy_inodecache(void) | 148 | static void destroy_inodecache(void) |
149 | { | 149 | { |
150 | /* | ||
151 | * Make sure all delayed rcu free inodes are flushed before we | ||
152 | * destroy cache. | ||
153 | */ | ||
154 | rcu_barrier(); | ||
150 | kmem_cache_destroy(affs_inode_cachep); | 155 | kmem_cache_destroy(affs_inode_cachep); |
151 | } | 156 | } |
152 | 157 | ||
diff --git a/fs/afs/super.c b/fs/afs/super.c index df8c6047c2a1..43165009428d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
@@ -123,6 +123,11 @@ void __exit afs_fs_exit(void) | |||
123 | BUG(); | 123 | BUG(); |
124 | } | 124 | } |
125 | 125 | ||
126 | /* | ||
127 | * Make sure all delayed rcu free inodes are flushed before we | ||
128 | * destroy cache. | ||
129 | */ | ||
130 | rcu_barrier(); | ||
126 | kmem_cache_destroy(afs_inode_cachep); | 131 | kmem_cache_destroy(afs_inode_cachep); |
127 | _leave(""); | 132 | _leave(""); |
128 | } | 133 | } |
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index abf645c1703b..a16214109d31 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c | |||
@@ -221,20 +221,6 @@ static int test_by_type(struct path *path, void *p) | |||
221 | return ino && ino->sbi->type & *(unsigned *)p; | 221 | return ino && ino->sbi->type & *(unsigned *)p; |
222 | } | 222 | } |
223 | 223 | ||
224 | static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file) | ||
225 | { | ||
226 | struct files_struct *files = current->files; | ||
227 | struct fdtable *fdt; | ||
228 | |||
229 | spin_lock(&files->file_lock); | ||
230 | fdt = files_fdtable(files); | ||
231 | BUG_ON(fdt->fd[fd] != NULL); | ||
232 | rcu_assign_pointer(fdt->fd[fd], file); | ||
233 | __set_close_on_exec(fd, fdt); | ||
234 | spin_unlock(&files->file_lock); | ||
235 | } | ||
236 | |||
237 | |||
238 | /* | 224 | /* |
239 | * Open a file descriptor on the autofs mount point corresponding | 225 | * Open a file descriptor on the autofs mount point corresponding |
240 | * to the given path and device number (aka. new_encode_dev(sb->s_dev)). | 226 | * to the given path and device number (aka. new_encode_dev(sb->s_dev)). |
@@ -243,7 +229,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) | |||
243 | { | 229 | { |
244 | int err, fd; | 230 | int err, fd; |
245 | 231 | ||
246 | fd = get_unused_fd(); | 232 | fd = get_unused_fd_flags(O_CLOEXEC); |
247 | if (likely(fd >= 0)) { | 233 | if (likely(fd >= 0)) { |
248 | struct file *filp; | 234 | struct file *filp; |
249 | struct path path; | 235 | struct path path; |
@@ -264,7 +250,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) | |||
264 | goto out; | 250 | goto out; |
265 | } | 251 | } |
266 | 252 | ||
267 | autofs_dev_ioctl_fd_install(fd, filp); | 253 | fd_install(fd, filp); |
268 | } | 254 | } |
269 | 255 | ||
270 | return fd; | 256 | return fd; |
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index da8876d38a7b..dce436e595c1 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c | |||
@@ -175,8 +175,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | |||
175 | return; | 175 | return; |
176 | } | 176 | } |
177 | 177 | ||
178 | pipe = sbi->pipe; | 178 | pipe = get_file(sbi->pipe); |
179 | get_file(pipe); | ||
180 | 179 | ||
181 | mutex_unlock(&sbi->wq_mutex); | 180 | mutex_unlock(&sbi->wq_mutex); |
182 | 181 | ||
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 7f73a692bfd0..2b3bda8d5e68 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -457,6 +457,11 @@ befs_init_inodecache(void) | |||
457 | static void | 457 | static void |
458 | befs_destroy_inodecache(void) | 458 | befs_destroy_inodecache(void) |
459 | { | 459 | { |
460 | /* | ||
461 | * Make sure all delayed rcu free inodes are flushed before we | ||
462 | * destroy cache. | ||
463 | */ | ||
464 | rcu_barrier(); | ||
460 | kmem_cache_destroy(befs_inode_cachep); | 465 | kmem_cache_destroy(befs_inode_cachep); |
461 | } | 466 | } |
462 | 467 | ||
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index b242beba58ed..737aaa3f7090 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c | |||
@@ -280,6 +280,11 @@ static int init_inodecache(void) | |||
280 | 280 | ||
281 | static void destroy_inodecache(void) | 281 | static void destroy_inodecache(void) |
282 | { | 282 | { |
283 | /* | ||
284 | * Make sure all delayed rcu free inodes are flushed before we | ||
285 | * destroy cache. | ||
286 | */ | ||
287 | rcu_barrier(); | ||
283 | kmem_cache_destroy(bfs_inode_cachep); | 288 | kmem_cache_destroy(bfs_inode_cachep); |
284 | } | 289 | } |
285 | 290 | ||
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 1b52956afe33..0225fddf49b7 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -1696,30 +1696,19 @@ static int elf_note_info_init(struct elf_note_info *info) | |||
1696 | return 0; | 1696 | return 0; |
1697 | info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); | 1697 | info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); |
1698 | if (!info->psinfo) | 1698 | if (!info->psinfo) |
1699 | goto notes_free; | 1699 | return 0; |
1700 | info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); | 1700 | info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); |
1701 | if (!info->prstatus) | 1701 | if (!info->prstatus) |
1702 | goto psinfo_free; | 1702 | return 0; |
1703 | info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); | 1703 | info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); |
1704 | if (!info->fpu) | 1704 | if (!info->fpu) |
1705 | goto prstatus_free; | 1705 | return 0; |
1706 | #ifdef ELF_CORE_COPY_XFPREGS | 1706 | #ifdef ELF_CORE_COPY_XFPREGS |
1707 | info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); | 1707 | info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); |
1708 | if (!info->xfpu) | 1708 | if (!info->xfpu) |
1709 | goto fpu_free; | 1709 | return 0; |
1710 | #endif | 1710 | #endif |
1711 | return 1; | 1711 | return 1; |
1712 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1713 | fpu_free: | ||
1714 | kfree(info->fpu); | ||
1715 | #endif | ||
1716 | prstatus_free: | ||
1717 | kfree(info->prstatus); | ||
1718 | psinfo_free: | ||
1719 | kfree(info->psinfo); | ||
1720 | notes_free: | ||
1721 | kfree(info->notes); | ||
1722 | return 0; | ||
1723 | } | 1712 | } |
1724 | 1713 | ||
1725 | static int fill_note_info(struct elfhdr *elf, int phdrs, | 1714 | static int fill_note_info(struct elfhdr *elf, int phdrs, |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4c878476bb91..b08ea4717e9d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -107,6 +107,12 @@ void extent_io_exit(void) | |||
107 | list_del(&eb->leak_list); | 107 | list_del(&eb->leak_list); |
108 | kmem_cache_free(extent_buffer_cache, eb); | 108 | kmem_cache_free(extent_buffer_cache, eb); |
109 | } | 109 | } |
110 | |||
111 | /* | ||
112 | * Make sure all delayed rcu free are flushed before we | ||
113 | * destroy caches. | ||
114 | */ | ||
115 | rcu_barrier(); | ||
110 | if (extent_state_cache) | 116 | if (extent_state_cache) |
111 | kmem_cache_destroy(extent_state_cache); | 117 | kmem_cache_destroy(extent_state_cache); |
112 | if (extent_buffer_cache) | 118 | if (extent_buffer_cache) |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2a028a58619c..a6ed6944e50c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -7076,6 +7076,11 @@ static void init_once(void *foo) | |||
7076 | 7076 | ||
7077 | void btrfs_destroy_cachep(void) | 7077 | void btrfs_destroy_cachep(void) |
7078 | { | 7078 | { |
7079 | /* | ||
7080 | * Make sure all delayed rcu free inodes are flushed before we | ||
7081 | * destroy cache. | ||
7082 | */ | ||
7083 | rcu_barrier(); | ||
7079 | if (btrfs_inode_cachep) | 7084 | if (btrfs_inode_cachep) |
7080 | kmem_cache_destroy(btrfs_inode_cachep); | 7085 | kmem_cache_destroy(btrfs_inode_cachep); |
7081 | if (btrfs_trans_handle_cachep) | 7086 | if (btrfs_trans_handle_cachep) |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 27bfce58da3b..47127c1bd290 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -1397,7 +1397,6 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
1397 | u64 *transid, bool readonly, | 1397 | u64 *transid, bool readonly, |
1398 | struct btrfs_qgroup_inherit **inherit) | 1398 | struct btrfs_qgroup_inherit **inherit) |
1399 | { | 1399 | { |
1400 | struct file *src_file; | ||
1401 | int namelen; | 1400 | int namelen; |
1402 | int ret = 0; | 1401 | int ret = 0; |
1403 | 1402 | ||
@@ -1421,25 +1420,24 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
1421 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 1420 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
1422 | NULL, transid, readonly, inherit); | 1421 | NULL, transid, readonly, inherit); |
1423 | } else { | 1422 | } else { |
1423 | struct fd src = fdget(fd); | ||
1424 | struct inode *src_inode; | 1424 | struct inode *src_inode; |
1425 | src_file = fget(fd); | 1425 | if (!src.file) { |
1426 | if (!src_file) { | ||
1427 | ret = -EINVAL; | 1426 | ret = -EINVAL; |
1428 | goto out_drop_write; | 1427 | goto out_drop_write; |
1429 | } | 1428 | } |
1430 | 1429 | ||
1431 | src_inode = src_file->f_path.dentry->d_inode; | 1430 | src_inode = src.file->f_path.dentry->d_inode; |
1432 | if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { | 1431 | if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { |
1433 | printk(KERN_INFO "btrfs: Snapshot src from " | 1432 | printk(KERN_INFO "btrfs: Snapshot src from " |
1434 | "another FS\n"); | 1433 | "another FS\n"); |
1435 | ret = -EINVAL; | 1434 | ret = -EINVAL; |
1436 | fput(src_file); | 1435 | } else { |
1437 | goto out_drop_write; | 1436 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
1437 | BTRFS_I(src_inode)->root, | ||
1438 | transid, readonly, inherit); | ||
1438 | } | 1439 | } |
1439 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 1440 | fdput(src); |
1440 | BTRFS_I(src_inode)->root, | ||
1441 | transid, readonly, inherit); | ||
1442 | fput(src_file); | ||
1443 | } | 1441 | } |
1444 | out_drop_write: | 1442 | out_drop_write: |
1445 | mnt_drop_write_file(file); | 1443 | mnt_drop_write_file(file); |
@@ -2341,7 +2339,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2341 | { | 2339 | { |
2342 | struct inode *inode = fdentry(file)->d_inode; | 2340 | struct inode *inode = fdentry(file)->d_inode; |
2343 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2341 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2344 | struct file *src_file; | 2342 | struct fd src_file; |
2345 | struct inode *src; | 2343 | struct inode *src; |
2346 | struct btrfs_trans_handle *trans; | 2344 | struct btrfs_trans_handle *trans; |
2347 | struct btrfs_path *path; | 2345 | struct btrfs_path *path; |
@@ -2376,24 +2374,24 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2376 | if (ret) | 2374 | if (ret) |
2377 | return ret; | 2375 | return ret; |
2378 | 2376 | ||
2379 | src_file = fget(srcfd); | 2377 | src_file = fdget(srcfd); |
2380 | if (!src_file) { | 2378 | if (!src_file.file) { |
2381 | ret = -EBADF; | 2379 | ret = -EBADF; |
2382 | goto out_drop_write; | 2380 | goto out_drop_write; |
2383 | } | 2381 | } |
2384 | 2382 | ||
2385 | ret = -EXDEV; | 2383 | ret = -EXDEV; |
2386 | if (src_file->f_path.mnt != file->f_path.mnt) | 2384 | if (src_file.file->f_path.mnt != file->f_path.mnt) |
2387 | goto out_fput; | 2385 | goto out_fput; |
2388 | 2386 | ||
2389 | src = src_file->f_dentry->d_inode; | 2387 | src = src_file.file->f_dentry->d_inode; |
2390 | 2388 | ||
2391 | ret = -EINVAL; | 2389 | ret = -EINVAL; |
2392 | if (src == inode) | 2390 | if (src == inode) |
2393 | goto out_fput; | 2391 | goto out_fput; |
2394 | 2392 | ||
2395 | /* the src must be open for reading */ | 2393 | /* the src must be open for reading */ |
2396 | if (!(src_file->f_mode & FMODE_READ)) | 2394 | if (!(src_file.file->f_mode & FMODE_READ)) |
2397 | goto out_fput; | 2395 | goto out_fput; |
2398 | 2396 | ||
2399 | /* don't make the dst file partly checksummed */ | 2397 | /* don't make the dst file partly checksummed */ |
@@ -2724,7 +2722,7 @@ out_unlock: | |||
2724 | vfree(buf); | 2722 | vfree(buf); |
2725 | btrfs_free_path(path); | 2723 | btrfs_free_path(path); |
2726 | out_fput: | 2724 | out_fput: |
2727 | fput(src_file); | 2725 | fdput(src_file); |
2728 | out_drop_write: | 2726 | out_drop_write: |
2729 | mnt_drop_write_file(file); | 2727 | mnt_drop_write_file(file); |
2730 | return ret; | 2728 | return ret; |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 48a4882d8ad5..a955669519a2 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
@@ -68,7 +68,7 @@ struct reada_extent { | |||
68 | u32 blocksize; | 68 | u32 blocksize; |
69 | int err; | 69 | int err; |
70 | struct list_head extctl; | 70 | struct list_head extctl; |
71 | struct kref refcnt; | 71 | int refcnt; |
72 | spinlock_t lock; | 72 | spinlock_t lock; |
73 | struct reada_zone *zones[BTRFS_MAX_MIRRORS]; | 73 | struct reada_zone *zones[BTRFS_MAX_MIRRORS]; |
74 | int nzones; | 74 | int nzones; |
@@ -126,7 +126,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | |||
126 | spin_lock(&fs_info->reada_lock); | 126 | spin_lock(&fs_info->reada_lock); |
127 | re = radix_tree_lookup(&fs_info->reada_tree, index); | 127 | re = radix_tree_lookup(&fs_info->reada_tree, index); |
128 | if (re) | 128 | if (re) |
129 | kref_get(&re->refcnt); | 129 | re->refcnt++; |
130 | spin_unlock(&fs_info->reada_lock); | 130 | spin_unlock(&fs_info->reada_lock); |
131 | 131 | ||
132 | if (!re) | 132 | if (!re) |
@@ -336,7 +336,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
336 | spin_lock(&fs_info->reada_lock); | 336 | spin_lock(&fs_info->reada_lock); |
337 | re = radix_tree_lookup(&fs_info->reada_tree, index); | 337 | re = radix_tree_lookup(&fs_info->reada_tree, index); |
338 | if (re) | 338 | if (re) |
339 | kref_get(&re->refcnt); | 339 | re->refcnt++; |
340 | spin_unlock(&fs_info->reada_lock); | 340 | spin_unlock(&fs_info->reada_lock); |
341 | 341 | ||
342 | if (re) | 342 | if (re) |
@@ -352,7 +352,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
352 | re->top = *top; | 352 | re->top = *top; |
353 | INIT_LIST_HEAD(&re->extctl); | 353 | INIT_LIST_HEAD(&re->extctl); |
354 | spin_lock_init(&re->lock); | 354 | spin_lock_init(&re->lock); |
355 | kref_init(&re->refcnt); | 355 | re->refcnt = 1; |
356 | 356 | ||
357 | /* | 357 | /* |
358 | * map block | 358 | * map block |
@@ -398,7 +398,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
398 | if (ret == -EEXIST) { | 398 | if (ret == -EEXIST) { |
399 | re_exist = radix_tree_lookup(&fs_info->reada_tree, index); | 399 | re_exist = radix_tree_lookup(&fs_info->reada_tree, index); |
400 | BUG_ON(!re_exist); | 400 | BUG_ON(!re_exist); |
401 | kref_get(&re_exist->refcnt); | 401 | re_exist->refcnt++; |
402 | spin_unlock(&fs_info->reada_lock); | 402 | spin_unlock(&fs_info->reada_lock); |
403 | goto error; | 403 | goto error; |
404 | } | 404 | } |
@@ -465,10 +465,6 @@ error: | |||
465 | return re_exist; | 465 | return re_exist; |
466 | } | 466 | } |
467 | 467 | ||
468 | static void reada_kref_dummy(struct kref *kr) | ||
469 | { | ||
470 | } | ||
471 | |||
472 | static void reada_extent_put(struct btrfs_fs_info *fs_info, | 468 | static void reada_extent_put(struct btrfs_fs_info *fs_info, |
473 | struct reada_extent *re) | 469 | struct reada_extent *re) |
474 | { | 470 | { |
@@ -476,7 +472,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, | |||
476 | unsigned long index = re->logical >> PAGE_CACHE_SHIFT; | 472 | unsigned long index = re->logical >> PAGE_CACHE_SHIFT; |
477 | 473 | ||
478 | spin_lock(&fs_info->reada_lock); | 474 | spin_lock(&fs_info->reada_lock); |
479 | if (!kref_put(&re->refcnt, reada_kref_dummy)) { | 475 | if (--re->refcnt) { |
480 | spin_unlock(&fs_info->reada_lock); | 476 | spin_unlock(&fs_info->reada_lock); |
481 | return; | 477 | return; |
482 | } | 478 | } |
@@ -671,7 +667,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
671 | return 0; | 667 | return 0; |
672 | } | 668 | } |
673 | dev->reada_next = re->logical + re->blocksize; | 669 | dev->reada_next = re->logical + re->blocksize; |
674 | kref_get(&re->refcnt); | 670 | re->refcnt++; |
675 | 671 | ||
676 | spin_unlock(&fs_info->reada_lock); | 672 | spin_unlock(&fs_info->reada_lock); |
677 | 673 | ||
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 4b5762ef7c2b..ba95eea201bf 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -1104,7 +1104,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1104 | pr_err("fill_trace bad get_inode " | 1104 | pr_err("fill_trace bad get_inode " |
1105 | "%llx.%llx\n", vino.ino, vino.snap); | 1105 | "%llx.%llx\n", vino.ino, vino.snap); |
1106 | err = PTR_ERR(in); | 1106 | err = PTR_ERR(in); |
1107 | d_delete(dn); | 1107 | d_drop(dn); |
1108 | goto done; | 1108 | goto done; |
1109 | } | 1109 | } |
1110 | dn = splice_dentry(dn, in, &have_lease, true); | 1110 | dn = splice_dentry(dn, in, &have_lease, true); |
@@ -1277,7 +1277,7 @@ retry_lookup: | |||
1277 | in = ceph_get_inode(parent->d_sb, vino); | 1277 | in = ceph_get_inode(parent->d_sb, vino); |
1278 | if (IS_ERR(in)) { | 1278 | if (IS_ERR(in)) { |
1279 | dout("new_inode badness\n"); | 1279 | dout("new_inode badness\n"); |
1280 | d_delete(dn); | 1280 | d_drop(dn); |
1281 | dput(dn); | 1281 | dput(dn); |
1282 | err = PTR_ERR(in); | 1282 | err = PTR_ERR(in); |
1283 | goto out; | 1283 | goto out; |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b982239f38f9..3a42d9326378 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -603,6 +603,11 @@ bad_cap: | |||
603 | 603 | ||
604 | static void destroy_caches(void) | 604 | static void destroy_caches(void) |
605 | { | 605 | { |
606 | /* | ||
607 | * Make sure all delayed rcu free inodes are flushed before we | ||
608 | * destroy cache. | ||
609 | */ | ||
610 | rcu_barrier(); | ||
606 | kmem_cache_destroy(ceph_inode_cachep); | 611 | kmem_cache_destroy(ceph_inode_cachep); |
607 | kmem_cache_destroy(ceph_cap_cachep); | 612 | kmem_cache_destroy(ceph_cap_cachep); |
608 | kmem_cache_destroy(ceph_dentry_cachep); | 613 | kmem_cache_destroy(ceph_dentry_cachep); |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index a41044a31083..e7931cc55d0c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -968,6 +968,11 @@ cifs_init_inodecache(void) | |||
968 | static void | 968 | static void |
969 | cifs_destroy_inodecache(void) | 969 | cifs_destroy_inodecache(void) |
970 | { | 970 | { |
971 | /* | ||
972 | * Make sure all delayed rcu free inodes are flushed before we | ||
973 | * destroy cache. | ||
974 | */ | ||
975 | rcu_barrier(); | ||
971 | kmem_cache_destroy(cifs_inode_cachep); | 976 | kmem_cache_destroy(cifs_inode_cachep); |
972 | } | 977 | } |
973 | 978 | ||
diff --git a/fs/coda/inode.c b/fs/coda/inode.c index f1813120d753..be2aa4909487 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c | |||
@@ -85,6 +85,11 @@ int coda_init_inodecache(void) | |||
85 | 85 | ||
86 | void coda_destroy_inodecache(void) | 86 | void coda_destroy_inodecache(void) |
87 | { | 87 | { |
88 | /* | ||
89 | * Make sure all delayed rcu free inodes are flushed before we | ||
90 | * destroy cache. | ||
91 | */ | ||
92 | rcu_barrier(); | ||
88 | kmem_cache_destroy(coda_inode_cachep); | 93 | kmem_cache_destroy(coda_inode_cachep); |
89 | } | 94 | } |
90 | 95 | ||
@@ -107,43 +112,41 @@ static const struct super_operations coda_super_operations = | |||
107 | 112 | ||
108 | static int get_device_index(struct coda_mount_data *data) | 113 | static int get_device_index(struct coda_mount_data *data) |
109 | { | 114 | { |
110 | struct file *file; | 115 | struct fd f; |
111 | struct inode *inode; | 116 | struct inode *inode; |
112 | int idx; | 117 | int idx; |
113 | 118 | ||
114 | if(data == NULL) { | 119 | if (data == NULL) { |
115 | printk("coda_read_super: Bad mount data\n"); | 120 | printk("coda_read_super: Bad mount data\n"); |
116 | return -1; | 121 | return -1; |
117 | } | 122 | } |
118 | 123 | ||
119 | if(data->version != CODA_MOUNT_VERSION) { | 124 | if (data->version != CODA_MOUNT_VERSION) { |
120 | printk("coda_read_super: Bad mount version\n"); | 125 | printk("coda_read_super: Bad mount version\n"); |
121 | return -1; | 126 | return -1; |
122 | } | 127 | } |
123 | 128 | ||
124 | file = fget(data->fd); | 129 | f = fdget(data->fd); |
125 | inode = NULL; | 130 | if (!f.file) |
126 | if(file) | 131 | goto Ebadf; |
127 | inode = file->f_path.dentry->d_inode; | 132 | inode = f.file->f_path.dentry->d_inode; |
128 | 133 | if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) { | |
129 | if(!inode || !S_ISCHR(inode->i_mode) || | 134 | fdput(f); |
130 | imajor(inode) != CODA_PSDEV_MAJOR) { | 135 | goto Ebadf; |
131 | if(file) | ||
132 | fput(file); | ||
133 | |||
134 | printk("coda_read_super: Bad file\n"); | ||
135 | return -1; | ||
136 | } | 136 | } |
137 | 137 | ||
138 | idx = iminor(inode); | 138 | idx = iminor(inode); |
139 | fput(file); | 139 | fdput(f); |
140 | 140 | ||
141 | if(idx < 0 || idx >= MAX_CODADEVS) { | 141 | if (idx < 0 || idx >= MAX_CODADEVS) { |
142 | printk("coda_read_super: Bad minor number\n"); | 142 | printk("coda_read_super: Bad minor number\n"); |
143 | return -1; | 143 | return -1; |
144 | } | 144 | } |
145 | 145 | ||
146 | return idx; | 146 | return idx; |
147 | Ebadf: | ||
148 | printk("coda_read_super: Bad file\n"); | ||
149 | return -1; | ||
147 | } | 150 | } |
148 | 151 | ||
149 | static int coda_fill_super(struct super_block *sb, void *data, int silent) | 152 | static int coda_fill_super(struct super_block *sb, void *data, int silent) |
diff --git a/fs/compat.c b/fs/compat.c index 1bdb350ea5d3..b7a24d0ca30d 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -870,22 +870,20 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, | |||
870 | struct compat_old_linux_dirent __user *dirent, unsigned int count) | 870 | struct compat_old_linux_dirent __user *dirent, unsigned int count) |
871 | { | 871 | { |
872 | int error; | 872 | int error; |
873 | struct file *file; | 873 | struct fd f = fdget(fd); |
874 | int fput_needed; | ||
875 | struct compat_readdir_callback buf; | 874 | struct compat_readdir_callback buf; |
876 | 875 | ||
877 | file = fget_light(fd, &fput_needed); | 876 | if (!f.file) |
878 | if (!file) | ||
879 | return -EBADF; | 877 | return -EBADF; |
880 | 878 | ||
881 | buf.result = 0; | 879 | buf.result = 0; |
882 | buf.dirent = dirent; | 880 | buf.dirent = dirent; |
883 | 881 | ||
884 | error = vfs_readdir(file, compat_fillonedir, &buf); | 882 | error = vfs_readdir(f.file, compat_fillonedir, &buf); |
885 | if (buf.result) | 883 | if (buf.result) |
886 | error = buf.result; | 884 | error = buf.result; |
887 | 885 | ||
888 | fput_light(file, fput_needed); | 886 | fdput(f); |
889 | return error; | 887 | return error; |
890 | } | 888 | } |
891 | 889 | ||
@@ -949,17 +947,16 @@ efault: | |||
949 | asmlinkage long compat_sys_getdents(unsigned int fd, | 947 | asmlinkage long compat_sys_getdents(unsigned int fd, |
950 | struct compat_linux_dirent __user *dirent, unsigned int count) | 948 | struct compat_linux_dirent __user *dirent, unsigned int count) |
951 | { | 949 | { |
952 | struct file * file; | 950 | struct fd f; |
953 | struct compat_linux_dirent __user * lastdirent; | 951 | struct compat_linux_dirent __user * lastdirent; |
954 | struct compat_getdents_callback buf; | 952 | struct compat_getdents_callback buf; |
955 | int fput_needed; | ||
956 | int error; | 953 | int error; |
957 | 954 | ||
958 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 955 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
959 | return -EFAULT; | 956 | return -EFAULT; |
960 | 957 | ||
961 | file = fget_light(fd, &fput_needed); | 958 | f = fdget(fd); |
962 | if (!file) | 959 | if (!f.file) |
963 | return -EBADF; | 960 | return -EBADF; |
964 | 961 | ||
965 | buf.current_dir = dirent; | 962 | buf.current_dir = dirent; |
@@ -967,17 +964,17 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
967 | buf.count = count; | 964 | buf.count = count; |
968 | buf.error = 0; | 965 | buf.error = 0; |
969 | 966 | ||
970 | error = vfs_readdir(file, compat_filldir, &buf); | 967 | error = vfs_readdir(f.file, compat_filldir, &buf); |
971 | if (error >= 0) | 968 | if (error >= 0) |
972 | error = buf.error; | 969 | error = buf.error; |
973 | lastdirent = buf.previous; | 970 | lastdirent = buf.previous; |
974 | if (lastdirent) { | 971 | if (lastdirent) { |
975 | if (put_user(file->f_pos, &lastdirent->d_off)) | 972 | if (put_user(f.file->f_pos, &lastdirent->d_off)) |
976 | error = -EFAULT; | 973 | error = -EFAULT; |
977 | else | 974 | else |
978 | error = count - buf.count; | 975 | error = count - buf.count; |
979 | } | 976 | } |
980 | fput_light(file, fput_needed); | 977 | fdput(f); |
981 | return error; | 978 | return error; |
982 | } | 979 | } |
983 | 980 | ||
@@ -1035,17 +1032,16 @@ efault: | |||
1035 | asmlinkage long compat_sys_getdents64(unsigned int fd, | 1032 | asmlinkage long compat_sys_getdents64(unsigned int fd, |
1036 | struct linux_dirent64 __user * dirent, unsigned int count) | 1033 | struct linux_dirent64 __user * dirent, unsigned int count) |
1037 | { | 1034 | { |
1038 | struct file * file; | 1035 | struct fd f; |
1039 | struct linux_dirent64 __user * lastdirent; | 1036 | struct linux_dirent64 __user * lastdirent; |
1040 | struct compat_getdents_callback64 buf; | 1037 | struct compat_getdents_callback64 buf; |
1041 | int fput_needed; | ||
1042 | int error; | 1038 | int error; |
1043 | 1039 | ||
1044 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 1040 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
1045 | return -EFAULT; | 1041 | return -EFAULT; |
1046 | 1042 | ||
1047 | file = fget_light(fd, &fput_needed); | 1043 | f = fdget(fd); |
1048 | if (!file) | 1044 | if (!f.file) |
1049 | return -EBADF; | 1045 | return -EBADF; |
1050 | 1046 | ||
1051 | buf.current_dir = dirent; | 1047 | buf.current_dir = dirent; |
@@ -1053,18 +1049,18 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, | |||
1053 | buf.count = count; | 1049 | buf.count = count; |
1054 | buf.error = 0; | 1050 | buf.error = 0; |
1055 | 1051 | ||
1056 | error = vfs_readdir(file, compat_filldir64, &buf); | 1052 | error = vfs_readdir(f.file, compat_filldir64, &buf); |
1057 | if (error >= 0) | 1053 | if (error >= 0) |
1058 | error = buf.error; | 1054 | error = buf.error; |
1059 | lastdirent = buf.previous; | 1055 | lastdirent = buf.previous; |
1060 | if (lastdirent) { | 1056 | if (lastdirent) { |
1061 | typeof(lastdirent->d_off) d_off = file->f_pos; | 1057 | typeof(lastdirent->d_off) d_off = f.file->f_pos; |
1062 | if (__put_user_unaligned(d_off, &lastdirent->d_off)) | 1058 | if (__put_user_unaligned(d_off, &lastdirent->d_off)) |
1063 | error = -EFAULT; | 1059 | error = -EFAULT; |
1064 | else | 1060 | else |
1065 | error = count - buf.count; | 1061 | error = count - buf.count; |
1066 | } | 1062 | } |
1067 | fput_light(file, fput_needed); | 1063 | fdput(f); |
1068 | return error; | 1064 | return error; |
1069 | } | 1065 | } |
1070 | #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ | 1066 | #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ |
@@ -1152,18 +1148,16 @@ asmlinkage ssize_t | |||
1152 | compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, | 1148 | compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, |
1153 | unsigned long vlen) | 1149 | unsigned long vlen) |
1154 | { | 1150 | { |
1155 | struct file *file; | 1151 | struct fd f = fdget(fd); |
1156 | int fput_needed; | ||
1157 | ssize_t ret; | 1152 | ssize_t ret; |
1158 | loff_t pos; | 1153 | loff_t pos; |
1159 | 1154 | ||
1160 | file = fget_light(fd, &fput_needed); | 1155 | if (!f.file) |
1161 | if (!file) | ||
1162 | return -EBADF; | 1156 | return -EBADF; |
1163 | pos = file->f_pos; | 1157 | pos = f.file->f_pos; |
1164 | ret = compat_readv(file, vec, vlen, &pos); | 1158 | ret = compat_readv(f.file, vec, vlen, &pos); |
1165 | file->f_pos = pos; | 1159 | f.file->f_pos = pos; |
1166 | fput_light(file, fput_needed); | 1160 | fdput(f); |
1167 | return ret; | 1161 | return ret; |
1168 | } | 1162 | } |
1169 | 1163 | ||
@@ -1171,19 +1165,18 @@ asmlinkage ssize_t | |||
1171 | compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, | 1165 | compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, |
1172 | unsigned long vlen, loff_t pos) | 1166 | unsigned long vlen, loff_t pos) |
1173 | { | 1167 | { |
1174 | struct file *file; | 1168 | struct fd f; |
1175 | int fput_needed; | ||
1176 | ssize_t ret; | 1169 | ssize_t ret; |
1177 | 1170 | ||
1178 | if (pos < 0) | 1171 | if (pos < 0) |
1179 | return -EINVAL; | 1172 | return -EINVAL; |
1180 | file = fget_light(fd, &fput_needed); | 1173 | f = fdget(fd); |
1181 | if (!file) | 1174 | if (!f.file) |
1182 | return -EBADF; | 1175 | return -EBADF; |
1183 | ret = -ESPIPE; | 1176 | ret = -ESPIPE; |
1184 | if (file->f_mode & FMODE_PREAD) | 1177 | if (f.file->f_mode & FMODE_PREAD) |
1185 | ret = compat_readv(file, vec, vlen, &pos); | 1178 | ret = compat_readv(f.file, vec, vlen, &pos); |
1186 | fput_light(file, fput_needed); | 1179 | fdput(f); |
1187 | return ret; | 1180 | return ret; |
1188 | } | 1181 | } |
1189 | 1182 | ||
@@ -1221,18 +1214,16 @@ asmlinkage ssize_t | |||
1221 | compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, | 1214 | compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, |
1222 | unsigned long vlen) | 1215 | unsigned long vlen) |
1223 | { | 1216 | { |
1224 | struct file *file; | 1217 | struct fd f = fdget(fd); |
1225 | int fput_needed; | ||
1226 | ssize_t ret; | 1218 | ssize_t ret; |
1227 | loff_t pos; | 1219 | loff_t pos; |
1228 | 1220 | ||
1229 | file = fget_light(fd, &fput_needed); | 1221 | if (!f.file) |
1230 | if (!file) | ||
1231 | return -EBADF; | 1222 | return -EBADF; |
1232 | pos = file->f_pos; | 1223 | pos = f.file->f_pos; |
1233 | ret = compat_writev(file, vec, vlen, &pos); | 1224 | ret = compat_writev(f.file, vec, vlen, &pos); |
1234 | file->f_pos = pos; | 1225 | f.file->f_pos = pos; |
1235 | fput_light(file, fput_needed); | 1226 | fdput(f); |
1236 | return ret; | 1227 | return ret; |
1237 | } | 1228 | } |
1238 | 1229 | ||
@@ -1240,19 +1231,18 @@ asmlinkage ssize_t | |||
1240 | compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, | 1231 | compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, |
1241 | unsigned long vlen, loff_t pos) | 1232 | unsigned long vlen, loff_t pos) |
1242 | { | 1233 | { |
1243 | struct file *file; | 1234 | struct fd f; |
1244 | int fput_needed; | ||
1245 | ssize_t ret; | 1235 | ssize_t ret; |
1246 | 1236 | ||
1247 | if (pos < 0) | 1237 | if (pos < 0) |
1248 | return -EINVAL; | 1238 | return -EINVAL; |
1249 | file = fget_light(fd, &fput_needed); | 1239 | f = fdget(fd); |
1250 | if (!file) | 1240 | if (!f.file) |
1251 | return -EBADF; | 1241 | return -EBADF; |
1252 | ret = -ESPIPE; | 1242 | ret = -ESPIPE; |
1253 | if (file->f_mode & FMODE_PWRITE) | 1243 | if (f.file->f_mode & FMODE_PWRITE) |
1254 | ret = compat_writev(file, vec, vlen, &pos); | 1244 | ret = compat_writev(f.file, vec, vlen, &pos); |
1255 | fput_light(file, fput_needed); | 1245 | fdput(f); |
1256 | return ret; | 1246 | return ret; |
1257 | } | 1247 | } |
1258 | 1248 | ||
@@ -1802,3 +1792,25 @@ compat_sys_open_by_handle_at(int mountdirfd, | |||
1802 | return do_handle_open(mountdirfd, handle, flags); | 1792 | return do_handle_open(mountdirfd, handle, flags); |
1803 | } | 1793 | } |
1804 | #endif | 1794 | #endif |
1795 | |||
1796 | #ifdef __ARCH_WANT_COMPAT_SYS_SENDFILE | ||
1797 | asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, | ||
1798 | compat_off_t __user *offset, compat_size_t count) | ||
1799 | { | ||
1800 | loff_t pos; | ||
1801 | off_t off; | ||
1802 | ssize_t ret; | ||
1803 | |||
1804 | if (offset) { | ||
1805 | if (unlikely(get_user(off, offset))) | ||
1806 | return -EFAULT; | ||
1807 | pos = off; | ||
1808 | ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); | ||
1809 | if (unlikely(put_user(pos, offset))) | ||
1810 | return -EFAULT; | ||
1811 | return ret; | ||
1812 | } | ||
1813 | |||
1814 | return do_sendfile(out_fd, in_fd, NULL, count, 0); | ||
1815 | } | ||
1816 | #endif /* __ARCH_WANT_COMPAT_SYS_SENDFILE */ | ||
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 9c03a3ae898f..f5054025f9da 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -1539,16 +1539,13 @@ static int compat_ioctl_check_table(unsigned int xcmd) | |||
1539 | asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, | 1539 | asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, |
1540 | unsigned long arg) | 1540 | unsigned long arg) |
1541 | { | 1541 | { |
1542 | struct file *filp; | 1542 | struct fd f = fdget(fd); |
1543 | int error = -EBADF; | 1543 | int error = -EBADF; |
1544 | int fput_needed; | 1544 | if (!f.file) |
1545 | |||
1546 | filp = fget_light(fd, &fput_needed); | ||
1547 | if (!filp) | ||
1548 | goto out; | 1545 | goto out; |
1549 | 1546 | ||
1550 | /* RED-PEN how should LSM module know it's handling 32bit? */ | 1547 | /* RED-PEN how should LSM module know it's handling 32bit? */ |
1551 | error = security_file_ioctl(filp, cmd, arg); | 1548 | error = security_file_ioctl(f.file, cmd, arg); |
1552 | if (error) | 1549 | if (error) |
1553 | goto out_fput; | 1550 | goto out_fput; |
1554 | 1551 | ||
@@ -1568,30 +1565,30 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, | |||
1568 | #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) | 1565 | #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) |
1569 | case FS_IOC_RESVSP_32: | 1566 | case FS_IOC_RESVSP_32: |
1570 | case FS_IOC_RESVSP64_32: | 1567 | case FS_IOC_RESVSP64_32: |
1571 | error = compat_ioctl_preallocate(filp, compat_ptr(arg)); | 1568 | error = compat_ioctl_preallocate(f.file, compat_ptr(arg)); |
1572 | goto out_fput; | 1569 | goto out_fput; |
1573 | #else | 1570 | #else |
1574 | case FS_IOC_RESVSP: | 1571 | case FS_IOC_RESVSP: |
1575 | case FS_IOC_RESVSP64: | 1572 | case FS_IOC_RESVSP64: |
1576 | error = ioctl_preallocate(filp, compat_ptr(arg)); | 1573 | error = ioctl_preallocate(f.file, compat_ptr(arg)); |
1577 | goto out_fput; | 1574 | goto out_fput; |
1578 | #endif | 1575 | #endif |
1579 | 1576 | ||
1580 | case FIBMAP: | 1577 | case FIBMAP: |
1581 | case FIGETBSZ: | 1578 | case FIGETBSZ: |
1582 | case FIONREAD: | 1579 | case FIONREAD: |
1583 | if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) | 1580 | if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode)) |
1584 | break; | 1581 | break; |
1585 | /*FALL THROUGH*/ | 1582 | /*FALL THROUGH*/ |
1586 | 1583 | ||
1587 | default: | 1584 | default: |
1588 | if (filp->f_op && filp->f_op->compat_ioctl) { | 1585 | if (f.file->f_op && f.file->f_op->compat_ioctl) { |
1589 | error = filp->f_op->compat_ioctl(filp, cmd, arg); | 1586 | error = f.file->f_op->compat_ioctl(f.file, cmd, arg); |
1590 | if (error != -ENOIOCTLCMD) | 1587 | if (error != -ENOIOCTLCMD) |
1591 | goto out_fput; | 1588 | goto out_fput; |
1592 | } | 1589 | } |
1593 | 1590 | ||
1594 | if (!filp->f_op || !filp->f_op->unlocked_ioctl) | 1591 | if (!f.file->f_op || !f.file->f_op->unlocked_ioctl) |
1595 | goto do_ioctl; | 1592 | goto do_ioctl; |
1596 | break; | 1593 | break; |
1597 | } | 1594 | } |
@@ -1599,7 +1596,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, | |||
1599 | if (compat_ioctl_check_table(XFORM(cmd))) | 1596 | if (compat_ioctl_check_table(XFORM(cmd))) |
1600 | goto found_handler; | 1597 | goto found_handler; |
1601 | 1598 | ||
1602 | error = do_ioctl_trans(fd, cmd, arg, filp); | 1599 | error = do_ioctl_trans(fd, cmd, arg, f.file); |
1603 | if (error == -ENOIOCTLCMD) | 1600 | if (error == -ENOIOCTLCMD) |
1604 | error = -ENOTTY; | 1601 | error = -ENOTTY; |
1605 | 1602 | ||
@@ -1608,9 +1605,9 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, | |||
1608 | found_handler: | 1605 | found_handler: |
1609 | arg = (unsigned long)compat_ptr(arg); | 1606 | arg = (unsigned long)compat_ptr(arg); |
1610 | do_ioctl: | 1607 | do_ioctl: |
1611 | error = do_vfs_ioctl(filp, fd, cmd, arg); | 1608 | error = do_vfs_ioctl(f.file, fd, cmd, arg); |
1612 | out_fput: | 1609 | out_fput: |
1613 | fput_light(filp, fput_needed); | 1610 | fdput(f); |
1614 | out: | 1611 | out: |
1615 | return error; | 1612 | return error; |
1616 | } | 1613 | } |
diff --git a/fs/coredump.c b/fs/coredump.c new file mode 100644 index 000000000000..f045bbad6822 --- /dev/null +++ b/fs/coredump.c | |||
@@ -0,0 +1,686 @@ | |||
1 | #include <linux/slab.h> | ||
2 | #include <linux/file.h> | ||
3 | #include <linux/fdtable.h> | ||
4 | #include <linux/mm.h> | ||
5 | #include <linux/stat.h> | ||
6 | #include <linux/fcntl.h> | ||
7 | #include <linux/swap.h> | ||
8 | #include <linux/string.h> | ||
9 | #include <linux/init.h> | ||
10 | #include <linux/pagemap.h> | ||
11 | #include <linux/perf_event.h> | ||
12 | #include <linux/highmem.h> | ||
13 | #include <linux/spinlock.h> | ||
14 | #include <linux/key.h> | ||
15 | #include <linux/personality.h> | ||
16 | #include <linux/binfmts.h> | ||
17 | #include <linux/utsname.h> | ||
18 | #include <linux/pid_namespace.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/namei.h> | ||
21 | #include <linux/mount.h> | ||
22 | #include <linux/security.h> | ||
23 | #include <linux/syscalls.h> | ||
24 | #include <linux/tsacct_kern.h> | ||
25 | #include <linux/cn_proc.h> | ||
26 | #include <linux/audit.h> | ||
27 | #include <linux/tracehook.h> | ||
28 | #include <linux/kmod.h> | ||
29 | #include <linux/fsnotify.h> | ||
30 | #include <linux/fs_struct.h> | ||
31 | #include <linux/pipe_fs_i.h> | ||
32 | #include <linux/oom.h> | ||
33 | #include <linux/compat.h> | ||
34 | |||
35 | #include <asm/uaccess.h> | ||
36 | #include <asm/mmu_context.h> | ||
37 | #include <asm/tlb.h> | ||
38 | #include <asm/exec.h> | ||
39 | |||
40 | #include <trace/events/task.h> | ||
41 | #include "internal.h" | ||
42 | |||
43 | #include <trace/events/sched.h> | ||
44 | |||
45 | int core_uses_pid; | ||
46 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | ||
47 | unsigned int core_pipe_limit; | ||
48 | |||
49 | struct core_name { | ||
50 | char *corename; | ||
51 | int used, size; | ||
52 | }; | ||
53 | static atomic_t call_count = ATOMIC_INIT(1); | ||
54 | |||
55 | /* The maximal length of core_pattern is also specified in sysctl.c */ | ||
56 | |||
57 | static int expand_corename(struct core_name *cn) | ||
58 | { | ||
59 | char *old_corename = cn->corename; | ||
60 | |||
61 | cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); | ||
62 | cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); | ||
63 | |||
64 | if (!cn->corename) { | ||
65 | kfree(old_corename); | ||
66 | return -ENOMEM; | ||
67 | } | ||
68 | |||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | static int cn_printf(struct core_name *cn, const char *fmt, ...) | ||
73 | { | ||
74 | char *cur; | ||
75 | int need; | ||
76 | int ret; | ||
77 | va_list arg; | ||
78 | |||
79 | va_start(arg, fmt); | ||
80 | need = vsnprintf(NULL, 0, fmt, arg); | ||
81 | va_end(arg); | ||
82 | |||
83 | if (likely(need < cn->size - cn->used - 1)) | ||
84 | goto out_printf; | ||
85 | |||
86 | ret = expand_corename(cn); | ||
87 | if (ret) | ||
88 | goto expand_fail; | ||
89 | |||
90 | out_printf: | ||
91 | cur = cn->corename + cn->used; | ||
92 | va_start(arg, fmt); | ||
93 | vsnprintf(cur, need + 1, fmt, arg); | ||
94 | va_end(arg); | ||
95 | cn->used += need; | ||
96 | return 0; | ||
97 | |||
98 | expand_fail: | ||
99 | return ret; | ||
100 | } | ||
101 | |||
102 | static void cn_escape(char *str) | ||
103 | { | ||
104 | for (; *str; str++) | ||
105 | if (*str == '/') | ||
106 | *str = '!'; | ||
107 | } | ||
108 | |||
109 | static int cn_print_exe_file(struct core_name *cn) | ||
110 | { | ||
111 | struct file *exe_file; | ||
112 | char *pathbuf, *path; | ||
113 | int ret; | ||
114 | |||
115 | exe_file = get_mm_exe_file(current->mm); | ||
116 | if (!exe_file) { | ||
117 | char *commstart = cn->corename + cn->used; | ||
118 | ret = cn_printf(cn, "%s (path unknown)", current->comm); | ||
119 | cn_escape(commstart); | ||
120 | return ret; | ||
121 | } | ||
122 | |||
123 | pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); | ||
124 | if (!pathbuf) { | ||
125 | ret = -ENOMEM; | ||
126 | goto put_exe_file; | ||
127 | } | ||
128 | |||
129 | path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); | ||
130 | if (IS_ERR(path)) { | ||
131 | ret = PTR_ERR(path); | ||
132 | goto free_buf; | ||
133 | } | ||
134 | |||
135 | cn_escape(path); | ||
136 | |||
137 | ret = cn_printf(cn, "%s", path); | ||
138 | |||
139 | free_buf: | ||
140 | kfree(pathbuf); | ||
141 | put_exe_file: | ||
142 | fput(exe_file); | ||
143 | return ret; | ||
144 | } | ||
145 | |||
146 | /* format_corename will inspect the pattern parameter, and output a | ||
147 | * name into corename, which must have space for at least | ||
148 | * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. | ||
149 | */ | ||
150 | static int format_corename(struct core_name *cn, long signr) | ||
151 | { | ||
152 | const struct cred *cred = current_cred(); | ||
153 | const char *pat_ptr = core_pattern; | ||
154 | int ispipe = (*pat_ptr == '|'); | ||
155 | int pid_in_pattern = 0; | ||
156 | int err = 0; | ||
157 | |||
158 | cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); | ||
159 | cn->corename = kmalloc(cn->size, GFP_KERNEL); | ||
160 | cn->used = 0; | ||
161 | |||
162 | if (!cn->corename) | ||
163 | return -ENOMEM; | ||
164 | |||
165 | /* Repeat as long as we have more pattern to process and more output | ||
166 | space */ | ||
167 | while (*pat_ptr) { | ||
168 | if (*pat_ptr != '%') { | ||
169 | if (*pat_ptr == 0) | ||
170 | goto out; | ||
171 | err = cn_printf(cn, "%c", *pat_ptr++); | ||
172 | } else { | ||
173 | switch (*++pat_ptr) { | ||
174 | /* single % at the end, drop that */ | ||
175 | case 0: | ||
176 | goto out; | ||
177 | /* Double percent, output one percent */ | ||
178 | case '%': | ||
179 | err = cn_printf(cn, "%c", '%'); | ||
180 | break; | ||
181 | /* pid */ | ||
182 | case 'p': | ||
183 | pid_in_pattern = 1; | ||
184 | err = cn_printf(cn, "%d", | ||
185 | task_tgid_vnr(current)); | ||
186 | break; | ||
187 | /* uid */ | ||
188 | case 'u': | ||
189 | err = cn_printf(cn, "%d", cred->uid); | ||
190 | break; | ||
191 | /* gid */ | ||
192 | case 'g': | ||
193 | err = cn_printf(cn, "%d", cred->gid); | ||
194 | break; | ||
195 | /* signal that caused the coredump */ | ||
196 | case 's': | ||
197 | err = cn_printf(cn, "%ld", signr); | ||
198 | break; | ||
199 | /* UNIX time of coredump */ | ||
200 | case 't': { | ||
201 | struct timeval tv; | ||
202 | do_gettimeofday(&tv); | ||
203 | err = cn_printf(cn, "%lu", tv.tv_sec); | ||
204 | break; | ||
205 | } | ||
206 | /* hostname */ | ||
207 | case 'h': { | ||
208 | char *namestart = cn->corename + cn->used; | ||
209 | down_read(&uts_sem); | ||
210 | err = cn_printf(cn, "%s", | ||
211 | utsname()->nodename); | ||
212 | up_read(&uts_sem); | ||
213 | cn_escape(namestart); | ||
214 | break; | ||
215 | } | ||
216 | /* executable */ | ||
217 | case 'e': { | ||
218 | char *commstart = cn->corename + cn->used; | ||
219 | err = cn_printf(cn, "%s", current->comm); | ||
220 | cn_escape(commstart); | ||
221 | break; | ||
222 | } | ||
223 | case 'E': | ||
224 | err = cn_print_exe_file(cn); | ||
225 | break; | ||
226 | /* core limit size */ | ||
227 | case 'c': | ||
228 | err = cn_printf(cn, "%lu", | ||
229 | rlimit(RLIMIT_CORE)); | ||
230 | break; | ||
231 | default: | ||
232 | break; | ||
233 | } | ||
234 | ++pat_ptr; | ||
235 | } | ||
236 | |||
237 | if (err) | ||
238 | return err; | ||
239 | } | ||
240 | |||
241 | /* Backward compatibility with core_uses_pid: | ||
242 | * | ||
243 | * If core_pattern does not include a %p (as is the default) | ||
244 | * and core_uses_pid is set, then .%pid will be appended to | ||
245 | * the filename. Do not do this for piped commands. */ | ||
246 | if (!ispipe && !pid_in_pattern && core_uses_pid) { | ||
247 | err = cn_printf(cn, ".%d", task_tgid_vnr(current)); | ||
248 | if (err) | ||
249 | return err; | ||
250 | } | ||
251 | out: | ||
252 | return ispipe; | ||
253 | } | ||
254 | |||
255 | static int zap_process(struct task_struct *start, int exit_code) | ||
256 | { | ||
257 | struct task_struct *t; | ||
258 | int nr = 0; | ||
259 | |||
260 | start->signal->flags = SIGNAL_GROUP_EXIT; | ||
261 | start->signal->group_exit_code = exit_code; | ||
262 | start->signal->group_stop_count = 0; | ||
263 | |||
264 | t = start; | ||
265 | do { | ||
266 | task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); | ||
267 | if (t != current && t->mm) { | ||
268 | sigaddset(&t->pending.signal, SIGKILL); | ||
269 | signal_wake_up(t, 1); | ||
270 | nr++; | ||
271 | } | ||
272 | } while_each_thread(start, t); | ||
273 | |||
274 | return nr; | ||
275 | } | ||
276 | |||
277 | static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, | ||
278 | struct core_state *core_state, int exit_code) | ||
279 | { | ||
280 | struct task_struct *g, *p; | ||
281 | unsigned long flags; | ||
282 | int nr = -EAGAIN; | ||
283 | |||
284 | spin_lock_irq(&tsk->sighand->siglock); | ||
285 | if (!signal_group_exit(tsk->signal)) { | ||
286 | mm->core_state = core_state; | ||
287 | nr = zap_process(tsk, exit_code); | ||
288 | } | ||
289 | spin_unlock_irq(&tsk->sighand->siglock); | ||
290 | if (unlikely(nr < 0)) | ||
291 | return nr; | ||
292 | |||
293 | if (atomic_read(&mm->mm_users) == nr + 1) | ||
294 | goto done; | ||
295 | /* | ||
296 | * We should find and kill all tasks which use this mm, and we should | ||
297 | * count them correctly into ->nr_threads. We don't take tasklist | ||
298 | * lock, but this is safe wrt: | ||
299 | * | ||
300 | * fork: | ||
301 | * None of sub-threads can fork after zap_process(leader). All | ||
302 | * processes which were created before this point should be | ||
303 | * visible to zap_threads() because copy_process() adds the new | ||
304 | * process to the tail of init_task.tasks list, and lock/unlock | ||
305 | * of ->siglock provides a memory barrier. | ||
306 | * | ||
307 | * do_exit: | ||
308 | * The caller holds mm->mmap_sem. This means that the task which | ||
309 | * uses this mm can't pass exit_mm(), so it can't exit or clear | ||
310 | * its ->mm. | ||
311 | * | ||
312 | * de_thread: | ||
313 | * It does list_replace_rcu(&leader->tasks, &current->tasks), | ||
314 | * we must see either old or new leader, this does not matter. | ||
315 | * However, it can change p->sighand, so lock_task_sighand(p) | ||
316 | * must be used. Since p->mm != NULL and we hold ->mmap_sem | ||
317 | * it can't fail. | ||
318 | * | ||
319 | * Note also that "g" can be the old leader with ->mm == NULL | ||
320 | * and already unhashed and thus removed from ->thread_group. | ||
321 | * This is OK, __unhash_process()->list_del_rcu() does not | ||
322 | * clear the ->next pointer, we will find the new leader via | ||
323 | * next_thread(). | ||
324 | */ | ||
325 | rcu_read_lock(); | ||
326 | for_each_process(g) { | ||
327 | if (g == tsk->group_leader) | ||
328 | continue; | ||
329 | if (g->flags & PF_KTHREAD) | ||
330 | continue; | ||
331 | p = g; | ||
332 | do { | ||
333 | if (p->mm) { | ||
334 | if (unlikely(p->mm == mm)) { | ||
335 | lock_task_sighand(p, &flags); | ||
336 | nr += zap_process(p, exit_code); | ||
337 | unlock_task_sighand(p, &flags); | ||
338 | } | ||
339 | break; | ||
340 | } | ||
341 | } while_each_thread(g, p); | ||
342 | } | ||
343 | rcu_read_unlock(); | ||
344 | done: | ||
345 | atomic_set(&core_state->nr_threads, nr); | ||
346 | return nr; | ||
347 | } | ||
348 | |||
349 | static int coredump_wait(int exit_code, struct core_state *core_state) | ||
350 | { | ||
351 | struct task_struct *tsk = current; | ||
352 | struct mm_struct *mm = tsk->mm; | ||
353 | int core_waiters = -EBUSY; | ||
354 | |||
355 | init_completion(&core_state->startup); | ||
356 | core_state->dumper.task = tsk; | ||
357 | core_state->dumper.next = NULL; | ||
358 | |||
359 | down_write(&mm->mmap_sem); | ||
360 | if (!mm->core_state) | ||
361 | core_waiters = zap_threads(tsk, mm, core_state, exit_code); | ||
362 | up_write(&mm->mmap_sem); | ||
363 | |||
364 | if (core_waiters > 0) { | ||
365 | struct core_thread *ptr; | ||
366 | |||
367 | wait_for_completion(&core_state->startup); | ||
368 | /* | ||
369 | * Wait for all the threads to become inactive, so that | ||
370 | * all the thread context (extended register state, like | ||
371 | * fpu etc) gets copied to the memory. | ||
372 | */ | ||
373 | ptr = core_state->dumper.next; | ||
374 | while (ptr != NULL) { | ||
375 | wait_task_inactive(ptr->task, 0); | ||
376 | ptr = ptr->next; | ||
377 | } | ||
378 | } | ||
379 | |||
380 | return core_waiters; | ||
381 | } | ||
382 | |||
383 | static void coredump_finish(struct mm_struct *mm) | ||
384 | { | ||
385 | struct core_thread *curr, *next; | ||
386 | struct task_struct *task; | ||
387 | |||
388 | next = mm->core_state->dumper.next; | ||
389 | while ((curr = next) != NULL) { | ||
390 | next = curr->next; | ||
391 | task = curr->task; | ||
392 | /* | ||
393 | * see exit_mm(), curr->task must not see | ||
394 | * ->task == NULL before we read ->next. | ||
395 | */ | ||
396 | smp_mb(); | ||
397 | curr->task = NULL; | ||
398 | wake_up_process(task); | ||
399 | } | ||
400 | |||
401 | mm->core_state = NULL; | ||
402 | } | ||
403 | |||
404 | static void wait_for_dump_helpers(struct file *file) | ||
405 | { | ||
406 | struct pipe_inode_info *pipe; | ||
407 | |||
408 | pipe = file->f_path.dentry->d_inode->i_pipe; | ||
409 | |||
410 | pipe_lock(pipe); | ||
411 | pipe->readers++; | ||
412 | pipe->writers--; | ||
413 | |||
414 | while ((pipe->readers > 1) && (!signal_pending(current))) { | ||
415 | wake_up_interruptible_sync(&pipe->wait); | ||
416 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
417 | pipe_wait(pipe); | ||
418 | } | ||
419 | |||
420 | pipe->readers--; | ||
421 | pipe->writers++; | ||
422 | pipe_unlock(pipe); | ||
423 | |||
424 | } | ||
425 | |||
426 | /* | ||
427 | * umh_pipe_setup | ||
428 | * helper function to customize the process used | ||
429 | * to collect the core in userspace. Specifically | ||
430 | * it sets up a pipe and installs it as fd 0 (stdin) | ||
431 | * for the process. Returns 0 on success, or | ||
432 | * PTR_ERR on failure. | ||
433 | * Note that it also sets the core limit to 1. This | ||
434 | * is a special value that we use to trap recursive | ||
435 | * core dumps | ||
436 | */ | ||
437 | static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) | ||
438 | { | ||
439 | struct file *files[2]; | ||
440 | struct coredump_params *cp = (struct coredump_params *)info->data; | ||
441 | int err = create_pipe_files(files, 0); | ||
442 | if (err) | ||
443 | return err; | ||
444 | |||
445 | cp->file = files[1]; | ||
446 | |||
447 | replace_fd(0, files[0], 0); | ||
448 | /* and disallow core files too */ | ||
449 | current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; | ||
450 | |||
451 | return 0; | ||
452 | } | ||
453 | |||
454 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) | ||
455 | { | ||
456 | struct core_state core_state; | ||
457 | struct core_name cn; | ||
458 | struct mm_struct *mm = current->mm; | ||
459 | struct linux_binfmt * binfmt; | ||
460 | const struct cred *old_cred; | ||
461 | struct cred *cred; | ||
462 | int retval = 0; | ||
463 | int flag = 0; | ||
464 | int ispipe; | ||
465 | struct files_struct *displaced; | ||
466 | bool need_nonrelative = false; | ||
467 | static atomic_t core_dump_count = ATOMIC_INIT(0); | ||
468 | struct coredump_params cprm = { | ||
469 | .signr = signr, | ||
470 | .regs = regs, | ||
471 | .limit = rlimit(RLIMIT_CORE), | ||
472 | /* | ||
473 | * We must use the same mm->flags while dumping core to avoid | ||
474 | * inconsistency of bit flags, since this flag is not protected | ||
475 | * by any locks. | ||
476 | */ | ||
477 | .mm_flags = mm->flags, | ||
478 | }; | ||
479 | |||
480 | audit_core_dumps(signr); | ||
481 | |||
482 | binfmt = mm->binfmt; | ||
483 | if (!binfmt || !binfmt->core_dump) | ||
484 | goto fail; | ||
485 | if (!__get_dumpable(cprm.mm_flags)) | ||
486 | goto fail; | ||
487 | |||
488 | cred = prepare_creds(); | ||
489 | if (!cred) | ||
490 | goto fail; | ||
491 | /* | ||
492 | * We cannot trust fsuid as being the "true" uid of the process | ||
493 | * nor do we know its entire history. We only know it was tainted | ||
494 | * so we dump it as root in mode 2, and only into a controlled | ||
495 | * environment (pipe handler or fully qualified path). | ||
496 | */ | ||
497 | if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { | ||
498 | /* Setuid core dump mode */ | ||
499 | flag = O_EXCL; /* Stop rewrite attacks */ | ||
500 | cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ | ||
501 | need_nonrelative = true; | ||
502 | } | ||
503 | |||
504 | retval = coredump_wait(exit_code, &core_state); | ||
505 | if (retval < 0) | ||
506 | goto fail_creds; | ||
507 | |||
508 | old_cred = override_creds(cred); | ||
509 | |||
510 | /* | ||
511 | * Clear any false indication of pending signals that might | ||
512 | * be seen by the filesystem code called to write the core file. | ||
513 | */ | ||
514 | clear_thread_flag(TIF_SIGPENDING); | ||
515 | |||
516 | ispipe = format_corename(&cn, signr); | ||
517 | |||
518 | if (ispipe) { | ||
519 | int dump_count; | ||
520 | char **helper_argv; | ||
521 | |||
522 | if (ispipe < 0) { | ||
523 | printk(KERN_WARNING "format_corename failed\n"); | ||
524 | printk(KERN_WARNING "Aborting core\n"); | ||
525 | goto fail_corename; | ||
526 | } | ||
527 | |||
528 | if (cprm.limit == 1) { | ||
529 | /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. | ||
530 | * | ||
531 | * Normally core limits are irrelevant to pipes, since | ||
532 | * we're not writing to the file system, but we use | ||
533 | * cprm.limit of 1 here as a special value, this is a | ||
534 | * consistent way to catch recursive crashes. | ||
535 | * We can still crash if the core_pattern binary sets | ||
536 | * RLIM_CORE = !1, but it runs as root, and can do | ||
537 | * lots of stupid things. | ||
538 | * | ||
539 | * Note that we use task_tgid_vnr here to grab the pid | ||
540 | * of the process group leader. That way we get the | ||
541 | * right pid if a thread in a multi-threaded | ||
542 | * core_pattern process dies. | ||
543 | */ | ||
544 | printk(KERN_WARNING | ||
545 | "Process %d(%s) has RLIMIT_CORE set to 1\n", | ||
546 | task_tgid_vnr(current), current->comm); | ||
547 | printk(KERN_WARNING "Aborting core\n"); | ||
548 | goto fail_unlock; | ||
549 | } | ||
550 | cprm.limit = RLIM_INFINITY; | ||
551 | |||
552 | dump_count = atomic_inc_return(&core_dump_count); | ||
553 | if (core_pipe_limit && (core_pipe_limit < dump_count)) { | ||
554 | printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", | ||
555 | task_tgid_vnr(current), current->comm); | ||
556 | printk(KERN_WARNING "Skipping core dump\n"); | ||
557 | goto fail_dropcount; | ||
558 | } | ||
559 | |||
560 | helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); | ||
561 | if (!helper_argv) { | ||
562 | printk(KERN_WARNING "%s failed to allocate memory\n", | ||
563 | __func__); | ||
564 | goto fail_dropcount; | ||
565 | } | ||
566 | |||
567 | retval = call_usermodehelper_fns(helper_argv[0], helper_argv, | ||
568 | NULL, UMH_WAIT_EXEC, umh_pipe_setup, | ||
569 | NULL, &cprm); | ||
570 | argv_free(helper_argv); | ||
571 | if (retval) { | ||
572 | printk(KERN_INFO "Core dump to %s pipe failed\n", | ||
573 | cn.corename); | ||
574 | goto close_fail; | ||
575 | } | ||
576 | } else { | ||
577 | struct inode *inode; | ||
578 | |||
579 | if (cprm.limit < binfmt->min_coredump) | ||
580 | goto fail_unlock; | ||
581 | |||
582 | if (need_nonrelative && cn.corename[0] != '/') { | ||
583 | printk(KERN_WARNING "Pid %d(%s) can only dump core "\ | ||
584 | "to fully qualified path!\n", | ||
585 | task_tgid_vnr(current), current->comm); | ||
586 | printk(KERN_WARNING "Skipping core dump\n"); | ||
587 | goto fail_unlock; | ||
588 | } | ||
589 | |||
590 | cprm.file = filp_open(cn.corename, | ||
591 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, | ||
592 | 0600); | ||
593 | if (IS_ERR(cprm.file)) | ||
594 | goto fail_unlock; | ||
595 | |||
596 | inode = cprm.file->f_path.dentry->d_inode; | ||
597 | if (inode->i_nlink > 1) | ||
598 | goto close_fail; | ||
599 | if (d_unhashed(cprm.file->f_path.dentry)) | ||
600 | goto close_fail; | ||
601 | /* | ||
602 | * AK: actually i see no reason to not allow this for named | ||
603 | * pipes etc, but keep the previous behaviour for now. | ||
604 | */ | ||
605 | if (!S_ISREG(inode->i_mode)) | ||
606 | goto close_fail; | ||
607 | /* | ||
608 | * Don't allow local users to get cute and trick others to coredump | ||
609 | * into their pre-created files. | ||
610 | */ | ||
611 | if (!uid_eq(inode->i_uid, current_fsuid())) | ||
612 | goto close_fail; | ||
613 | if (!cprm.file->f_op || !cprm.file->f_op->write) | ||
614 | goto close_fail; | ||
615 | if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) | ||
616 | goto close_fail; | ||
617 | } | ||
618 | |||
619 | /* get us an unshared descriptor table; almost always a no-op */ | ||
620 | retval = unshare_files(&displaced); | ||
621 | if (retval) | ||
622 | goto close_fail; | ||
623 | if (displaced) | ||
624 | put_files_struct(displaced); | ||
625 | retval = binfmt->core_dump(&cprm); | ||
626 | if (retval) | ||
627 | current->signal->group_exit_code |= 0x80; | ||
628 | |||
629 | if (ispipe && core_pipe_limit) | ||
630 | wait_for_dump_helpers(cprm.file); | ||
631 | close_fail: | ||
632 | if (cprm.file) | ||
633 | filp_close(cprm.file, NULL); | ||
634 | fail_dropcount: | ||
635 | if (ispipe) | ||
636 | atomic_dec(&core_dump_count); | ||
637 | fail_unlock: | ||
638 | kfree(cn.corename); | ||
639 | fail_corename: | ||
640 | coredump_finish(mm); | ||
641 | revert_creds(old_cred); | ||
642 | fail_creds: | ||
643 | put_cred(cred); | ||
644 | fail: | ||
645 | return; | ||
646 | } | ||
647 | |||
648 | /* | ||
649 | * Core dumping helper functions. These are the only things you should | ||
650 | * do on a core-file: use only these functions to write out all the | ||
651 | * necessary info. | ||
652 | */ | ||
653 | int dump_write(struct file *file, const void *addr, int nr) | ||
654 | { | ||
655 | return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; | ||
656 | } | ||
657 | EXPORT_SYMBOL(dump_write); | ||
658 | |||
659 | int dump_seek(struct file *file, loff_t off) | ||
660 | { | ||
661 | int ret = 1; | ||
662 | |||
663 | if (file->f_op->llseek && file->f_op->llseek != no_llseek) { | ||
664 | if (file->f_op->llseek(file, off, SEEK_CUR) < 0) | ||
665 | return 0; | ||
666 | } else { | ||
667 | char *buf = (char *)get_zeroed_page(GFP_KERNEL); | ||
668 | |||
669 | if (!buf) | ||
670 | return 0; | ||
671 | while (off > 0) { | ||
672 | unsigned long n = off; | ||
673 | |||
674 | if (n > PAGE_SIZE) | ||
675 | n = PAGE_SIZE; | ||
676 | if (!dump_write(file, buf, n)) { | ||
677 | ret = 0; | ||
678 | break; | ||
679 | } | ||
680 | off -= n; | ||
681 | } | ||
682 | free_page((unsigned long)buf); | ||
683 | } | ||
684 | return ret; | ||
685 | } | ||
686 | EXPORT_SYMBOL(dump_seek); | ||
diff --git a/fs/dcache.c b/fs/dcache.c index 693f95bf1cae..3a463d0c4fe8 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -2113,7 +2113,7 @@ again: | |||
2113 | inode = dentry->d_inode; | 2113 | inode = dentry->d_inode; |
2114 | isdir = S_ISDIR(inode->i_mode); | 2114 | isdir = S_ISDIR(inode->i_mode); |
2115 | if (dentry->d_count == 1) { | 2115 | if (dentry->d_count == 1) { |
2116 | if (inode && !spin_trylock(&inode->i_lock)) { | 2116 | if (!spin_trylock(&inode->i_lock)) { |
2117 | spin_unlock(&dentry->d_lock); | 2117 | spin_unlock(&dentry->d_lock); |
2118 | cpu_relax(); | 2118 | cpu_relax(); |
2119 | goto again; | 2119 | goto again; |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 24bb043e50d9..4e0886c9e5c4 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -711,6 +711,12 @@ static void ecryptfs_free_kmem_caches(void) | |||
711 | { | 711 | { |
712 | int i; | 712 | int i; |
713 | 713 | ||
714 | /* | ||
715 | * Make sure all delayed rcu free inodes are flushed before we | ||
716 | * destroy cache. | ||
717 | */ | ||
718 | rcu_barrier(); | ||
719 | |||
714 | for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { | 720 | for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { |
715 | struct ecryptfs_cache_info *info; | 721 | struct ecryptfs_cache_info *info; |
716 | 722 | ||
diff --git a/fs/efs/super.c b/fs/efs/super.c index e755ec746c69..2002431ef9a0 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c | |||
@@ -96,6 +96,11 @@ static int init_inodecache(void) | |||
96 | 96 | ||
97 | static void destroy_inodecache(void) | 97 | static void destroy_inodecache(void) |
98 | { | 98 | { |
99 | /* | ||
100 | * Make sure all delayed rcu free inodes are flushed before we | ||
101 | * destroy cache. | ||
102 | */ | ||
103 | rcu_barrier(); | ||
99 | kmem_cache_destroy(efs_inode_cachep); | 104 | kmem_cache_destroy(efs_inode_cachep); |
100 | } | 105 | } |
101 | 106 | ||
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index eedec84c1809..cd96649bfe62 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -1810,7 +1810,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, | |||
1810 | int, maxevents, int, timeout) | 1810 | int, maxevents, int, timeout) |
1811 | { | 1811 | { |
1812 | int error; | 1812 | int error; |
1813 | struct file *file; | 1813 | struct fd f; |
1814 | struct eventpoll *ep; | 1814 | struct eventpoll *ep; |
1815 | 1815 | ||
1816 | /* The maximum number of event must be greater than zero */ | 1816 | /* The maximum number of event must be greater than zero */ |
@@ -1818,38 +1818,33 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, | |||
1818 | return -EINVAL; | 1818 | return -EINVAL; |
1819 | 1819 | ||
1820 | /* Verify that the area passed by the user is writeable */ | 1820 | /* Verify that the area passed by the user is writeable */ |
1821 | if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) { | 1821 | if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) |
1822 | error = -EFAULT; | 1822 | return -EFAULT; |
1823 | goto error_return; | ||
1824 | } | ||
1825 | 1823 | ||
1826 | /* Get the "struct file *" for the eventpoll file */ | 1824 | /* Get the "struct file *" for the eventpoll file */ |
1827 | error = -EBADF; | 1825 | f = fdget(epfd); |
1828 | file = fget(epfd); | 1826 | if (!f.file) |
1829 | if (!file) | 1827 | return -EBADF; |
1830 | goto error_return; | ||
1831 | 1828 | ||
1832 | /* | 1829 | /* |
1833 | * We have to check that the file structure underneath the fd | 1830 | * We have to check that the file structure underneath the fd |
1834 | * the user passed to us _is_ an eventpoll file. | 1831 | * the user passed to us _is_ an eventpoll file. |
1835 | */ | 1832 | */ |
1836 | error = -EINVAL; | 1833 | error = -EINVAL; |
1837 | if (!is_file_epoll(file)) | 1834 | if (!is_file_epoll(f.file)) |
1838 | goto error_fput; | 1835 | goto error_fput; |
1839 | 1836 | ||
1840 | /* | 1837 | /* |
1841 | * At this point it is safe to assume that the "private_data" contains | 1838 | * At this point it is safe to assume that the "private_data" contains |
1842 | * our own data structure. | 1839 | * our own data structure. |
1843 | */ | 1840 | */ |
1844 | ep = file->private_data; | 1841 | ep = f.file->private_data; |
1845 | 1842 | ||
1846 | /* Time to fish for events ... */ | 1843 | /* Time to fish for events ... */ |
1847 | error = ep_poll(ep, events, maxevents, timeout); | 1844 | error = ep_poll(ep, events, maxevents, timeout); |
1848 | 1845 | ||
1849 | error_fput: | 1846 | error_fput: |
1850 | fput(file); | 1847 | fdput(f); |
1851 | error_return: | ||
1852 | |||
1853 | return error; | 1848 | return error; |
1854 | } | 1849 | } |
1855 | 1850 | ||
@@ -66,19 +66,8 @@ | |||
66 | 66 | ||
67 | #include <trace/events/sched.h> | 67 | #include <trace/events/sched.h> |
68 | 68 | ||
69 | int core_uses_pid; | ||
70 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | ||
71 | unsigned int core_pipe_limit; | ||
72 | int suid_dumpable = 0; | 69 | int suid_dumpable = 0; |
73 | 70 | ||
74 | struct core_name { | ||
75 | char *corename; | ||
76 | int used, size; | ||
77 | }; | ||
78 | static atomic_t call_count = ATOMIC_INIT(1); | ||
79 | |||
80 | /* The maximal length of core_pattern is also specified in sysctl.c */ | ||
81 | |||
82 | static LIST_HEAD(formats); | 71 | static LIST_HEAD(formats); |
83 | static DEFINE_RWLOCK(binfmt_lock); | 72 | static DEFINE_RWLOCK(binfmt_lock); |
84 | 73 | ||
@@ -1006,40 +995,6 @@ no_thread_group: | |||
1006 | return 0; | 995 | return 0; |
1007 | } | 996 | } |
1008 | 997 | ||
1009 | /* | ||
1010 | * These functions flushes out all traces of the currently running executable | ||
1011 | * so that a new one can be started | ||
1012 | */ | ||
1013 | static void flush_old_files(struct files_struct * files) | ||
1014 | { | ||
1015 | long j = -1; | ||
1016 | struct fdtable *fdt; | ||
1017 | |||
1018 | spin_lock(&files->file_lock); | ||
1019 | for (;;) { | ||
1020 | unsigned long set, i; | ||
1021 | |||
1022 | j++; | ||
1023 | i = j * BITS_PER_LONG; | ||
1024 | fdt = files_fdtable(files); | ||
1025 | if (i >= fdt->max_fds) | ||
1026 | break; | ||
1027 | set = fdt->close_on_exec[j]; | ||
1028 | if (!set) | ||
1029 | continue; | ||
1030 | fdt->close_on_exec[j] = 0; | ||
1031 | spin_unlock(&files->file_lock); | ||
1032 | for ( ; set ; i++,set >>= 1) { | ||
1033 | if (set & 1) { | ||
1034 | sys_close(i); | ||
1035 | } | ||
1036 | } | ||
1037 | spin_lock(&files->file_lock); | ||
1038 | |||
1039 | } | ||
1040 | spin_unlock(&files->file_lock); | ||
1041 | } | ||
1042 | |||
1043 | char *get_task_comm(char *buf, struct task_struct *tsk) | 998 | char *get_task_comm(char *buf, struct task_struct *tsk) |
1044 | { | 999 | { |
1045 | /* buf must be at least sizeof(tsk->comm) in size */ | 1000 | /* buf must be at least sizeof(tsk->comm) in size */ |
@@ -1050,6 +1005,11 @@ char *get_task_comm(char *buf, struct task_struct *tsk) | |||
1050 | } | 1005 | } |
1051 | EXPORT_SYMBOL_GPL(get_task_comm); | 1006 | EXPORT_SYMBOL_GPL(get_task_comm); |
1052 | 1007 | ||
1008 | /* | ||
1009 | * These functions flushes out all traces of the currently running executable | ||
1010 | * so that a new one can be started | ||
1011 | */ | ||
1012 | |||
1053 | void set_task_comm(struct task_struct *tsk, char *buf) | 1013 | void set_task_comm(struct task_struct *tsk, char *buf) |
1054 | { | 1014 | { |
1055 | task_lock(tsk); | 1015 | task_lock(tsk); |
@@ -1171,7 +1131,7 @@ void setup_new_exec(struct linux_binprm * bprm) | |||
1171 | current->self_exec_id++; | 1131 | current->self_exec_id++; |
1172 | 1132 | ||
1173 | flush_signal_handlers(current, 0); | 1133 | flush_signal_handlers(current, 0); |
1174 | flush_old_files(current->files); | 1134 | do_close_on_exec(current->files); |
1175 | } | 1135 | } |
1176 | EXPORT_SYMBOL(setup_new_exec); | 1136 | EXPORT_SYMBOL(setup_new_exec); |
1177 | 1137 | ||
@@ -1632,353 +1592,6 @@ void set_binfmt(struct linux_binfmt *new) | |||
1632 | 1592 | ||
1633 | EXPORT_SYMBOL(set_binfmt); | 1593 | EXPORT_SYMBOL(set_binfmt); |
1634 | 1594 | ||
1635 | static int expand_corename(struct core_name *cn) | ||
1636 | { | ||
1637 | char *old_corename = cn->corename; | ||
1638 | |||
1639 | cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); | ||
1640 | cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); | ||
1641 | |||
1642 | if (!cn->corename) { | ||
1643 | kfree(old_corename); | ||
1644 | return -ENOMEM; | ||
1645 | } | ||
1646 | |||
1647 | return 0; | ||
1648 | } | ||
1649 | |||
1650 | static int cn_printf(struct core_name *cn, const char *fmt, ...) | ||
1651 | { | ||
1652 | char *cur; | ||
1653 | int need; | ||
1654 | int ret; | ||
1655 | va_list arg; | ||
1656 | |||
1657 | va_start(arg, fmt); | ||
1658 | need = vsnprintf(NULL, 0, fmt, arg); | ||
1659 | va_end(arg); | ||
1660 | |||
1661 | if (likely(need < cn->size - cn->used - 1)) | ||
1662 | goto out_printf; | ||
1663 | |||
1664 | ret = expand_corename(cn); | ||
1665 | if (ret) | ||
1666 | goto expand_fail; | ||
1667 | |||
1668 | out_printf: | ||
1669 | cur = cn->corename + cn->used; | ||
1670 | va_start(arg, fmt); | ||
1671 | vsnprintf(cur, need + 1, fmt, arg); | ||
1672 | va_end(arg); | ||
1673 | cn->used += need; | ||
1674 | return 0; | ||
1675 | |||
1676 | expand_fail: | ||
1677 | return ret; | ||
1678 | } | ||
1679 | |||
1680 | static void cn_escape(char *str) | ||
1681 | { | ||
1682 | for (; *str; str++) | ||
1683 | if (*str == '/') | ||
1684 | *str = '!'; | ||
1685 | } | ||
1686 | |||
1687 | static int cn_print_exe_file(struct core_name *cn) | ||
1688 | { | ||
1689 | struct file *exe_file; | ||
1690 | char *pathbuf, *path; | ||
1691 | int ret; | ||
1692 | |||
1693 | exe_file = get_mm_exe_file(current->mm); | ||
1694 | if (!exe_file) { | ||
1695 | char *commstart = cn->corename + cn->used; | ||
1696 | ret = cn_printf(cn, "%s (path unknown)", current->comm); | ||
1697 | cn_escape(commstart); | ||
1698 | return ret; | ||
1699 | } | ||
1700 | |||
1701 | pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); | ||
1702 | if (!pathbuf) { | ||
1703 | ret = -ENOMEM; | ||
1704 | goto put_exe_file; | ||
1705 | } | ||
1706 | |||
1707 | path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); | ||
1708 | if (IS_ERR(path)) { | ||
1709 | ret = PTR_ERR(path); | ||
1710 | goto free_buf; | ||
1711 | } | ||
1712 | |||
1713 | cn_escape(path); | ||
1714 | |||
1715 | ret = cn_printf(cn, "%s", path); | ||
1716 | |||
1717 | free_buf: | ||
1718 | kfree(pathbuf); | ||
1719 | put_exe_file: | ||
1720 | fput(exe_file); | ||
1721 | return ret; | ||
1722 | } | ||
1723 | |||
1724 | /* format_corename will inspect the pattern parameter, and output a | ||
1725 | * name into corename, which must have space for at least | ||
1726 | * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. | ||
1727 | */ | ||
1728 | static int format_corename(struct core_name *cn, long signr) | ||
1729 | { | ||
1730 | const struct cred *cred = current_cred(); | ||
1731 | const char *pat_ptr = core_pattern; | ||
1732 | int ispipe = (*pat_ptr == '|'); | ||
1733 | int pid_in_pattern = 0; | ||
1734 | int err = 0; | ||
1735 | |||
1736 | cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); | ||
1737 | cn->corename = kmalloc(cn->size, GFP_KERNEL); | ||
1738 | cn->used = 0; | ||
1739 | |||
1740 | if (!cn->corename) | ||
1741 | return -ENOMEM; | ||
1742 | |||
1743 | /* Repeat as long as we have more pattern to process and more output | ||
1744 | space */ | ||
1745 | while (*pat_ptr) { | ||
1746 | if (*pat_ptr != '%') { | ||
1747 | if (*pat_ptr == 0) | ||
1748 | goto out; | ||
1749 | err = cn_printf(cn, "%c", *pat_ptr++); | ||
1750 | } else { | ||
1751 | switch (*++pat_ptr) { | ||
1752 | /* single % at the end, drop that */ | ||
1753 | case 0: | ||
1754 | goto out; | ||
1755 | /* Double percent, output one percent */ | ||
1756 | case '%': | ||
1757 | err = cn_printf(cn, "%c", '%'); | ||
1758 | break; | ||
1759 | /* pid */ | ||
1760 | case 'p': | ||
1761 | pid_in_pattern = 1; | ||
1762 | err = cn_printf(cn, "%d", | ||
1763 | task_tgid_vnr(current)); | ||
1764 | break; | ||
1765 | /* uid */ | ||
1766 | case 'u': | ||
1767 | err = cn_printf(cn, "%d", cred->uid); | ||
1768 | break; | ||
1769 | /* gid */ | ||
1770 | case 'g': | ||
1771 | err = cn_printf(cn, "%d", cred->gid); | ||
1772 | break; | ||
1773 | /* signal that caused the coredump */ | ||
1774 | case 's': | ||
1775 | err = cn_printf(cn, "%ld", signr); | ||
1776 | break; | ||
1777 | /* UNIX time of coredump */ | ||
1778 | case 't': { | ||
1779 | struct timeval tv; | ||
1780 | do_gettimeofday(&tv); | ||
1781 | err = cn_printf(cn, "%lu", tv.tv_sec); | ||
1782 | break; | ||
1783 | } | ||
1784 | /* hostname */ | ||
1785 | case 'h': { | ||
1786 | char *namestart = cn->corename + cn->used; | ||
1787 | down_read(&uts_sem); | ||
1788 | err = cn_printf(cn, "%s", | ||
1789 | utsname()->nodename); | ||
1790 | up_read(&uts_sem); | ||
1791 | cn_escape(namestart); | ||
1792 | break; | ||
1793 | } | ||
1794 | /* executable */ | ||
1795 | case 'e': { | ||
1796 | char *commstart = cn->corename + cn->used; | ||
1797 | err = cn_printf(cn, "%s", current->comm); | ||
1798 | cn_escape(commstart); | ||
1799 | break; | ||
1800 | } | ||
1801 | case 'E': | ||
1802 | err = cn_print_exe_file(cn); | ||
1803 | break; | ||
1804 | /* core limit size */ | ||
1805 | case 'c': | ||
1806 | err = cn_printf(cn, "%lu", | ||
1807 | rlimit(RLIMIT_CORE)); | ||
1808 | break; | ||
1809 | default: | ||
1810 | break; | ||
1811 | } | ||
1812 | ++pat_ptr; | ||
1813 | } | ||
1814 | |||
1815 | if (err) | ||
1816 | return err; | ||
1817 | } | ||
1818 | |||
1819 | /* Backward compatibility with core_uses_pid: | ||
1820 | * | ||
1821 | * If core_pattern does not include a %p (as is the default) | ||
1822 | * and core_uses_pid is set, then .%pid will be appended to | ||
1823 | * the filename. Do not do this for piped commands. */ | ||
1824 | if (!ispipe && !pid_in_pattern && core_uses_pid) { | ||
1825 | err = cn_printf(cn, ".%d", task_tgid_vnr(current)); | ||
1826 | if (err) | ||
1827 | return err; | ||
1828 | } | ||
1829 | out: | ||
1830 | return ispipe; | ||
1831 | } | ||
1832 | |||
1833 | static int zap_process(struct task_struct *start, int exit_code) | ||
1834 | { | ||
1835 | struct task_struct *t; | ||
1836 | int nr = 0; | ||
1837 | |||
1838 | start->signal->flags = SIGNAL_GROUP_EXIT; | ||
1839 | start->signal->group_exit_code = exit_code; | ||
1840 | start->signal->group_stop_count = 0; | ||
1841 | |||
1842 | t = start; | ||
1843 | do { | ||
1844 | task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); | ||
1845 | if (t != current && t->mm) { | ||
1846 | sigaddset(&t->pending.signal, SIGKILL); | ||
1847 | signal_wake_up(t, 1); | ||
1848 | nr++; | ||
1849 | } | ||
1850 | } while_each_thread(start, t); | ||
1851 | |||
1852 | return nr; | ||
1853 | } | ||
1854 | |||
1855 | static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, | ||
1856 | struct core_state *core_state, int exit_code) | ||
1857 | { | ||
1858 | struct task_struct *g, *p; | ||
1859 | unsigned long flags; | ||
1860 | int nr = -EAGAIN; | ||
1861 | |||
1862 | spin_lock_irq(&tsk->sighand->siglock); | ||
1863 | if (!signal_group_exit(tsk->signal)) { | ||
1864 | mm->core_state = core_state; | ||
1865 | nr = zap_process(tsk, exit_code); | ||
1866 | } | ||
1867 | spin_unlock_irq(&tsk->sighand->siglock); | ||
1868 | if (unlikely(nr < 0)) | ||
1869 | return nr; | ||
1870 | |||
1871 | if (atomic_read(&mm->mm_users) == nr + 1) | ||
1872 | goto done; | ||
1873 | /* | ||
1874 | * We should find and kill all tasks which use this mm, and we should | ||
1875 | * count them correctly into ->nr_threads. We don't take tasklist | ||
1876 | * lock, but this is safe wrt: | ||
1877 | * | ||
1878 | * fork: | ||
1879 | * None of sub-threads can fork after zap_process(leader). All | ||
1880 | * processes which were created before this point should be | ||
1881 | * visible to zap_threads() because copy_process() adds the new | ||
1882 | * process to the tail of init_task.tasks list, and lock/unlock | ||
1883 | * of ->siglock provides a memory barrier. | ||
1884 | * | ||
1885 | * do_exit: | ||
1886 | * The caller holds mm->mmap_sem. This means that the task which | ||
1887 | * uses this mm can't pass exit_mm(), so it can't exit or clear | ||
1888 | * its ->mm. | ||
1889 | * | ||
1890 | * de_thread: | ||
1891 | * It does list_replace_rcu(&leader->tasks, ¤t->tasks), | ||
1892 | * we must see either old or new leader, this does not matter. | ||
1893 | * However, it can change p->sighand, so lock_task_sighand(p) | ||
1894 | * must be used. Since p->mm != NULL and we hold ->mmap_sem | ||
1895 | * it can't fail. | ||
1896 | * | ||
1897 | * Note also that "g" can be the old leader with ->mm == NULL | ||
1898 | * and already unhashed and thus removed from ->thread_group. | ||
1899 | * This is OK, __unhash_process()->list_del_rcu() does not | ||
1900 | * clear the ->next pointer, we will find the new leader via | ||
1901 | * next_thread(). | ||
1902 | */ | ||
1903 | rcu_read_lock(); | ||
1904 | for_each_process(g) { | ||
1905 | if (g == tsk->group_leader) | ||
1906 | continue; | ||
1907 | if (g->flags & PF_KTHREAD) | ||
1908 | continue; | ||
1909 | p = g; | ||
1910 | do { | ||
1911 | if (p->mm) { | ||
1912 | if (unlikely(p->mm == mm)) { | ||
1913 | lock_task_sighand(p, &flags); | ||
1914 | nr += zap_process(p, exit_code); | ||
1915 | unlock_task_sighand(p, &flags); | ||
1916 | } | ||
1917 | break; | ||
1918 | } | ||
1919 | } while_each_thread(g, p); | ||
1920 | } | ||
1921 | rcu_read_unlock(); | ||
1922 | done: | ||
1923 | atomic_set(&core_state->nr_threads, nr); | ||
1924 | return nr; | ||
1925 | } | ||
1926 | |||
1927 | static int coredump_wait(int exit_code, struct core_state *core_state) | ||
1928 | { | ||
1929 | struct task_struct *tsk = current; | ||
1930 | struct mm_struct *mm = tsk->mm; | ||
1931 | int core_waiters = -EBUSY; | ||
1932 | |||
1933 | init_completion(&core_state->startup); | ||
1934 | core_state->dumper.task = tsk; | ||
1935 | core_state->dumper.next = NULL; | ||
1936 | |||
1937 | down_write(&mm->mmap_sem); | ||
1938 | if (!mm->core_state) | ||
1939 | core_waiters = zap_threads(tsk, mm, core_state, exit_code); | ||
1940 | up_write(&mm->mmap_sem); | ||
1941 | |||
1942 | if (core_waiters > 0) { | ||
1943 | struct core_thread *ptr; | ||
1944 | |||
1945 | wait_for_completion(&core_state->startup); | ||
1946 | /* | ||
1947 | * Wait for all the threads to become inactive, so that | ||
1948 | * all the thread context (extended register state, like | ||
1949 | * fpu etc) gets copied to the memory. | ||
1950 | */ | ||
1951 | ptr = core_state->dumper.next; | ||
1952 | while (ptr != NULL) { | ||
1953 | wait_task_inactive(ptr->task, 0); | ||
1954 | ptr = ptr->next; | ||
1955 | } | ||
1956 | } | ||
1957 | |||
1958 | return core_waiters; | ||
1959 | } | ||
1960 | |||
1961 | static void coredump_finish(struct mm_struct *mm) | ||
1962 | { | ||
1963 | struct core_thread *curr, *next; | ||
1964 | struct task_struct *task; | ||
1965 | |||
1966 | next = mm->core_state->dumper.next; | ||
1967 | while ((curr = next) != NULL) { | ||
1968 | next = curr->next; | ||
1969 | task = curr->task; | ||
1970 | /* | ||
1971 | * see exit_mm(), curr->task must not see | ||
1972 | * ->task == NULL before we read ->next. | ||
1973 | */ | ||
1974 | smp_mb(); | ||
1975 | curr->task = NULL; | ||
1976 | wake_up_process(task); | ||
1977 | } | ||
1978 | |||
1979 | mm->core_state = NULL; | ||
1980 | } | ||
1981 | |||
1982 | /* | 1595 | /* |
1983 | * set_dumpable converts traditional three-value dumpable to two flags and | 1596 | * set_dumpable converts traditional three-value dumpable to two flags and |
1984 | * stores them into mm->flags. It modifies lower two bits of mm->flags, but | 1597 | * stores them into mm->flags. It modifies lower two bits of mm->flags, but |
@@ -2020,7 +1633,7 @@ void set_dumpable(struct mm_struct *mm, int value) | |||
2020 | } | 1633 | } |
2021 | } | 1634 | } |
2022 | 1635 | ||
2023 | static int __get_dumpable(unsigned long mm_flags) | 1636 | int __get_dumpable(unsigned long mm_flags) |
2024 | { | 1637 | { |
2025 | int ret; | 1638 | int ret; |
2026 | 1639 | ||
@@ -2032,290 +1645,3 @@ int get_dumpable(struct mm_struct *mm) | |||
2032 | { | 1645 | { |
2033 | return __get_dumpable(mm->flags); | 1646 | return __get_dumpable(mm->flags); |
2034 | } | 1647 | } |
2035 | |||
2036 | static void wait_for_dump_helpers(struct file *file) | ||
2037 | { | ||
2038 | struct pipe_inode_info *pipe; | ||
2039 | |||
2040 | pipe = file->f_path.dentry->d_inode->i_pipe; | ||
2041 | |||
2042 | pipe_lock(pipe); | ||
2043 | pipe->readers++; | ||
2044 | pipe->writers--; | ||
2045 | |||
2046 | while ((pipe->readers > 1) && (!signal_pending(current))) { | ||
2047 | wake_up_interruptible_sync(&pipe->wait); | ||
2048 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
2049 | pipe_wait(pipe); | ||
2050 | } | ||
2051 | |||
2052 | pipe->readers--; | ||
2053 | pipe->writers++; | ||
2054 | pipe_unlock(pipe); | ||
2055 | |||
2056 | } | ||
2057 | |||
2058 | |||
2059 | /* | ||
2060 | * umh_pipe_setup | ||
2061 | * helper function to customize the process used | ||
2062 | * to collect the core in userspace. Specifically | ||
2063 | * it sets up a pipe and installs it as fd 0 (stdin) | ||
2064 | * for the process. Returns 0 on success, or | ||
2065 | * PTR_ERR on failure. | ||
2066 | * Note that it also sets the core limit to 1. This | ||
2067 | * is a special value that we use to trap recursive | ||
2068 | * core dumps | ||
2069 | */ | ||
2070 | static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) | ||
2071 | { | ||
2072 | struct file *files[2]; | ||
2073 | struct fdtable *fdt; | ||
2074 | struct coredump_params *cp = (struct coredump_params *)info->data; | ||
2075 | struct files_struct *cf = current->files; | ||
2076 | int err = create_pipe_files(files, 0); | ||
2077 | if (err) | ||
2078 | return err; | ||
2079 | |||
2080 | cp->file = files[1]; | ||
2081 | |||
2082 | sys_close(0); | ||
2083 | fd_install(0, files[0]); | ||
2084 | spin_lock(&cf->file_lock); | ||
2085 | fdt = files_fdtable(cf); | ||
2086 | __set_open_fd(0, fdt); | ||
2087 | __clear_close_on_exec(0, fdt); | ||
2088 | spin_unlock(&cf->file_lock); | ||
2089 | |||
2090 | /* and disallow core files too */ | ||
2091 | current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; | ||
2092 | |||
2093 | return 0; | ||
2094 | } | ||
2095 | |||
2096 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) | ||
2097 | { | ||
2098 | struct core_state core_state; | ||
2099 | struct core_name cn; | ||
2100 | struct mm_struct *mm = current->mm; | ||
2101 | struct linux_binfmt * binfmt; | ||
2102 | const struct cred *old_cred; | ||
2103 | struct cred *cred; | ||
2104 | int retval = 0; | ||
2105 | int flag = 0; | ||
2106 | int ispipe; | ||
2107 | bool need_nonrelative = false; | ||
2108 | static atomic_t core_dump_count = ATOMIC_INIT(0); | ||
2109 | struct coredump_params cprm = { | ||
2110 | .signr = signr, | ||
2111 | .regs = regs, | ||
2112 | .limit = rlimit(RLIMIT_CORE), | ||
2113 | /* | ||
2114 | * We must use the same mm->flags while dumping core to avoid | ||
2115 | * inconsistency of bit flags, since this flag is not protected | ||
2116 | * by any locks. | ||
2117 | */ | ||
2118 | .mm_flags = mm->flags, | ||
2119 | }; | ||
2120 | |||
2121 | audit_core_dumps(signr); | ||
2122 | |||
2123 | binfmt = mm->binfmt; | ||
2124 | if (!binfmt || !binfmt->core_dump) | ||
2125 | goto fail; | ||
2126 | if (!__get_dumpable(cprm.mm_flags)) | ||
2127 | goto fail; | ||
2128 | |||
2129 | cred = prepare_creds(); | ||
2130 | if (!cred) | ||
2131 | goto fail; | ||
2132 | /* | ||
2133 | * We cannot trust fsuid as being the "true" uid of the process | ||
2134 | * nor do we know its entire history. We only know it was tainted | ||
2135 | * so we dump it as root in mode 2, and only into a controlled | ||
2136 | * environment (pipe handler or fully qualified path). | ||
2137 | */ | ||
2138 | if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { | ||
2139 | /* Setuid core dump mode */ | ||
2140 | flag = O_EXCL; /* Stop rewrite attacks */ | ||
2141 | cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ | ||
2142 | need_nonrelative = true; | ||
2143 | } | ||
2144 | |||
2145 | retval = coredump_wait(exit_code, &core_state); | ||
2146 | if (retval < 0) | ||
2147 | goto fail_creds; | ||
2148 | |||
2149 | old_cred = override_creds(cred); | ||
2150 | |||
2151 | /* | ||
2152 | * Clear any false indication of pending signals that might | ||
2153 | * be seen by the filesystem code called to write the core file. | ||
2154 | */ | ||
2155 | clear_thread_flag(TIF_SIGPENDING); | ||
2156 | |||
2157 | ispipe = format_corename(&cn, signr); | ||
2158 | |||
2159 | if (ispipe) { | ||
2160 | int dump_count; | ||
2161 | char **helper_argv; | ||
2162 | |||
2163 | if (ispipe < 0) { | ||
2164 | printk(KERN_WARNING "format_corename failed\n"); | ||
2165 | printk(KERN_WARNING "Aborting core\n"); | ||
2166 | goto fail_corename; | ||
2167 | } | ||
2168 | |||
2169 | if (cprm.limit == 1) { | ||
2170 | /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. | ||
2171 | * | ||
2172 | * Normally core limits are irrelevant to pipes, since | ||
2173 | * we're not writing to the file system, but we use | ||
2174 | * cprm.limit of 1 here as a speacial value, this is a | ||
2175 | * consistent way to catch recursive crashes. | ||
2176 | * We can still crash if the core_pattern binary sets | ||
2177 | * RLIM_CORE = !1, but it runs as root, and can do | ||
2178 | * lots of stupid things. | ||
2179 | * | ||
2180 | * Note that we use task_tgid_vnr here to grab the pid | ||
2181 | * of the process group leader. That way we get the | ||
2182 | * right pid if a thread in a multi-threaded | ||
2183 | * core_pattern process dies. | ||
2184 | */ | ||
2185 | printk(KERN_WARNING | ||
2186 | "Process %d(%s) has RLIMIT_CORE set to 1\n", | ||
2187 | task_tgid_vnr(current), current->comm); | ||
2188 | printk(KERN_WARNING "Aborting core\n"); | ||
2189 | goto fail_unlock; | ||
2190 | } | ||
2191 | cprm.limit = RLIM_INFINITY; | ||
2192 | |||
2193 | dump_count = atomic_inc_return(&core_dump_count); | ||
2194 | if (core_pipe_limit && (core_pipe_limit < dump_count)) { | ||
2195 | printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", | ||
2196 | task_tgid_vnr(current), current->comm); | ||
2197 | printk(KERN_WARNING "Skipping core dump\n"); | ||
2198 | goto fail_dropcount; | ||
2199 | } | ||
2200 | |||
2201 | helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); | ||
2202 | if (!helper_argv) { | ||
2203 | printk(KERN_WARNING "%s failed to allocate memory\n", | ||
2204 | __func__); | ||
2205 | goto fail_dropcount; | ||
2206 | } | ||
2207 | |||
2208 | retval = call_usermodehelper_fns(helper_argv[0], helper_argv, | ||
2209 | NULL, UMH_WAIT_EXEC, umh_pipe_setup, | ||
2210 | NULL, &cprm); | ||
2211 | argv_free(helper_argv); | ||
2212 | if (retval) { | ||
2213 | printk(KERN_INFO "Core dump to %s pipe failed\n", | ||
2214 | cn.corename); | ||
2215 | goto close_fail; | ||
2216 | } | ||
2217 | } else { | ||
2218 | struct inode *inode; | ||
2219 | |||
2220 | if (cprm.limit < binfmt->min_coredump) | ||
2221 | goto fail_unlock; | ||
2222 | |||
2223 | if (need_nonrelative && cn.corename[0] != '/') { | ||
2224 | printk(KERN_WARNING "Pid %d(%s) can only dump core "\ | ||
2225 | "to fully qualified path!\n", | ||
2226 | task_tgid_vnr(current), current->comm); | ||
2227 | printk(KERN_WARNING "Skipping core dump\n"); | ||
2228 | goto fail_unlock; | ||
2229 | } | ||
2230 | |||
2231 | cprm.file = filp_open(cn.corename, | ||
2232 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, | ||
2233 | 0600); | ||
2234 | if (IS_ERR(cprm.file)) | ||
2235 | goto fail_unlock; | ||
2236 | |||
2237 | inode = cprm.file->f_path.dentry->d_inode; | ||
2238 | if (inode->i_nlink > 1) | ||
2239 | goto close_fail; | ||
2240 | if (d_unhashed(cprm.file->f_path.dentry)) | ||
2241 | goto close_fail; | ||
2242 | /* | ||
2243 | * AK: actually i see no reason to not allow this for named | ||
2244 | * pipes etc, but keep the previous behaviour for now. | ||
2245 | */ | ||
2246 | if (!S_ISREG(inode->i_mode)) | ||
2247 | goto close_fail; | ||
2248 | /* | ||
2249 | * Dont allow local users get cute and trick others to coredump | ||
2250 | * into their pre-created files. | ||
2251 | */ | ||
2252 | if (!uid_eq(inode->i_uid, current_fsuid())) | ||
2253 | goto close_fail; | ||
2254 | if (!cprm.file->f_op || !cprm.file->f_op->write) | ||
2255 | goto close_fail; | ||
2256 | if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) | ||
2257 | goto close_fail; | ||
2258 | } | ||
2259 | |||
2260 | retval = binfmt->core_dump(&cprm); | ||
2261 | if (retval) | ||
2262 | current->signal->group_exit_code |= 0x80; | ||
2263 | |||
2264 | if (ispipe && core_pipe_limit) | ||
2265 | wait_for_dump_helpers(cprm.file); | ||
2266 | close_fail: | ||
2267 | if (cprm.file) | ||
2268 | filp_close(cprm.file, NULL); | ||
2269 | fail_dropcount: | ||
2270 | if (ispipe) | ||
2271 | atomic_dec(&core_dump_count); | ||
2272 | fail_unlock: | ||
2273 | kfree(cn.corename); | ||
2274 | fail_corename: | ||
2275 | coredump_finish(mm); | ||
2276 | revert_creds(old_cred); | ||
2277 | fail_creds: | ||
2278 | put_cred(cred); | ||
2279 | fail: | ||
2280 | return; | ||
2281 | } | ||
2282 | |||
2283 | /* | ||
2284 | * Core dumping helper functions. These are the only things you should | ||
2285 | * do on a core-file: use only these functions to write out all the | ||
2286 | * necessary info. | ||
2287 | */ | ||
2288 | int dump_write(struct file *file, const void *addr, int nr) | ||
2289 | { | ||
2290 | return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; | ||
2291 | } | ||
2292 | EXPORT_SYMBOL(dump_write); | ||
2293 | |||
2294 | int dump_seek(struct file *file, loff_t off) | ||
2295 | { | ||
2296 | int ret = 1; | ||
2297 | |||
2298 | if (file->f_op->llseek && file->f_op->llseek != no_llseek) { | ||
2299 | if (file->f_op->llseek(file, off, SEEK_CUR) < 0) | ||
2300 | return 0; | ||
2301 | } else { | ||
2302 | char *buf = (char *)get_zeroed_page(GFP_KERNEL); | ||
2303 | |||
2304 | if (!buf) | ||
2305 | return 0; | ||
2306 | while (off > 0) { | ||
2307 | unsigned long n = off; | ||
2308 | |||
2309 | if (n > PAGE_SIZE) | ||
2310 | n = PAGE_SIZE; | ||
2311 | if (!dump_write(file, buf, n)) { | ||
2312 | ret = 0; | ||
2313 | break; | ||
2314 | } | ||
2315 | off -= n; | ||
2316 | } | ||
2317 | free_page((unsigned long)buf); | ||
2318 | } | ||
2319 | return ret; | ||
2320 | } | ||
2321 | EXPORT_SYMBOL(dump_seek); | ||
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index dde41a75c7c8..59e3bbfac0b1 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -206,6 +206,11 @@ static int init_inodecache(void) | |||
206 | */ | 206 | */ |
207 | static void destroy_inodecache(void) | 207 | static void destroy_inodecache(void) |
208 | { | 208 | { |
209 | /* | ||
210 | * Make sure all delayed rcu free inodes are flushed before we | ||
211 | * destroy cache. | ||
212 | */ | ||
213 | rcu_barrier(); | ||
209 | kmem_cache_destroy(exofs_inode_cachep); | 214 | kmem_cache_destroy(exofs_inode_cachep); |
210 | } | 215 | } |
211 | 216 | ||
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index af74d9e27b71..6c205d0c565b 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -206,6 +206,11 @@ static int init_inodecache(void) | |||
206 | 206 | ||
207 | static void destroy_inodecache(void) | 207 | static void destroy_inodecache(void) |
208 | { | 208 | { |
209 | /* | ||
210 | * Make sure all delayed rcu free inodes are flushed before we | ||
211 | * destroy cache. | ||
212 | */ | ||
213 | rcu_barrier(); | ||
209 | kmem_cache_destroy(ext2_inode_cachep); | 214 | kmem_cache_destroy(ext2_inode_cachep); |
210 | } | 215 | } |
211 | 216 | ||
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 09b8455bd7eb..bd29894c8fbc 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -532,6 +532,11 @@ static int init_inodecache(void) | |||
532 | 532 | ||
533 | static void destroy_inodecache(void) | 533 | static void destroy_inodecache(void) |
534 | { | 534 | { |
535 | /* | ||
536 | * Make sure all delayed rcu free inodes are flushed before we | ||
537 | * destroy cache. | ||
538 | */ | ||
539 | rcu_barrier(); | ||
535 | kmem_cache_destroy(ext3_inode_cachep); | 540 | kmem_cache_destroy(ext3_inode_cachep); |
536 | } | 541 | } |
537 | 542 | ||
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7f7dad787603..5439d6a56e99 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -233,7 +233,7 @@ group_extend_out: | |||
233 | 233 | ||
234 | case EXT4_IOC_MOVE_EXT: { | 234 | case EXT4_IOC_MOVE_EXT: { |
235 | struct move_extent me; | 235 | struct move_extent me; |
236 | struct file *donor_filp; | 236 | struct fd donor; |
237 | int err; | 237 | int err; |
238 | 238 | ||
239 | if (!(filp->f_mode & FMODE_READ) || | 239 | if (!(filp->f_mode & FMODE_READ) || |
@@ -245,11 +245,11 @@ group_extend_out: | |||
245 | return -EFAULT; | 245 | return -EFAULT; |
246 | me.moved_len = 0; | 246 | me.moved_len = 0; |
247 | 247 | ||
248 | donor_filp = fget(me.donor_fd); | 248 | donor = fdget(me.donor_fd); |
249 | if (!donor_filp) | 249 | if (!donor.file) |
250 | return -EBADF; | 250 | return -EBADF; |
251 | 251 | ||
252 | if (!(donor_filp->f_mode & FMODE_WRITE)) { | 252 | if (!(donor.file->f_mode & FMODE_WRITE)) { |
253 | err = -EBADF; | 253 | err = -EBADF; |
254 | goto mext_out; | 254 | goto mext_out; |
255 | } | 255 | } |
@@ -258,14 +258,15 @@ group_extend_out: | |||
258 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | 258 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { |
259 | ext4_msg(sb, KERN_ERR, | 259 | ext4_msg(sb, KERN_ERR, |
260 | "Online defrag not supported with bigalloc"); | 260 | "Online defrag not supported with bigalloc"); |
261 | return -EOPNOTSUPP; | 261 | err = -EOPNOTSUPP; |
262 | goto mext_out; | ||
262 | } | 263 | } |
263 | 264 | ||
264 | err = mnt_want_write_file(filp); | 265 | err = mnt_want_write_file(filp); |
265 | if (err) | 266 | if (err) |
266 | goto mext_out; | 267 | goto mext_out; |
267 | 268 | ||
268 | err = ext4_move_extents(filp, donor_filp, me.orig_start, | 269 | err = ext4_move_extents(filp, donor.file, me.orig_start, |
269 | me.donor_start, me.len, &me.moved_len); | 270 | me.donor_start, me.len, &me.moved_len); |
270 | mnt_drop_write_file(filp); | 271 | mnt_drop_write_file(filp); |
271 | 272 | ||
@@ -273,7 +274,7 @@ group_extend_out: | |||
273 | &me, sizeof(me))) | 274 | &me, sizeof(me))) |
274 | err = -EFAULT; | 275 | err = -EFAULT; |
275 | mext_out: | 276 | mext_out: |
276 | fput(donor_filp); | 277 | fdput(donor); |
277 | return err; | 278 | return err; |
278 | } | 279 | } |
279 | 280 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1f15cc836fbd..69c55d4e4626 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -1019,6 +1019,11 @@ static int init_inodecache(void) | |||
1019 | 1019 | ||
1020 | static void destroy_inodecache(void) | 1020 | static void destroy_inodecache(void) |
1021 | { | 1021 | { |
1022 | /* | ||
1023 | * Make sure all delayed rcu free inodes are flushed before we | ||
1024 | * destroy cache. | ||
1025 | */ | ||
1026 | rcu_barrier(); | ||
1022 | kmem_cache_destroy(ext4_inode_cachep); | 1027 | kmem_cache_destroy(ext4_inode_cachep); |
1023 | } | 1028 | } |
1024 | 1029 | ||
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 47d9eb0be886..4e5a6ac54ebd 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -521,6 +521,11 @@ static int __init fat_init_inodecache(void) | |||
521 | 521 | ||
522 | static void __exit fat_destroy_inodecache(void) | 522 | static void __exit fat_destroy_inodecache(void) |
523 | { | 523 | { |
524 | /* | ||
525 | * Make sure all delayed rcu free inodes are flushed before we | ||
526 | * destroy cache. | ||
527 | */ | ||
528 | rcu_barrier(); | ||
524 | kmem_cache_destroy(fat_inode_cachep); | 529 | kmem_cache_destroy(fat_inode_cachep); |
525 | } | 530 | } |
526 | 531 | ||
diff --git a/fs/fcntl.c b/fs/fcntl.c index 887b5ba8c9b5..8f704291d4ed 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -26,124 +26,6 @@ | |||
26 | #include <asm/siginfo.h> | 26 | #include <asm/siginfo.h> |
27 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
28 | 28 | ||
29 | void set_close_on_exec(unsigned int fd, int flag) | ||
30 | { | ||
31 | struct files_struct *files = current->files; | ||
32 | struct fdtable *fdt; | ||
33 | spin_lock(&files->file_lock); | ||
34 | fdt = files_fdtable(files); | ||
35 | if (flag) | ||
36 | __set_close_on_exec(fd, fdt); | ||
37 | else | ||
38 | __clear_close_on_exec(fd, fdt); | ||
39 | spin_unlock(&files->file_lock); | ||
40 | } | ||
41 | |||
42 | static bool get_close_on_exec(unsigned int fd) | ||
43 | { | ||
44 | struct files_struct *files = current->files; | ||
45 | struct fdtable *fdt; | ||
46 | bool res; | ||
47 | rcu_read_lock(); | ||
48 | fdt = files_fdtable(files); | ||
49 | res = close_on_exec(fd, fdt); | ||
50 | rcu_read_unlock(); | ||
51 | return res; | ||
52 | } | ||
53 | |||
54 | SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) | ||
55 | { | ||
56 | int err = -EBADF; | ||
57 | struct file * file, *tofree; | ||
58 | struct files_struct * files = current->files; | ||
59 | struct fdtable *fdt; | ||
60 | |||
61 | if ((flags & ~O_CLOEXEC) != 0) | ||
62 | return -EINVAL; | ||
63 | |||
64 | if (unlikely(oldfd == newfd)) | ||
65 | return -EINVAL; | ||
66 | |||
67 | spin_lock(&files->file_lock); | ||
68 | err = expand_files(files, newfd); | ||
69 | file = fcheck(oldfd); | ||
70 | if (unlikely(!file)) | ||
71 | goto Ebadf; | ||
72 | if (unlikely(err < 0)) { | ||
73 | if (err == -EMFILE) | ||
74 | goto Ebadf; | ||
75 | goto out_unlock; | ||
76 | } | ||
77 | /* | ||
78 | * We need to detect attempts to do dup2() over allocated but still | ||
79 | * not finished descriptor. NB: OpenBSD avoids that at the price of | ||
80 | * extra work in their equivalent of fget() - they insert struct | ||
81 | * file immediately after grabbing descriptor, mark it larval if | ||
82 | * more work (e.g. actual opening) is needed and make sure that | ||
83 | * fget() treats larval files as absent. Potentially interesting, | ||
84 | * but while extra work in fget() is trivial, locking implications | ||
85 | * and amount of surgery on open()-related paths in VFS are not. | ||
86 | * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" | ||
87 | * deadlocks in rather amusing ways, AFAICS. All of that is out of | ||
88 | * scope of POSIX or SUS, since neither considers shared descriptor | ||
89 | * tables and this condition does not arise without those. | ||
90 | */ | ||
91 | err = -EBUSY; | ||
92 | fdt = files_fdtable(files); | ||
93 | tofree = fdt->fd[newfd]; | ||
94 | if (!tofree && fd_is_open(newfd, fdt)) | ||
95 | goto out_unlock; | ||
96 | get_file(file); | ||
97 | rcu_assign_pointer(fdt->fd[newfd], file); | ||
98 | __set_open_fd(newfd, fdt); | ||
99 | if (flags & O_CLOEXEC) | ||
100 | __set_close_on_exec(newfd, fdt); | ||
101 | else | ||
102 | __clear_close_on_exec(newfd, fdt); | ||
103 | spin_unlock(&files->file_lock); | ||
104 | |||
105 | if (tofree) | ||
106 | filp_close(tofree, files); | ||
107 | |||
108 | return newfd; | ||
109 | |||
110 | Ebadf: | ||
111 | err = -EBADF; | ||
112 | out_unlock: | ||
113 | spin_unlock(&files->file_lock); | ||
114 | return err; | ||
115 | } | ||
116 | |||
117 | SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) | ||
118 | { | ||
119 | if (unlikely(newfd == oldfd)) { /* corner case */ | ||
120 | struct files_struct *files = current->files; | ||
121 | int retval = oldfd; | ||
122 | |||
123 | rcu_read_lock(); | ||
124 | if (!fcheck_files(files, oldfd)) | ||
125 | retval = -EBADF; | ||
126 | rcu_read_unlock(); | ||
127 | return retval; | ||
128 | } | ||
129 | return sys_dup3(oldfd, newfd, 0); | ||
130 | } | ||
131 | |||
132 | SYSCALL_DEFINE1(dup, unsigned int, fildes) | ||
133 | { | ||
134 | int ret = -EBADF; | ||
135 | struct file *file = fget_raw(fildes); | ||
136 | |||
137 | if (file) { | ||
138 | ret = get_unused_fd(); | ||
139 | if (ret >= 0) | ||
140 | fd_install(ret, file); | ||
141 | else | ||
142 | fput(file); | ||
143 | } | ||
144 | return ret; | ||
145 | } | ||
146 | |||
147 | #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) | 29 | #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) |
148 | 30 | ||
149 | static int setfl(int fd, struct file * filp, unsigned long arg) | 31 | static int setfl(int fd, struct file * filp, unsigned long arg) |
@@ -267,7 +149,7 @@ pid_t f_getown(struct file *filp) | |||
267 | 149 | ||
268 | static int f_setown_ex(struct file *filp, unsigned long arg) | 150 | static int f_setown_ex(struct file *filp, unsigned long arg) |
269 | { | 151 | { |
270 | struct f_owner_ex * __user owner_p = (void * __user)arg; | 152 | struct f_owner_ex __user *owner_p = (void __user *)arg; |
271 | struct f_owner_ex owner; | 153 | struct f_owner_ex owner; |
272 | struct pid *pid; | 154 | struct pid *pid; |
273 | int type; | 155 | int type; |
@@ -307,7 +189,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg) | |||
307 | 189 | ||
308 | static int f_getown_ex(struct file *filp, unsigned long arg) | 190 | static int f_getown_ex(struct file *filp, unsigned long arg) |
309 | { | 191 | { |
310 | struct f_owner_ex * __user owner_p = (void * __user)arg; | 192 | struct f_owner_ex __user *owner_p = (void __user *)arg; |
311 | struct f_owner_ex owner; | 193 | struct f_owner_ex owner; |
312 | int ret = 0; | 194 | int ret = 0; |
313 | 195 | ||
@@ -345,7 +227,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg) | |||
345 | static int f_getowner_uids(struct file *filp, unsigned long arg) | 227 | static int f_getowner_uids(struct file *filp, unsigned long arg) |
346 | { | 228 | { |
347 | struct user_namespace *user_ns = current_user_ns(); | 229 | struct user_namespace *user_ns = current_user_ns(); |
348 | uid_t * __user dst = (void * __user)arg; | 230 | uid_t __user *dst = (void __user *)arg; |
349 | uid_t src[2]; | 231 | uid_t src[2]; |
350 | int err; | 232 | int err; |
351 | 233 | ||
@@ -373,14 +255,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | |||
373 | 255 | ||
374 | switch (cmd) { | 256 | switch (cmd) { |
375 | case F_DUPFD: | 257 | case F_DUPFD: |
258 | err = f_dupfd(arg, filp, 0); | ||
259 | break; | ||
376 | case F_DUPFD_CLOEXEC: | 260 | case F_DUPFD_CLOEXEC: |
377 | if (arg >= rlimit(RLIMIT_NOFILE)) | 261 | err = f_dupfd(arg, filp, FD_CLOEXEC); |
378 | break; | ||
379 | err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0); | ||
380 | if (err >= 0) { | ||
381 | get_file(filp); | ||
382 | fd_install(err, filp); | ||
383 | } | ||
384 | break; | 262 | break; |
385 | case F_GETFD: | 263 | case F_GETFD: |
386 | err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; | 264 | err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; |
@@ -470,25 +348,23 @@ static int check_fcntl_cmd(unsigned cmd) | |||
470 | 348 | ||
471 | SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) | 349 | SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) |
472 | { | 350 | { |
473 | struct file *filp; | 351 | struct fd f = fdget_raw(fd); |
474 | int fput_needed; | ||
475 | long err = -EBADF; | 352 | long err = -EBADF; |
476 | 353 | ||
477 | filp = fget_raw_light(fd, &fput_needed); | 354 | if (!f.file) |
478 | if (!filp) | ||
479 | goto out; | 355 | goto out; |
480 | 356 | ||
481 | if (unlikely(filp->f_mode & FMODE_PATH)) { | 357 | if (unlikely(f.file->f_mode & FMODE_PATH)) { |
482 | if (!check_fcntl_cmd(cmd)) | 358 | if (!check_fcntl_cmd(cmd)) |
483 | goto out1; | 359 | goto out1; |
484 | } | 360 | } |
485 | 361 | ||
486 | err = security_file_fcntl(filp, cmd, arg); | 362 | err = security_file_fcntl(f.file, cmd, arg); |
487 | if (!err) | 363 | if (!err) |
488 | err = do_fcntl(fd, cmd, arg, filp); | 364 | err = do_fcntl(fd, cmd, arg, f.file); |
489 | 365 | ||
490 | out1: | 366 | out1: |
491 | fput_light(filp, fput_needed); | 367 | fdput(f); |
492 | out: | 368 | out: |
493 | return err; | 369 | return err; |
494 | } | 370 | } |
@@ -497,38 +373,36 @@ out: | |||
497 | SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, | 373 | SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, |
498 | unsigned long, arg) | 374 | unsigned long, arg) |
499 | { | 375 | { |
500 | struct file * filp; | 376 | struct fd f = fdget_raw(fd); |
501 | long err = -EBADF; | 377 | long err = -EBADF; |
502 | int fput_needed; | ||
503 | 378 | ||
504 | filp = fget_raw_light(fd, &fput_needed); | 379 | if (!f.file) |
505 | if (!filp) | ||
506 | goto out; | 380 | goto out; |
507 | 381 | ||
508 | if (unlikely(filp->f_mode & FMODE_PATH)) { | 382 | if (unlikely(f.file->f_mode & FMODE_PATH)) { |
509 | if (!check_fcntl_cmd(cmd)) | 383 | if (!check_fcntl_cmd(cmd)) |
510 | goto out1; | 384 | goto out1; |
511 | } | 385 | } |
512 | 386 | ||
513 | err = security_file_fcntl(filp, cmd, arg); | 387 | err = security_file_fcntl(f.file, cmd, arg); |
514 | if (err) | 388 | if (err) |
515 | goto out1; | 389 | goto out1; |
516 | 390 | ||
517 | switch (cmd) { | 391 | switch (cmd) { |
518 | case F_GETLK64: | 392 | case F_GETLK64: |
519 | err = fcntl_getlk64(filp, (struct flock64 __user *) arg); | 393 | err = fcntl_getlk64(f.file, (struct flock64 __user *) arg); |
520 | break; | 394 | break; |
521 | case F_SETLK64: | 395 | case F_SETLK64: |
522 | case F_SETLKW64: | 396 | case F_SETLKW64: |
523 | err = fcntl_setlk64(fd, filp, cmd, | 397 | err = fcntl_setlk64(fd, f.file, cmd, |
524 | (struct flock64 __user *) arg); | 398 | (struct flock64 __user *) arg); |
525 | break; | 399 | break; |
526 | default: | 400 | default: |
527 | err = do_fcntl(fd, cmd, arg, filp); | 401 | err = do_fcntl(fd, cmd, arg, f.file); |
528 | break; | 402 | break; |
529 | } | 403 | } |
530 | out1: | 404 | out1: |
531 | fput_light(filp, fput_needed); | 405 | fdput(f); |
532 | out: | 406 | out: |
533 | return err; | 407 | return err; |
534 | } | 408 | } |
diff --git a/fs/fhandle.c b/fs/fhandle.c index a48e4a139be1..f775bfdd6e4a 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c | |||
@@ -113,24 +113,21 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, | |||
113 | 113 | ||
114 | static struct vfsmount *get_vfsmount_from_fd(int fd) | 114 | static struct vfsmount *get_vfsmount_from_fd(int fd) |
115 | { | 115 | { |
116 | struct path path; | 116 | struct vfsmount *mnt; |
117 | 117 | ||
118 | if (fd == AT_FDCWD) { | 118 | if (fd == AT_FDCWD) { |
119 | struct fs_struct *fs = current->fs; | 119 | struct fs_struct *fs = current->fs; |
120 | spin_lock(&fs->lock); | 120 | spin_lock(&fs->lock); |
121 | path = fs->pwd; | 121 | mnt = mntget(fs->pwd.mnt); |
122 | mntget(path.mnt); | ||
123 | spin_unlock(&fs->lock); | 122 | spin_unlock(&fs->lock); |
124 | } else { | 123 | } else { |
125 | int fput_needed; | 124 | struct fd f = fdget(fd); |
126 | struct file *file = fget_light(fd, &fput_needed); | 125 | if (!f.file) |
127 | if (!file) | ||
128 | return ERR_PTR(-EBADF); | 126 | return ERR_PTR(-EBADF); |
129 | path = file->f_path; | 127 | mnt = mntget(f.file->f_path.mnt); |
130 | mntget(path.mnt); | 128 | fdput(f); |
131 | fput_light(file, fput_needed); | ||
132 | } | 129 | } |
133 | return path.mnt; | 130 | return mnt; |
134 | } | 131 | } |
135 | 132 | ||
136 | static int vfs_dentry_acceptable(void *context, struct dentry *dentry) | 133 | static int vfs_dentry_acceptable(void *context, struct dentry *dentry) |
@@ -6,6 +6,7 @@ | |||
6 | * Manage the dynamic fd arrays in the process files_struct. | 6 | * Manage the dynamic fd arrays in the process files_struct. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/syscalls.h> | ||
9 | #include <linux/export.h> | 10 | #include <linux/export.h> |
10 | #include <linux/fs.h> | 11 | #include <linux/fs.h> |
11 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
@@ -84,22 +85,14 @@ static void free_fdtable_work(struct work_struct *work) | |||
84 | } | 85 | } |
85 | } | 86 | } |
86 | 87 | ||
87 | void free_fdtable_rcu(struct rcu_head *rcu) | 88 | static void free_fdtable_rcu(struct rcu_head *rcu) |
88 | { | 89 | { |
89 | struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); | 90 | struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); |
90 | struct fdtable_defer *fddef; | 91 | struct fdtable_defer *fddef; |
91 | 92 | ||
92 | BUG_ON(!fdt); | 93 | BUG_ON(!fdt); |
94 | BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT); | ||
93 | 95 | ||
94 | if (fdt->max_fds <= NR_OPEN_DEFAULT) { | ||
95 | /* | ||
96 | * This fdtable is embedded in the files structure and that | ||
97 | * structure itself is getting destroyed. | ||
98 | */ | ||
99 | kmem_cache_free(files_cachep, | ||
100 | container_of(fdt, struct files_struct, fdtab)); | ||
101 | return; | ||
102 | } | ||
103 | if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { | 96 | if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { |
104 | kfree(fdt->fd); | 97 | kfree(fdt->fd); |
105 | kfree(fdt->open_fds); | 98 | kfree(fdt->open_fds); |
@@ -229,7 +222,7 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
229 | copy_fdtable(new_fdt, cur_fdt); | 222 | copy_fdtable(new_fdt, cur_fdt); |
230 | rcu_assign_pointer(files->fdt, new_fdt); | 223 | rcu_assign_pointer(files->fdt, new_fdt); |
231 | if (cur_fdt->max_fds > NR_OPEN_DEFAULT) | 224 | if (cur_fdt->max_fds > NR_OPEN_DEFAULT) |
232 | free_fdtable(cur_fdt); | 225 | call_rcu(&cur_fdt->rcu, free_fdtable_rcu); |
233 | } else { | 226 | } else { |
234 | /* Somebody else expanded, so undo our attempt */ | 227 | /* Somebody else expanded, so undo our attempt */ |
235 | __free_fdtable(new_fdt); | 228 | __free_fdtable(new_fdt); |
@@ -245,19 +238,12 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
245 | * expanded and execution may have blocked. | 238 | * expanded and execution may have blocked. |
246 | * The files->file_lock should be held on entry, and will be held on exit. | 239 | * The files->file_lock should be held on entry, and will be held on exit. |
247 | */ | 240 | */ |
248 | int expand_files(struct files_struct *files, int nr) | 241 | static int expand_files(struct files_struct *files, int nr) |
249 | { | 242 | { |
250 | struct fdtable *fdt; | 243 | struct fdtable *fdt; |
251 | 244 | ||
252 | fdt = files_fdtable(files); | 245 | fdt = files_fdtable(files); |
253 | 246 | ||
254 | /* | ||
255 | * N.B. For clone tasks sharing a files structure, this test | ||
256 | * will limit the total number of files that can be opened. | ||
257 | */ | ||
258 | if (nr >= rlimit(RLIMIT_NOFILE)) | ||
259 | return -EMFILE; | ||
260 | |||
261 | /* Do we need to expand? */ | 247 | /* Do we need to expand? */ |
262 | if (nr < fdt->max_fds) | 248 | if (nr < fdt->max_fds) |
263 | return 0; | 249 | return 0; |
@@ -270,6 +256,26 @@ int expand_files(struct files_struct *files, int nr) | |||
270 | return expand_fdtable(files, nr); | 256 | return expand_fdtable(files, nr); |
271 | } | 257 | } |
272 | 258 | ||
259 | static inline void __set_close_on_exec(int fd, struct fdtable *fdt) | ||
260 | { | ||
261 | __set_bit(fd, fdt->close_on_exec); | ||
262 | } | ||
263 | |||
264 | static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) | ||
265 | { | ||
266 | __clear_bit(fd, fdt->close_on_exec); | ||
267 | } | ||
268 | |||
269 | static inline void __set_open_fd(int fd, struct fdtable *fdt) | ||
270 | { | ||
271 | __set_bit(fd, fdt->open_fds); | ||
272 | } | ||
273 | |||
274 | static inline void __clear_open_fd(int fd, struct fdtable *fdt) | ||
275 | { | ||
276 | __clear_bit(fd, fdt->open_fds); | ||
277 | } | ||
278 | |||
273 | static int count_open_files(struct fdtable *fdt) | 279 | static int count_open_files(struct fdtable *fdt) |
274 | { | 280 | { |
275 | int size = fdt->max_fds; | 281 | int size = fdt->max_fds; |
@@ -395,6 +401,95 @@ out: | |||
395 | return NULL; | 401 | return NULL; |
396 | } | 402 | } |
397 | 403 | ||
404 | static void close_files(struct files_struct * files) | ||
405 | { | ||
406 | int i, j; | ||
407 | struct fdtable *fdt; | ||
408 | |||
409 | j = 0; | ||
410 | |||
411 | /* | ||
412 | * It is safe to dereference the fd table without RCU or | ||
413 | * ->file_lock because this is the last reference to the | ||
414 | * files structure. But use RCU to shut RCU-lockdep up. | ||
415 | */ | ||
416 | rcu_read_lock(); | ||
417 | fdt = files_fdtable(files); | ||
418 | rcu_read_unlock(); | ||
419 | for (;;) { | ||
420 | unsigned long set; | ||
421 | i = j * BITS_PER_LONG; | ||
422 | if (i >= fdt->max_fds) | ||
423 | break; | ||
424 | set = fdt->open_fds[j++]; | ||
425 | while (set) { | ||
426 | if (set & 1) { | ||
427 | struct file * file = xchg(&fdt->fd[i], NULL); | ||
428 | if (file) { | ||
429 | filp_close(file, files); | ||
430 | cond_resched(); | ||
431 | } | ||
432 | } | ||
433 | i++; | ||
434 | set >>= 1; | ||
435 | } | ||
436 | } | ||
437 | } | ||
438 | |||
439 | struct files_struct *get_files_struct(struct task_struct *task) | ||
440 | { | ||
441 | struct files_struct *files; | ||
442 | |||
443 | task_lock(task); | ||
444 | files = task->files; | ||
445 | if (files) | ||
446 | atomic_inc(&files->count); | ||
447 | task_unlock(task); | ||
448 | |||
449 | return files; | ||
450 | } | ||
451 | |||
452 | void put_files_struct(struct files_struct *files) | ||
453 | { | ||
454 | struct fdtable *fdt; | ||
455 | |||
456 | if (atomic_dec_and_test(&files->count)) { | ||
457 | close_files(files); | ||
458 | /* not really needed, since nobody can see us */ | ||
459 | rcu_read_lock(); | ||
460 | fdt = files_fdtable(files); | ||
461 | rcu_read_unlock(); | ||
462 | /* free the arrays if they are not embedded */ | ||
463 | if (fdt != &files->fdtab) | ||
464 | __free_fdtable(fdt); | ||
465 | kmem_cache_free(files_cachep, files); | ||
466 | } | ||
467 | } | ||
468 | |||
469 | void reset_files_struct(struct files_struct *files) | ||
470 | { | ||
471 | struct task_struct *tsk = current; | ||
472 | struct files_struct *old; | ||
473 | |||
474 | old = tsk->files; | ||
475 | task_lock(tsk); | ||
476 | tsk->files = files; | ||
477 | task_unlock(tsk); | ||
478 | put_files_struct(old); | ||
479 | } | ||
480 | |||
481 | void exit_files(struct task_struct *tsk) | ||
482 | { | ||
483 | struct files_struct * files = tsk->files; | ||
484 | |||
485 | if (files) { | ||
486 | task_lock(tsk); | ||
487 | tsk->files = NULL; | ||
488 | task_unlock(tsk); | ||
489 | put_files_struct(files); | ||
490 | } | ||
491 | } | ||
492 | |||
398 | static void __devinit fdtable_defer_list_init(int cpu) | 493 | static void __devinit fdtable_defer_list_init(int cpu) |
399 | { | 494 | { |
400 | struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); | 495 | struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); |
@@ -424,12 +519,18 @@ struct files_struct init_files = { | |||
424 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), | 519 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), |
425 | }; | 520 | }; |
426 | 521 | ||
522 | void daemonize_descriptors(void) | ||
523 | { | ||
524 | atomic_inc(&init_files.count); | ||
525 | reset_files_struct(&init_files); | ||
526 | } | ||
527 | |||
427 | /* | 528 | /* |
428 | * allocate a file descriptor, mark it busy. | 529 | * allocate a file descriptor, mark it busy. |
429 | */ | 530 | */ |
430 | int alloc_fd(unsigned start, unsigned flags) | 531 | int __alloc_fd(struct files_struct *files, |
532 | unsigned start, unsigned end, unsigned flags) | ||
431 | { | 533 | { |
432 | struct files_struct *files = current->files; | ||
433 | unsigned int fd; | 534 | unsigned int fd; |
434 | int error; | 535 | int error; |
435 | struct fdtable *fdt; | 536 | struct fdtable *fdt; |
@@ -444,6 +545,14 @@ repeat: | |||
444 | if (fd < fdt->max_fds) | 545 | if (fd < fdt->max_fds) |
445 | fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); | 546 | fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); |
446 | 547 | ||
548 | /* | ||
549 | * N.B. For clone tasks sharing a files structure, this test | ||
550 | * will limit the total number of files that can be opened. | ||
551 | */ | ||
552 | error = -EMFILE; | ||
553 | if (fd >= end) | ||
554 | goto out; | ||
555 | |||
447 | error = expand_files(files, fd); | 556 | error = expand_files(files, fd); |
448 | if (error < 0) | 557 | if (error < 0) |
449 | goto out; | 558 | goto out; |
@@ -477,8 +586,424 @@ out: | |||
477 | return error; | 586 | return error; |
478 | } | 587 | } |
479 | 588 | ||
480 | int get_unused_fd(void) | 589 | static int alloc_fd(unsigned start, unsigned flags) |
590 | { | ||
591 | return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); | ||
592 | } | ||
593 | |||
594 | int get_unused_fd_flags(unsigned flags) | ||
595 | { | ||
596 | return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags); | ||
597 | } | ||
598 | EXPORT_SYMBOL(get_unused_fd_flags); | ||
599 | |||
600 | static void __put_unused_fd(struct files_struct *files, unsigned int fd) | ||
601 | { | ||
602 | struct fdtable *fdt = files_fdtable(files); | ||
603 | __clear_open_fd(fd, fdt); | ||
604 | if (fd < files->next_fd) | ||
605 | files->next_fd = fd; | ||
606 | } | ||
607 | |||
608 | void put_unused_fd(unsigned int fd) | ||
609 | { | ||
610 | struct files_struct *files = current->files; | ||
611 | spin_lock(&files->file_lock); | ||
612 | __put_unused_fd(files, fd); | ||
613 | spin_unlock(&files->file_lock); | ||
614 | } | ||
615 | |||
616 | EXPORT_SYMBOL(put_unused_fd); | ||
617 | |||
618 | /* | ||
619 | * Install a file pointer in the fd array. | ||
620 | * | ||
621 | * The VFS is full of places where we drop the files lock between | ||
622 | * setting the open_fds bitmap and installing the file in the file | ||
623 | * array. At any such point, we are vulnerable to a dup2() race | ||
624 | * installing a file in the array before us. We need to detect this and | ||
625 | * fput() the struct file we are about to overwrite in this case. | ||
626 | * | ||
627 | * It should never happen - if we allow dup2() do it, _really_ bad things | ||
628 | * will follow. | ||
629 | * | ||
630 | * NOTE: __fd_install() variant is really, really low-level; don't | ||
631 | * use it unless you are forced to by truly lousy API shoved down | ||
632 | * your throat. 'files' *MUST* be either current->files or obtained | ||
633 | * by get_files_struct(current) done by whoever had given it to you, | ||
634 | * or really bad things will happen. Normally you want to use | ||
635 | * fd_install() instead. | ||
636 | */ | ||
637 | |||
638 | void __fd_install(struct files_struct *files, unsigned int fd, | ||
639 | struct file *file) | ||
640 | { | ||
641 | struct fdtable *fdt; | ||
642 | spin_lock(&files->file_lock); | ||
643 | fdt = files_fdtable(files); | ||
644 | BUG_ON(fdt->fd[fd] != NULL); | ||
645 | rcu_assign_pointer(fdt->fd[fd], file); | ||
646 | spin_unlock(&files->file_lock); | ||
647 | } | ||
648 | |||
649 | void fd_install(unsigned int fd, struct file *file) | ||
481 | { | 650 | { |
482 | return alloc_fd(0, 0); | 651 | __fd_install(current->files, fd, file); |
652 | } | ||
653 | |||
654 | EXPORT_SYMBOL(fd_install); | ||
655 | |||
656 | /* | ||
657 | * The same warnings as for __alloc_fd()/__fd_install() apply here... | ||
658 | */ | ||
659 | int __close_fd(struct files_struct *files, unsigned fd) | ||
660 | { | ||
661 | struct file *file; | ||
662 | struct fdtable *fdt; | ||
663 | |||
664 | spin_lock(&files->file_lock); | ||
665 | fdt = files_fdtable(files); | ||
666 | if (fd >= fdt->max_fds) | ||
667 | goto out_unlock; | ||
668 | file = fdt->fd[fd]; | ||
669 | if (!file) | ||
670 | goto out_unlock; | ||
671 | rcu_assign_pointer(fdt->fd[fd], NULL); | ||
672 | __clear_close_on_exec(fd, fdt); | ||
673 | __put_unused_fd(files, fd); | ||
674 | spin_unlock(&files->file_lock); | ||
675 | return filp_close(file, files); | ||
676 | |||
677 | out_unlock: | ||
678 | spin_unlock(&files->file_lock); | ||
679 | return -EBADF; | ||
680 | } | ||
681 | |||
682 | void do_close_on_exec(struct files_struct *files) | ||
683 | { | ||
684 | unsigned i; | ||
685 | struct fdtable *fdt; | ||
686 | |||
687 | /* exec unshares first */ | ||
688 | BUG_ON(atomic_read(&files->count) != 1); | ||
689 | spin_lock(&files->file_lock); | ||
690 | for (i = 0; ; i++) { | ||
691 | unsigned long set; | ||
692 | unsigned fd = i * BITS_PER_LONG; | ||
693 | fdt = files_fdtable(files); | ||
694 | if (fd >= fdt->max_fds) | ||
695 | break; | ||
696 | set = fdt->close_on_exec[i]; | ||
697 | if (!set) | ||
698 | continue; | ||
699 | fdt->close_on_exec[i] = 0; | ||
700 | for ( ; set ; fd++, set >>= 1) { | ||
701 | struct file *file; | ||
702 | if (!(set & 1)) | ||
703 | continue; | ||
704 | file = fdt->fd[fd]; | ||
705 | if (!file) | ||
706 | continue; | ||
707 | rcu_assign_pointer(fdt->fd[fd], NULL); | ||
708 | __put_unused_fd(files, fd); | ||
709 | spin_unlock(&files->file_lock); | ||
710 | filp_close(file, files); | ||
711 | cond_resched(); | ||
712 | spin_lock(&files->file_lock); | ||
713 | } | ||
714 | |||
715 | } | ||
716 | spin_unlock(&files->file_lock); | ||
717 | } | ||
718 | |||
719 | struct file *fget(unsigned int fd) | ||
720 | { | ||
721 | struct file *file; | ||
722 | struct files_struct *files = current->files; | ||
723 | |||
724 | rcu_read_lock(); | ||
725 | file = fcheck_files(files, fd); | ||
726 | if (file) { | ||
727 | /* File object ref couldn't be taken */ | ||
728 | if (file->f_mode & FMODE_PATH || | ||
729 | !atomic_long_inc_not_zero(&file->f_count)) | ||
730 | file = NULL; | ||
731 | } | ||
732 | rcu_read_unlock(); | ||
733 | |||
734 | return file; | ||
735 | } | ||
736 | |||
737 | EXPORT_SYMBOL(fget); | ||
738 | |||
739 | struct file *fget_raw(unsigned int fd) | ||
740 | { | ||
741 | struct file *file; | ||
742 | struct files_struct *files = current->files; | ||
743 | |||
744 | rcu_read_lock(); | ||
745 | file = fcheck_files(files, fd); | ||
746 | if (file) { | ||
747 | /* File object ref couldn't be taken */ | ||
748 | if (!atomic_long_inc_not_zero(&file->f_count)) | ||
749 | file = NULL; | ||
750 | } | ||
751 | rcu_read_unlock(); | ||
752 | |||
753 | return file; | ||
754 | } | ||
755 | |||
756 | EXPORT_SYMBOL(fget_raw); | ||
757 | |||
758 | /* | ||
759 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. | ||
760 | * | ||
761 | * You can use this instead of fget if you satisfy all of the following | ||
762 | * conditions: | ||
763 | * 1) You must call fput_light before exiting the syscall and returning control | ||
764 | * to userspace (i.e. you cannot remember the returned struct file * after | ||
765 | * returning to userspace). | ||
766 | * 2) You must not call filp_close on the returned struct file * in between | ||
767 | * calls to fget_light and fput_light. | ||
768 | * 3) You must not clone the current task in between the calls to fget_light | ||
769 | * and fput_light. | ||
770 | * | ||
771 | * The fput_needed flag returned by fget_light should be passed to the | ||
772 | * corresponding fput_light. | ||
773 | */ | ||
774 | struct file *fget_light(unsigned int fd, int *fput_needed) | ||
775 | { | ||
776 | struct file *file; | ||
777 | struct files_struct *files = current->files; | ||
778 | |||
779 | *fput_needed = 0; | ||
780 | if (atomic_read(&files->count) == 1) { | ||
781 | file = fcheck_files(files, fd); | ||
782 | if (file && (file->f_mode & FMODE_PATH)) | ||
783 | file = NULL; | ||
784 | } else { | ||
785 | rcu_read_lock(); | ||
786 | file = fcheck_files(files, fd); | ||
787 | if (file) { | ||
788 | if (!(file->f_mode & FMODE_PATH) && | ||
789 | atomic_long_inc_not_zero(&file->f_count)) | ||
790 | *fput_needed = 1; | ||
791 | else | ||
792 | /* Didn't get the reference, someone's freed */ | ||
793 | file = NULL; | ||
794 | } | ||
795 | rcu_read_unlock(); | ||
796 | } | ||
797 | |||
798 | return file; | ||
799 | } | ||
800 | EXPORT_SYMBOL(fget_light); | ||
801 | |||
802 | struct file *fget_raw_light(unsigned int fd, int *fput_needed) | ||
803 | { | ||
804 | struct file *file; | ||
805 | struct files_struct *files = current->files; | ||
806 | |||
807 | *fput_needed = 0; | ||
808 | if (atomic_read(&files->count) == 1) { | ||
809 | file = fcheck_files(files, fd); | ||
810 | } else { | ||
811 | rcu_read_lock(); | ||
812 | file = fcheck_files(files, fd); | ||
813 | if (file) { | ||
814 | if (atomic_long_inc_not_zero(&file->f_count)) | ||
815 | *fput_needed = 1; | ||
816 | else | ||
817 | /* Didn't get the reference, someone's freed */ | ||
818 | file = NULL; | ||
819 | } | ||
820 | rcu_read_unlock(); | ||
821 | } | ||
822 | |||
823 | return file; | ||
824 | } | ||
825 | |||
826 | void set_close_on_exec(unsigned int fd, int flag) | ||
827 | { | ||
828 | struct files_struct *files = current->files; | ||
829 | struct fdtable *fdt; | ||
830 | spin_lock(&files->file_lock); | ||
831 | fdt = files_fdtable(files); | ||
832 | if (flag) | ||
833 | __set_close_on_exec(fd, fdt); | ||
834 | else | ||
835 | __clear_close_on_exec(fd, fdt); | ||
836 | spin_unlock(&files->file_lock); | ||
837 | } | ||
838 | |||
839 | bool get_close_on_exec(unsigned int fd) | ||
840 | { | ||
841 | struct files_struct *files = current->files; | ||
842 | struct fdtable *fdt; | ||
843 | bool res; | ||
844 | rcu_read_lock(); | ||
845 | fdt = files_fdtable(files); | ||
846 | res = close_on_exec(fd, fdt); | ||
847 | rcu_read_unlock(); | ||
848 | return res; | ||
849 | } | ||
850 | |||
851 | static int do_dup2(struct files_struct *files, | ||
852 | struct file *file, unsigned fd, unsigned flags) | ||
853 | { | ||
854 | struct file *tofree; | ||
855 | struct fdtable *fdt; | ||
856 | |||
857 | /* | ||
858 | * We need to detect attempts to do dup2() over allocated but still | ||
859 | * not finished descriptor. NB: OpenBSD avoids that at the price of | ||
860 | * extra work in their equivalent of fget() - they insert struct | ||
861 | * file immediately after grabbing descriptor, mark it larval if | ||
862 | * more work (e.g. actual opening) is needed and make sure that | ||
863 | * fget() treats larval files as absent. Potentially interesting, | ||
864 | * but while extra work in fget() is trivial, locking implications | ||
865 | * and amount of surgery on open()-related paths in VFS are not. | ||
866 | * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" | ||
867 | * deadlocks in rather amusing ways, AFAICS. All of that is out of | ||
868 | * scope of POSIX or SUS, since neither considers shared descriptor | ||
869 | * tables and this condition does not arise without those. | ||
870 | */ | ||
871 | fdt = files_fdtable(files); | ||
872 | tofree = fdt->fd[fd]; | ||
873 | if (!tofree && fd_is_open(fd, fdt)) | ||
874 | goto Ebusy; | ||
875 | get_file(file); | ||
876 | rcu_assign_pointer(fdt->fd[fd], file); | ||
877 | __set_open_fd(fd, fdt); | ||
878 | if (flags & O_CLOEXEC) | ||
879 | __set_close_on_exec(fd, fdt); | ||
880 | else | ||
881 | __clear_close_on_exec(fd, fdt); | ||
882 | spin_unlock(&files->file_lock); | ||
883 | |||
884 | if (tofree) | ||
885 | filp_close(tofree, files); | ||
886 | |||
887 | return fd; | ||
888 | |||
889 | Ebusy: | ||
890 | spin_unlock(&files->file_lock); | ||
891 | return -EBUSY; | ||
892 | } | ||
893 | |||
894 | int replace_fd(unsigned fd, struct file *file, unsigned flags) | ||
895 | { | ||
896 | int err; | ||
897 | struct files_struct *files = current->files; | ||
898 | |||
899 | if (!file) | ||
900 | return __close_fd(files, fd); | ||
901 | |||
902 | if (fd >= rlimit(RLIMIT_NOFILE)) | ||
903 | return -EMFILE; | ||
904 | |||
905 | spin_lock(&files->file_lock); | ||
906 | err = expand_files(files, fd); | ||
907 | if (unlikely(err < 0)) | ||
908 | goto out_unlock; | ||
909 | return do_dup2(files, file, fd, flags); | ||
910 | |||
911 | out_unlock: | ||
912 | spin_unlock(&files->file_lock); | ||
913 | return err; | ||
914 | } | ||
915 | |||
916 | SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) | ||
917 | { | ||
918 | int err = -EBADF; | ||
919 | struct file *file; | ||
920 | struct files_struct *files = current->files; | ||
921 | |||
922 | if ((flags & ~O_CLOEXEC) != 0) | ||
923 | return -EINVAL; | ||
924 | |||
925 | if (newfd >= rlimit(RLIMIT_NOFILE)) | ||
926 | return -EMFILE; | ||
927 | |||
928 | spin_lock(&files->file_lock); | ||
929 | err = expand_files(files, newfd); | ||
930 | file = fcheck(oldfd); | ||
931 | if (unlikely(!file)) | ||
932 | goto Ebadf; | ||
933 | if (unlikely(err < 0)) { | ||
934 | if (err == -EMFILE) | ||
935 | goto Ebadf; | ||
936 | goto out_unlock; | ||
937 | } | ||
938 | return do_dup2(files, file, newfd, flags); | ||
939 | |||
940 | Ebadf: | ||
941 | err = -EBADF; | ||
942 | out_unlock: | ||
943 | spin_unlock(&files->file_lock); | ||
944 | return err; | ||
945 | } | ||
946 | |||
947 | SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) | ||
948 | { | ||
949 | if (unlikely(newfd == oldfd)) { /* corner case */ | ||
950 | struct files_struct *files = current->files; | ||
951 | int retval = oldfd; | ||
952 | |||
953 | rcu_read_lock(); | ||
954 | if (!fcheck_files(files, oldfd)) | ||
955 | retval = -EBADF; | ||
956 | rcu_read_unlock(); | ||
957 | return retval; | ||
958 | } | ||
959 | return sys_dup3(oldfd, newfd, 0); | ||
960 | } | ||
961 | |||
962 | SYSCALL_DEFINE1(dup, unsigned int, fildes) | ||
963 | { | ||
964 | int ret = -EBADF; | ||
965 | struct file *file = fget_raw(fildes); | ||
966 | |||
967 | if (file) { | ||
968 | ret = get_unused_fd(); | ||
969 | if (ret >= 0) | ||
970 | fd_install(ret, file); | ||
971 | else | ||
972 | fput(file); | ||
973 | } | ||
974 | return ret; | ||
975 | } | ||
976 | |||
977 | int f_dupfd(unsigned int from, struct file *file, unsigned flags) | ||
978 | { | ||
979 | int err; | ||
980 | if (from >= rlimit(RLIMIT_NOFILE)) | ||
981 | return -EINVAL; | ||
982 | err = alloc_fd(from, flags); | ||
983 | if (err >= 0) { | ||
984 | get_file(file); | ||
985 | fd_install(err, file); | ||
986 | } | ||
987 | return err; | ||
988 | } | ||
989 | |||
990 | int iterate_fd(struct files_struct *files, unsigned n, | ||
991 | int (*f)(const void *, struct file *, unsigned), | ||
992 | const void *p) | ||
993 | { | ||
994 | struct fdtable *fdt; | ||
995 | struct file *file; | ||
996 | int res = 0; | ||
997 | if (!files) | ||
998 | return 0; | ||
999 | spin_lock(&files->file_lock); | ||
1000 | fdt = files_fdtable(files); | ||
1001 | while (!res && n < fdt->max_fds) { | ||
1002 | file = rcu_dereference_check_fdtable(files, fdt->fd[n++]); | ||
1003 | if (file) | ||
1004 | res = f(p, file, n); | ||
1005 | } | ||
1006 | spin_unlock(&files->file_lock); | ||
1007 | return res; | ||
483 | } | 1008 | } |
484 | EXPORT_SYMBOL(get_unused_fd); | 1009 | EXPORT_SYMBOL(iterate_fd); |
diff --git a/fs/file_table.c b/fs/file_table.c index 701985e4ccda..c6780163bf3e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -339,112 +339,6 @@ void __fput_sync(struct file *file) | |||
339 | 339 | ||
340 | EXPORT_SYMBOL(fput); | 340 | EXPORT_SYMBOL(fput); |
341 | 341 | ||
342 | struct file *fget(unsigned int fd) | ||
343 | { | ||
344 | struct file *file; | ||
345 | struct files_struct *files = current->files; | ||
346 | |||
347 | rcu_read_lock(); | ||
348 | file = fcheck_files(files, fd); | ||
349 | if (file) { | ||
350 | /* File object ref couldn't be taken */ | ||
351 | if (file->f_mode & FMODE_PATH || | ||
352 | !atomic_long_inc_not_zero(&file->f_count)) | ||
353 | file = NULL; | ||
354 | } | ||
355 | rcu_read_unlock(); | ||
356 | |||
357 | return file; | ||
358 | } | ||
359 | |||
360 | EXPORT_SYMBOL(fget); | ||
361 | |||
362 | struct file *fget_raw(unsigned int fd) | ||
363 | { | ||
364 | struct file *file; | ||
365 | struct files_struct *files = current->files; | ||
366 | |||
367 | rcu_read_lock(); | ||
368 | file = fcheck_files(files, fd); | ||
369 | if (file) { | ||
370 | /* File object ref couldn't be taken */ | ||
371 | if (!atomic_long_inc_not_zero(&file->f_count)) | ||
372 | file = NULL; | ||
373 | } | ||
374 | rcu_read_unlock(); | ||
375 | |||
376 | return file; | ||
377 | } | ||
378 | |||
379 | EXPORT_SYMBOL(fget_raw); | ||
380 | |||
381 | /* | ||
382 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. | ||
383 | * | ||
384 | * You can use this instead of fget if you satisfy all of the following | ||
385 | * conditions: | ||
386 | * 1) You must call fput_light before exiting the syscall and returning control | ||
387 | * to userspace (i.e. you cannot remember the returned struct file * after | ||
388 | * returning to userspace). | ||
389 | * 2) You must not call filp_close on the returned struct file * in between | ||
390 | * calls to fget_light and fput_light. | ||
391 | * 3) You must not clone the current task in between the calls to fget_light | ||
392 | * and fput_light. | ||
393 | * | ||
394 | * The fput_needed flag returned by fget_light should be passed to the | ||
395 | * corresponding fput_light. | ||
396 | */ | ||
397 | struct file *fget_light(unsigned int fd, int *fput_needed) | ||
398 | { | ||
399 | struct file *file; | ||
400 | struct files_struct *files = current->files; | ||
401 | |||
402 | *fput_needed = 0; | ||
403 | if (atomic_read(&files->count) == 1) { | ||
404 | file = fcheck_files(files, fd); | ||
405 | if (file && (file->f_mode & FMODE_PATH)) | ||
406 | file = NULL; | ||
407 | } else { | ||
408 | rcu_read_lock(); | ||
409 | file = fcheck_files(files, fd); | ||
410 | if (file) { | ||
411 | if (!(file->f_mode & FMODE_PATH) && | ||
412 | atomic_long_inc_not_zero(&file->f_count)) | ||
413 | *fput_needed = 1; | ||
414 | else | ||
415 | /* Didn't get the reference, someone's freed */ | ||
416 | file = NULL; | ||
417 | } | ||
418 | rcu_read_unlock(); | ||
419 | } | ||
420 | |||
421 | return file; | ||
422 | } | ||
423 | |||
424 | struct file *fget_raw_light(unsigned int fd, int *fput_needed) | ||
425 | { | ||
426 | struct file *file; | ||
427 | struct files_struct *files = current->files; | ||
428 | |||
429 | *fput_needed = 0; | ||
430 | if (atomic_read(&files->count) == 1) { | ||
431 | file = fcheck_files(files, fd); | ||
432 | } else { | ||
433 | rcu_read_lock(); | ||
434 | file = fcheck_files(files, fd); | ||
435 | if (file) { | ||
436 | if (atomic_long_inc_not_zero(&file->f_count)) | ||
437 | *fput_needed = 1; | ||
438 | else | ||
439 | /* Didn't get the reference, someone's freed */ | ||
440 | file = NULL; | ||
441 | } | ||
442 | rcu_read_unlock(); | ||
443 | } | ||
444 | |||
445 | return file; | ||
446 | } | ||
447 | |||
448 | void put_filp(struct file *file) | 342 | void put_filp(struct file *file) |
449 | { | 343 | { |
450 | if (atomic_long_dec_and_test(&file->f_count)) { | 344 | if (atomic_long_dec_and_test(&file->f_count)) { |
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index d4fabd26084e..fed2c8afb3a9 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c | |||
@@ -279,6 +279,11 @@ static void __exit | |||
279 | vxfs_cleanup(void) | 279 | vxfs_cleanup(void) |
280 | { | 280 | { |
281 | unregister_filesystem(&vxfs_fs_type); | 281 | unregister_filesystem(&vxfs_fs_type); |
282 | /* | ||
283 | * Make sure all delayed rcu free inodes are flushed before we | ||
284 | * destroy cache. | ||
285 | */ | ||
286 | rcu_barrier(); | ||
282 | kmem_cache_destroy(vxfs_inode_cachep); | 287 | kmem_cache_destroy(vxfs_inode_cachep); |
283 | } | 288 | } |
284 | 289 | ||
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f4246cfc8d87..8c23fa7a91e6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -148,8 +148,7 @@ static struct fuse_req *get_reserved_req(struct fuse_conn *fc, | |||
148 | if (ff->reserved_req) { | 148 | if (ff->reserved_req) { |
149 | req = ff->reserved_req; | 149 | req = ff->reserved_req; |
150 | ff->reserved_req = NULL; | 150 | ff->reserved_req = NULL; |
151 | get_file(file); | 151 | req->stolen_file = get_file(file); |
152 | req->stolen_file = file; | ||
153 | } | 152 | } |
154 | spin_unlock(&fc->lock); | 153 | spin_unlock(&fc->lock); |
155 | } while (!req); | 154 | } while (!req); |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index fca222dabe3c..f0eda124cffb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -1197,6 +1197,12 @@ static void fuse_fs_cleanup(void) | |||
1197 | { | 1197 | { |
1198 | unregister_filesystem(&fuse_fs_type); | 1198 | unregister_filesystem(&fuse_fs_type); |
1199 | unregister_fuseblk(); | 1199 | unregister_fuseblk(); |
1200 | |||
1201 | /* | ||
1202 | * Make sure all delayed rcu free inodes are flushed before we | ||
1203 | * destroy cache. | ||
1204 | */ | ||
1205 | rcu_barrier(); | ||
1200 | kmem_cache_destroy(fuse_inode_cachep); | 1206 | kmem_cache_destroy(fuse_inode_cachep); |
1201 | } | 1207 | } |
1202 | 1208 | ||
diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 0b63d135a092..e93ddaadfd1e 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c | |||
@@ -492,6 +492,12 @@ static int __init init_hfs_fs(void) | |||
492 | static void __exit exit_hfs_fs(void) | 492 | static void __exit exit_hfs_fs(void) |
493 | { | 493 | { |
494 | unregister_filesystem(&hfs_fs_type); | 494 | unregister_filesystem(&hfs_fs_type); |
495 | |||
496 | /* | ||
497 | * Make sure all delayed rcu free inodes are flushed before we | ||
498 | * destroy cache. | ||
499 | */ | ||
500 | rcu_barrier(); | ||
495 | kmem_cache_destroy(hfs_inode_cachep); | 501 | kmem_cache_destroy(hfs_inode_cachep); |
496 | } | 502 | } |
497 | 503 | ||
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index fdafb2d71654..811a84d2d964 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -635,6 +635,12 @@ static int __init init_hfsplus_fs(void) | |||
635 | static void __exit exit_hfsplus_fs(void) | 635 | static void __exit exit_hfsplus_fs(void) |
636 | { | 636 | { |
637 | unregister_filesystem(&hfsplus_fs_type); | 637 | unregister_filesystem(&hfsplus_fs_type); |
638 | |||
639 | /* | ||
640 | * Make sure all delayed rcu free inodes are flushed before we | ||
641 | * destroy cache. | ||
642 | */ | ||
643 | rcu_barrier(); | ||
638 | kmem_cache_destroy(hfsplus_inode_cachep); | 644 | kmem_cache_destroy(hfsplus_inode_cachep); |
639 | } | 645 | } |
640 | 646 | ||
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index a152783602d9..bc28bf077a6a 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -210,6 +210,11 @@ static int init_inodecache(void) | |||
210 | 210 | ||
211 | static void destroy_inodecache(void) | 211 | static void destroy_inodecache(void) |
212 | { | 212 | { |
213 | /* | ||
214 | * Make sure all delayed rcu free inodes are flushed before we | ||
215 | * destroy cache. | ||
216 | */ | ||
217 | rcu_barrier(); | ||
213 | kmem_cache_destroy(hpfs_inode_cachep); | 218 | kmem_cache_destroy(hpfs_inode_cachep); |
214 | } | 219 | } |
215 | 220 | ||
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6e572c4fbf68..9460120a5170 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -1048,6 +1048,11 @@ static int __init init_hugetlbfs_fs(void) | |||
1048 | 1048 | ||
1049 | static void __exit exit_hugetlbfs_fs(void) | 1049 | static void __exit exit_hugetlbfs_fs(void) |
1050 | { | 1050 | { |
1051 | /* | ||
1052 | * Make sure all delayed rcu free inodes are flushed before we | ||
1053 | * destroy cache. | ||
1054 | */ | ||
1055 | rcu_barrier(); | ||
1051 | kmem_cache_destroy(hugetlbfs_inode_cachep); | 1056 | kmem_cache_destroy(hugetlbfs_inode_cachep); |
1052 | kern_unmount(hugetlbfs_vfsmount); | 1057 | kern_unmount(hugetlbfs_vfsmount); |
1053 | unregister_filesystem(&hugetlbfs_fs_type); | 1058 | unregister_filesystem(&hugetlbfs_fs_type); |
diff --git a/fs/ioctl.c b/fs/ioctl.c index 29167bebe874..3bdad6d1f268 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -603,21 +603,14 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, | |||
603 | 603 | ||
604 | SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) | 604 | SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) |
605 | { | 605 | { |
606 | struct file *filp; | 606 | int error; |
607 | int error = -EBADF; | 607 | struct fd f = fdget(fd); |
608 | int fput_needed; | 608 | |
609 | 609 | if (!f.file) | |
610 | filp = fget_light(fd, &fput_needed); | 610 | return -EBADF; |
611 | if (!filp) | 611 | error = security_file_ioctl(f.file, cmd, arg); |
612 | goto out; | 612 | if (!error) |
613 | 613 | error = do_vfs_ioctl(f.file, fd, cmd, arg); | |
614 | error = security_file_ioctl(filp, cmd, arg); | 614 | fdput(f); |
615 | if (error) | ||
616 | goto out_fput; | ||
617 | |||
618 | error = do_vfs_ioctl(filp, fd, cmd, arg); | ||
619 | out_fput: | ||
620 | fput_light(filp, fput_needed); | ||
621 | out: | ||
622 | return error; | 615 | return error; |
623 | } | 616 | } |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index a7d8e6cc5e0c..67ce52507d7d 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -115,6 +115,11 @@ static int init_inodecache(void) | |||
115 | 115 | ||
116 | static void destroy_inodecache(void) | 116 | static void destroy_inodecache(void) |
117 | { | 117 | { |
118 | /* | ||
119 | * Make sure all delayed rcu free inodes are flushed before we | ||
120 | * destroy cache. | ||
121 | */ | ||
122 | rcu_barrier(); | ||
118 | kmem_cache_destroy(isofs_inode_cachep); | 123 | kmem_cache_destroy(isofs_inode_cachep); |
119 | } | 124 | } |
120 | 125 | ||
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 61ea41389f90..ff487954cd96 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
@@ -418,6 +418,12 @@ static void __exit exit_jffs2_fs(void) | |||
418 | unregister_filesystem(&jffs2_fs_type); | 418 | unregister_filesystem(&jffs2_fs_type); |
419 | jffs2_destroy_slab_caches(); | 419 | jffs2_destroy_slab_caches(); |
420 | jffs2_compressors_exit(); | 420 | jffs2_compressors_exit(); |
421 | |||
422 | /* | ||
423 | * Make sure all delayed rcu free inodes are flushed before we | ||
424 | * destroy cache. | ||
425 | */ | ||
426 | rcu_barrier(); | ||
421 | kmem_cache_destroy(jffs2_inode_cachep); | 427 | kmem_cache_destroy(jffs2_inode_cachep); |
422 | } | 428 | } |
423 | 429 | ||
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 706692f24033..efdf8835dfca 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -911,6 +911,12 @@ static void __exit exit_jfs_fs(void) | |||
911 | jfs_proc_clean(); | 911 | jfs_proc_clean(); |
912 | #endif | 912 | #endif |
913 | unregister_filesystem(&jfs_fs_type); | 913 | unregister_filesystem(&jfs_fs_type); |
914 | |||
915 | /* | ||
916 | * Make sure all delayed rcu free inodes are flushed before we | ||
917 | * destroy cache. | ||
918 | */ | ||
919 | rcu_barrier(); | ||
914 | kmem_cache_destroy(jfs_inode_cachep); | 920 | kmem_cache_destroy(jfs_inode_cachep); |
915 | } | 921 | } |
916 | 922 | ||
diff --git a/fs/locks.c b/fs/locks.c index 7e81bfc75164..abc7dc6c490b 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -1625,15 +1625,13 @@ EXPORT_SYMBOL(flock_lock_file_wait); | |||
1625 | */ | 1625 | */ |
1626 | SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) | 1626 | SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) |
1627 | { | 1627 | { |
1628 | struct file *filp; | 1628 | struct fd f = fdget(fd); |
1629 | int fput_needed; | ||
1630 | struct file_lock *lock; | 1629 | struct file_lock *lock; |
1631 | int can_sleep, unlock; | 1630 | int can_sleep, unlock; |
1632 | int error; | 1631 | int error; |
1633 | 1632 | ||
1634 | error = -EBADF; | 1633 | error = -EBADF; |
1635 | filp = fget_light(fd, &fput_needed); | 1634 | if (!f.file) |
1636 | if (!filp) | ||
1637 | goto out; | 1635 | goto out; |
1638 | 1636 | ||
1639 | can_sleep = !(cmd & LOCK_NB); | 1637 | can_sleep = !(cmd & LOCK_NB); |
@@ -1641,31 +1639,31 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) | |||
1641 | unlock = (cmd == LOCK_UN); | 1639 | unlock = (cmd == LOCK_UN); |
1642 | 1640 | ||
1643 | if (!unlock && !(cmd & LOCK_MAND) && | 1641 | if (!unlock && !(cmd & LOCK_MAND) && |
1644 | !(filp->f_mode & (FMODE_READ|FMODE_WRITE))) | 1642 | !(f.file->f_mode & (FMODE_READ|FMODE_WRITE))) |
1645 | goto out_putf; | 1643 | goto out_putf; |
1646 | 1644 | ||
1647 | error = flock_make_lock(filp, &lock, cmd); | 1645 | error = flock_make_lock(f.file, &lock, cmd); |
1648 | if (error) | 1646 | if (error) |
1649 | goto out_putf; | 1647 | goto out_putf; |
1650 | if (can_sleep) | 1648 | if (can_sleep) |
1651 | lock->fl_flags |= FL_SLEEP; | 1649 | lock->fl_flags |= FL_SLEEP; |
1652 | 1650 | ||
1653 | error = security_file_lock(filp, lock->fl_type); | 1651 | error = security_file_lock(f.file, lock->fl_type); |
1654 | if (error) | 1652 | if (error) |
1655 | goto out_free; | 1653 | goto out_free; |
1656 | 1654 | ||
1657 | if (filp->f_op && filp->f_op->flock) | 1655 | if (f.file->f_op && f.file->f_op->flock) |
1658 | error = filp->f_op->flock(filp, | 1656 | error = f.file->f_op->flock(f.file, |
1659 | (can_sleep) ? F_SETLKW : F_SETLK, | 1657 | (can_sleep) ? F_SETLKW : F_SETLK, |
1660 | lock); | 1658 | lock); |
1661 | else | 1659 | else |
1662 | error = flock_lock_file_wait(filp, lock); | 1660 | error = flock_lock_file_wait(f.file, lock); |
1663 | 1661 | ||
1664 | out_free: | 1662 | out_free: |
1665 | locks_free_lock(lock); | 1663 | locks_free_lock(lock); |
1666 | 1664 | ||
1667 | out_putf: | 1665 | out_putf: |
1668 | fput_light(filp, fput_needed); | 1666 | fdput(f); |
1669 | out: | 1667 | out: |
1670 | return error; | 1668 | return error; |
1671 | } | 1669 | } |
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index bda39085309f..adb90116d36b 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c | |||
@@ -417,5 +417,10 @@ int logfs_init_inode_cache(void) | |||
417 | 417 | ||
418 | void logfs_destroy_inode_cache(void) | 418 | void logfs_destroy_inode_cache(void) |
419 | { | 419 | { |
420 | /* | ||
421 | * Make sure all delayed rcu free inodes are flushed before we | ||
422 | * destroy cache. | ||
423 | */ | ||
424 | rcu_barrier(); | ||
420 | kmem_cache_destroy(logfs_inode_cache); | 425 | kmem_cache_destroy(logfs_inode_cache); |
421 | } | 426 | } |
diff --git a/fs/minix/inode.c b/fs/minix/inode.c index d0e42c678923..4fc5f8ab1c44 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c | |||
@@ -100,6 +100,11 @@ static int init_inodecache(void) | |||
100 | 100 | ||
101 | static void destroy_inodecache(void) | 101 | static void destroy_inodecache(void) |
102 | { | 102 | { |
103 | /* | ||
104 | * Make sure all delayed rcu free inodes are flushed before we | ||
105 | * destroy cache. | ||
106 | */ | ||
107 | rcu_barrier(); | ||
103 | kmem_cache_destroy(minix_inode_cachep); | 108 | kmem_cache_destroy(minix_inode_cachep); |
104 | } | 109 | } |
105 | 110 | ||
diff --git a/fs/namei.c b/fs/namei.c index a856e7f7b6e3..aa30d19e9edd 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1797,8 +1797,6 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1797 | struct nameidata *nd, struct file **fp) | 1797 | struct nameidata *nd, struct file **fp) |
1798 | { | 1798 | { |
1799 | int retval = 0; | 1799 | int retval = 0; |
1800 | int fput_needed; | ||
1801 | struct file *file; | ||
1802 | 1800 | ||
1803 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ | 1801 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ |
1804 | nd->flags = flags | LOOKUP_JUMPED; | 1802 | nd->flags = flags | LOOKUP_JUMPED; |
@@ -1850,44 +1848,41 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1850 | get_fs_pwd(current->fs, &nd->path); | 1848 | get_fs_pwd(current->fs, &nd->path); |
1851 | } | 1849 | } |
1852 | } else { | 1850 | } else { |
1851 | struct fd f = fdget_raw(dfd); | ||
1853 | struct dentry *dentry; | 1852 | struct dentry *dentry; |
1854 | 1853 | ||
1855 | file = fget_raw_light(dfd, &fput_needed); | 1854 | if (!f.file) |
1856 | retval = -EBADF; | 1855 | return -EBADF; |
1857 | if (!file) | ||
1858 | goto out_fail; | ||
1859 | 1856 | ||
1860 | dentry = file->f_path.dentry; | 1857 | dentry = f.file->f_path.dentry; |
1861 | 1858 | ||
1862 | if (*name) { | 1859 | if (*name) { |
1863 | retval = -ENOTDIR; | 1860 | if (!S_ISDIR(dentry->d_inode->i_mode)) { |
1864 | if (!S_ISDIR(dentry->d_inode->i_mode)) | 1861 | fdput(f); |
1865 | goto fput_fail; | 1862 | return -ENOTDIR; |
1863 | } | ||
1866 | 1864 | ||
1867 | retval = inode_permission(dentry->d_inode, MAY_EXEC); | 1865 | retval = inode_permission(dentry->d_inode, MAY_EXEC); |
1868 | if (retval) | 1866 | if (retval) { |
1869 | goto fput_fail; | 1867 | fdput(f); |
1868 | return retval; | ||
1869 | } | ||
1870 | } | 1870 | } |
1871 | 1871 | ||
1872 | nd->path = file->f_path; | 1872 | nd->path = f.file->f_path; |
1873 | if (flags & LOOKUP_RCU) { | 1873 | if (flags & LOOKUP_RCU) { |
1874 | if (fput_needed) | 1874 | if (f.need_put) |
1875 | *fp = file; | 1875 | *fp = f.file; |
1876 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | 1876 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); |
1877 | lock_rcu_walk(); | 1877 | lock_rcu_walk(); |
1878 | } else { | 1878 | } else { |
1879 | path_get(&file->f_path); | 1879 | path_get(&nd->path); |
1880 | fput_light(file, fput_needed); | 1880 | fdput(f); |
1881 | } | 1881 | } |
1882 | } | 1882 | } |
1883 | 1883 | ||
1884 | nd->inode = nd->path.dentry->d_inode; | 1884 | nd->inode = nd->path.dentry->d_inode; |
1885 | return 0; | 1885 | return 0; |
1886 | |||
1887 | fput_fail: | ||
1888 | fput_light(file, fput_needed); | ||
1889 | out_fail: | ||
1890 | return retval; | ||
1891 | } | 1886 | } |
1892 | 1887 | ||
1893 | static inline int lookup_last(struct nameidata *nd, struct path *path) | 1888 | static inline int lookup_last(struct nameidata *nd, struct path *path) |
@@ -3971,7 +3966,7 @@ EXPORT_SYMBOL(user_path_at); | |||
3971 | EXPORT_SYMBOL(follow_down_one); | 3966 | EXPORT_SYMBOL(follow_down_one); |
3972 | EXPORT_SYMBOL(follow_down); | 3967 | EXPORT_SYMBOL(follow_down); |
3973 | EXPORT_SYMBOL(follow_up); | 3968 | EXPORT_SYMBOL(follow_up); |
3974 | EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ | 3969 | EXPORT_SYMBOL(get_write_access); /* nfsd */ |
3975 | EXPORT_SYMBOL(getname); | 3970 | EXPORT_SYMBOL(getname); |
3976 | EXPORT_SYMBOL(lock_rename); | 3971 | EXPORT_SYMBOL(lock_rename); |
3977 | EXPORT_SYMBOL(lookup_one_len); | 3972 | EXPORT_SYMBOL(lookup_one_len); |
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index eaa74323663a..d7e9fe77188a 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -89,6 +89,11 @@ static int init_inodecache(void) | |||
89 | 89 | ||
90 | static void destroy_inodecache(void) | 90 | static void destroy_inodecache(void) |
91 | { | 91 | { |
92 | /* | ||
93 | * Make sure all delayed rcu free inodes are flushed before we | ||
94 | * destroy cache. | ||
95 | */ | ||
96 | rcu_barrier(); | ||
92 | kmem_cache_destroy(ncp_inode_cachep); | 97 | kmem_cache_destroy(ncp_inode_cachep); |
93 | } | 98 | } |
94 | 99 | ||
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9b47610338f5..e4c716d374a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -1571,6 +1571,11 @@ static int __init nfs_init_inodecache(void) | |||
1571 | 1571 | ||
1572 | static void nfs_destroy_inodecache(void) | 1572 | static void nfs_destroy_inodecache(void) |
1573 | { | 1573 | { |
1574 | /* | ||
1575 | * Make sure all delayed rcu free inodes are flushed before we | ||
1576 | * destroy cache. | ||
1577 | */ | ||
1578 | rcu_barrier(); | ||
1574 | kmem_cache_destroy(nfs_inode_cachep); | 1579 | kmem_cache_destroy(nfs_inode_cachep); |
1575 | } | 1580 | } |
1576 | 1581 | ||
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cc894eda385a..48a1bad37334 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -2837,8 +2837,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag) | |||
2837 | return -ENOMEM; | 2837 | return -ENOMEM; |
2838 | } | 2838 | } |
2839 | fp->fi_lease = fl; | 2839 | fp->fi_lease = fl; |
2840 | fp->fi_deleg_file = fl->fl_file; | 2840 | fp->fi_deleg_file = get_file(fl->fl_file); |
2841 | get_file(fp->fi_deleg_file); | ||
2842 | atomic_set(&fp->fi_delegees, 1); | 2841 | atomic_set(&fp->fi_delegees, 1); |
2843 | list_add(&dp->dl_perfile, &fp->fi_delegations); | 2842 | list_add(&dp->dl_perfile, &fp->fi_delegations); |
2844 | return 0; | 2843 | return 0; |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6a10812711c1..3c991dc84f2f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -1382,6 +1382,12 @@ static void nilfs_segbuf_init_once(void *obj) | |||
1382 | 1382 | ||
1383 | static void nilfs_destroy_cachep(void) | 1383 | static void nilfs_destroy_cachep(void) |
1384 | { | 1384 | { |
1385 | /* | ||
1386 | * Make sure all delayed rcu free inodes are flushed before we | ||
1387 | * destroy cache. | ||
1388 | */ | ||
1389 | rcu_barrier(); | ||
1390 | |||
1385 | if (nilfs_inode_cachep) | 1391 | if (nilfs_inode_cachep) |
1386 | kmem_cache_destroy(nilfs_inode_cachep); | 1392 | kmem_cache_destroy(nilfs_inode_cachep); |
1387 | if (nilfs_transaction_cachep) | 1393 | if (nilfs_transaction_cachep) |
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index d43803669739..721d692fa8d4 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -58,7 +58,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, | |||
58 | return fsnotify_remove_notify_event(group); | 58 | return fsnotify_remove_notify_event(group); |
59 | } | 59 | } |
60 | 60 | ||
61 | static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | 61 | static int create_fd(struct fsnotify_group *group, |
62 | struct fsnotify_event *event, | ||
63 | struct file **file) | ||
62 | { | 64 | { |
63 | int client_fd; | 65 | int client_fd; |
64 | struct file *new_file; | 66 | struct file *new_file; |
@@ -98,7 +100,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | |||
98 | put_unused_fd(client_fd); | 100 | put_unused_fd(client_fd); |
99 | client_fd = PTR_ERR(new_file); | 101 | client_fd = PTR_ERR(new_file); |
100 | } else { | 102 | } else { |
101 | fd_install(client_fd, new_file); | 103 | *file = new_file; |
102 | } | 104 | } |
103 | 105 | ||
104 | return client_fd; | 106 | return client_fd; |
@@ -106,13 +108,15 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | |||
106 | 108 | ||
107 | static int fill_event_metadata(struct fsnotify_group *group, | 109 | static int fill_event_metadata(struct fsnotify_group *group, |
108 | struct fanotify_event_metadata *metadata, | 110 | struct fanotify_event_metadata *metadata, |
109 | struct fsnotify_event *event) | 111 | struct fsnotify_event *event, |
112 | struct file **file) | ||
110 | { | 113 | { |
111 | int ret = 0; | 114 | int ret = 0; |
112 | 115 | ||
113 | pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, | 116 | pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, |
114 | group, metadata, event); | 117 | group, metadata, event); |
115 | 118 | ||
119 | *file = NULL; | ||
116 | metadata->event_len = FAN_EVENT_METADATA_LEN; | 120 | metadata->event_len = FAN_EVENT_METADATA_LEN; |
117 | metadata->metadata_len = FAN_EVENT_METADATA_LEN; | 121 | metadata->metadata_len = FAN_EVENT_METADATA_LEN; |
118 | metadata->vers = FANOTIFY_METADATA_VERSION; | 122 | metadata->vers = FANOTIFY_METADATA_VERSION; |
@@ -121,7 +125,7 @@ static int fill_event_metadata(struct fsnotify_group *group, | |||
121 | if (unlikely(event->mask & FAN_Q_OVERFLOW)) | 125 | if (unlikely(event->mask & FAN_Q_OVERFLOW)) |
122 | metadata->fd = FAN_NOFD; | 126 | metadata->fd = FAN_NOFD; |
123 | else { | 127 | else { |
124 | metadata->fd = create_fd(group, event); | 128 | metadata->fd = create_fd(group, event, file); |
125 | if (metadata->fd < 0) | 129 | if (metadata->fd < 0) |
126 | ret = metadata->fd; | 130 | ret = metadata->fd; |
127 | } | 131 | } |
@@ -220,25 +224,6 @@ static int prepare_for_access_response(struct fsnotify_group *group, | |||
220 | return 0; | 224 | return 0; |
221 | } | 225 | } |
222 | 226 | ||
223 | static void remove_access_response(struct fsnotify_group *group, | ||
224 | struct fsnotify_event *event, | ||
225 | __s32 fd) | ||
226 | { | ||
227 | struct fanotify_response_event *re; | ||
228 | |||
229 | if (!(event->mask & FAN_ALL_PERM_EVENTS)) | ||
230 | return; | ||
231 | |||
232 | re = dequeue_re(group, fd); | ||
233 | if (!re) | ||
234 | return; | ||
235 | |||
236 | BUG_ON(re->event != event); | ||
237 | |||
238 | kmem_cache_free(fanotify_response_event_cache, re); | ||
239 | |||
240 | return; | ||
241 | } | ||
242 | #else | 227 | #else |
243 | static int prepare_for_access_response(struct fsnotify_group *group, | 228 | static int prepare_for_access_response(struct fsnotify_group *group, |
244 | struct fsnotify_event *event, | 229 | struct fsnotify_event *event, |
@@ -247,12 +232,6 @@ static int prepare_for_access_response(struct fsnotify_group *group, | |||
247 | return 0; | 232 | return 0; |
248 | } | 233 | } |
249 | 234 | ||
250 | static void remove_access_response(struct fsnotify_group *group, | ||
251 | struct fsnotify_event *event, | ||
252 | __s32 fd) | ||
253 | { | ||
254 | return; | ||
255 | } | ||
256 | #endif | 235 | #endif |
257 | 236 | ||
258 | static ssize_t copy_event_to_user(struct fsnotify_group *group, | 237 | static ssize_t copy_event_to_user(struct fsnotify_group *group, |
@@ -260,31 +239,33 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
260 | char __user *buf) | 239 | char __user *buf) |
261 | { | 240 | { |
262 | struct fanotify_event_metadata fanotify_event_metadata; | 241 | struct fanotify_event_metadata fanotify_event_metadata; |
242 | struct file *f; | ||
263 | int fd, ret; | 243 | int fd, ret; |
264 | 244 | ||
265 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | 245 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); |
266 | 246 | ||
267 | ret = fill_event_metadata(group, &fanotify_event_metadata, event); | 247 | ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f); |
268 | if (ret < 0) | 248 | if (ret < 0) |
269 | goto out; | 249 | goto out; |
270 | 250 | ||
271 | fd = fanotify_event_metadata.fd; | 251 | fd = fanotify_event_metadata.fd; |
272 | ret = prepare_for_access_response(group, event, fd); | ||
273 | if (ret) | ||
274 | goto out_close_fd; | ||
275 | |||
276 | ret = -EFAULT; | 252 | ret = -EFAULT; |
277 | if (copy_to_user(buf, &fanotify_event_metadata, | 253 | if (copy_to_user(buf, &fanotify_event_metadata, |
278 | fanotify_event_metadata.event_len)) | 254 | fanotify_event_metadata.event_len)) |
279 | goto out_kill_access_response; | 255 | goto out_close_fd; |
280 | 256 | ||
257 | ret = prepare_for_access_response(group, event, fd); | ||
258 | if (ret) | ||
259 | goto out_close_fd; | ||
260 | |||
261 | fd_install(fd, f); | ||
281 | return fanotify_event_metadata.event_len; | 262 | return fanotify_event_metadata.event_len; |
282 | 263 | ||
283 | out_kill_access_response: | ||
284 | remove_access_response(group, event, fd); | ||
285 | out_close_fd: | 264 | out_close_fd: |
286 | if (fd != FAN_NOFD) | 265 | if (fd != FAN_NOFD) { |
287 | sys_close(fd); | 266 | put_unused_fd(fd); |
267 | fput(f); | ||
268 | } | ||
288 | out: | 269 | out: |
289 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 270 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
290 | if (event->mask & FAN_ALL_PERM_EVENTS) { | 271 | if (event->mask & FAN_ALL_PERM_EVENTS) { |
@@ -470,24 +451,22 @@ static int fanotify_find_path(int dfd, const char __user *filename, | |||
470 | dfd, filename, flags); | 451 | dfd, filename, flags); |
471 | 452 | ||
472 | if (filename == NULL) { | 453 | if (filename == NULL) { |
473 | struct file *file; | 454 | struct fd f = fdget(dfd); |
474 | int fput_needed; | ||
475 | 455 | ||
476 | ret = -EBADF; | 456 | ret = -EBADF; |
477 | file = fget_light(dfd, &fput_needed); | 457 | if (!f.file) |
478 | if (!file) | ||
479 | goto out; | 458 | goto out; |
480 | 459 | ||
481 | ret = -ENOTDIR; | 460 | ret = -ENOTDIR; |
482 | if ((flags & FAN_MARK_ONLYDIR) && | 461 | if ((flags & FAN_MARK_ONLYDIR) && |
483 | !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) { | 462 | !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) { |
484 | fput_light(file, fput_needed); | 463 | fdput(f); |
485 | goto out; | 464 | goto out; |
486 | } | 465 | } |
487 | 466 | ||
488 | *path = file->f_path; | 467 | *path = f.file->f_path; |
489 | path_get(path); | 468 | path_get(path); |
490 | fput_light(file, fput_needed); | 469 | fdput(f); |
491 | } else { | 470 | } else { |
492 | unsigned int lookup_flags = 0; | 471 | unsigned int lookup_flags = 0; |
493 | 472 | ||
@@ -767,9 +746,9 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, | |||
767 | struct inode *inode = NULL; | 746 | struct inode *inode = NULL; |
768 | struct vfsmount *mnt = NULL; | 747 | struct vfsmount *mnt = NULL; |
769 | struct fsnotify_group *group; | 748 | struct fsnotify_group *group; |
770 | struct file *filp; | 749 | struct fd f; |
771 | struct path path; | 750 | struct path path; |
772 | int ret, fput_needed; | 751 | int ret; |
773 | 752 | ||
774 | pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", | 753 | pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", |
775 | __func__, fanotify_fd, flags, dfd, pathname, mask); | 754 | __func__, fanotify_fd, flags, dfd, pathname, mask); |
@@ -803,15 +782,15 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, | |||
803 | #endif | 782 | #endif |
804 | return -EINVAL; | 783 | return -EINVAL; |
805 | 784 | ||
806 | filp = fget_light(fanotify_fd, &fput_needed); | 785 | f = fdget(fanotify_fd); |
807 | if (unlikely(!filp)) | 786 | if (unlikely(!f.file)) |
808 | return -EBADF; | 787 | return -EBADF; |
809 | 788 | ||
810 | /* verify that this is indeed an fanotify instance */ | 789 | /* verify that this is indeed an fanotify instance */ |
811 | ret = -EINVAL; | 790 | ret = -EINVAL; |
812 | if (unlikely(filp->f_op != &fanotify_fops)) | 791 | if (unlikely(f.file->f_op != &fanotify_fops)) |
813 | goto fput_and_out; | 792 | goto fput_and_out; |
814 | group = filp->private_data; | 793 | group = f.file->private_data; |
815 | 794 | ||
816 | /* | 795 | /* |
817 | * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not | 796 | * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not |
@@ -858,7 +837,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, | |||
858 | 837 | ||
859 | path_put(&path); | 838 | path_put(&path); |
860 | fput_and_out: | 839 | fput_and_out: |
861 | fput_light(filp, fput_needed); | 840 | fdput(f); |
862 | return ret; | 841 | return ret; |
863 | } | 842 | } |
864 | 843 | ||
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 8445fbc8985c..c311dda054a3 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -757,16 +757,16 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, | |||
757 | struct fsnotify_group *group; | 757 | struct fsnotify_group *group; |
758 | struct inode *inode; | 758 | struct inode *inode; |
759 | struct path path; | 759 | struct path path; |
760 | struct file *filp; | 760 | struct fd f; |
761 | int ret, fput_needed; | 761 | int ret; |
762 | unsigned flags = 0; | 762 | unsigned flags = 0; |
763 | 763 | ||
764 | filp = fget_light(fd, &fput_needed); | 764 | f = fdget(fd); |
765 | if (unlikely(!filp)) | 765 | if (unlikely(!f.file)) |
766 | return -EBADF; | 766 | return -EBADF; |
767 | 767 | ||
768 | /* verify that this is indeed an inotify instance */ | 768 | /* verify that this is indeed an inotify instance */ |
769 | if (unlikely(filp->f_op != &inotify_fops)) { | 769 | if (unlikely(f.file->f_op != &inotify_fops)) { |
770 | ret = -EINVAL; | 770 | ret = -EINVAL; |
771 | goto fput_and_out; | 771 | goto fput_and_out; |
772 | } | 772 | } |
@@ -782,13 +782,13 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, | |||
782 | 782 | ||
783 | /* inode held in place by reference to path; group by fget on fd */ | 783 | /* inode held in place by reference to path; group by fget on fd */ |
784 | inode = path.dentry->d_inode; | 784 | inode = path.dentry->d_inode; |
785 | group = filp->private_data; | 785 | group = f.file->private_data; |
786 | 786 | ||
787 | /* create/update an inode mark */ | 787 | /* create/update an inode mark */ |
788 | ret = inotify_update_watch(group, inode, mask); | 788 | ret = inotify_update_watch(group, inode, mask); |
789 | path_put(&path); | 789 | path_put(&path); |
790 | fput_and_out: | 790 | fput_and_out: |
791 | fput_light(filp, fput_needed); | 791 | fdput(f); |
792 | return ret; | 792 | return ret; |
793 | } | 793 | } |
794 | 794 | ||
@@ -796,19 +796,19 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) | |||
796 | { | 796 | { |
797 | struct fsnotify_group *group; | 797 | struct fsnotify_group *group; |
798 | struct inotify_inode_mark *i_mark; | 798 | struct inotify_inode_mark *i_mark; |
799 | struct file *filp; | 799 | struct fd f; |
800 | int ret = 0, fput_needed; | 800 | int ret = 0; |
801 | 801 | ||
802 | filp = fget_light(fd, &fput_needed); | 802 | f = fdget(fd); |
803 | if (unlikely(!filp)) | 803 | if (unlikely(!f.file)) |
804 | return -EBADF; | 804 | return -EBADF; |
805 | 805 | ||
806 | /* verify that this is indeed an inotify instance */ | 806 | /* verify that this is indeed an inotify instance */ |
807 | ret = -EINVAL; | 807 | ret = -EINVAL; |
808 | if (unlikely(filp->f_op != &inotify_fops)) | 808 | if (unlikely(f.file->f_op != &inotify_fops)) |
809 | goto out; | 809 | goto out; |
810 | 810 | ||
811 | group = filp->private_data; | 811 | group = f.file->private_data; |
812 | 812 | ||
813 | ret = -EINVAL; | 813 | ret = -EINVAL; |
814 | i_mark = inotify_idr_find(group, wd); | 814 | i_mark = inotify_idr_find(group, wd); |
@@ -823,7 +823,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) | |||
823 | fsnotify_put_mark(&i_mark->fsn_mark); | 823 | fsnotify_put_mark(&i_mark->fsn_mark); |
824 | 824 | ||
825 | out: | 825 | out: |
826 | fput_light(filp, fput_needed); | 826 | fdput(f); |
827 | return ret; | 827 | return ret; |
828 | } | 828 | } |
829 | 829 | ||
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index da01c165067d..4a8289f8b16c 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c | |||
@@ -3193,6 +3193,12 @@ static void __exit exit_ntfs_fs(void) | |||
3193 | ntfs_debug("Unregistering NTFS driver."); | 3193 | ntfs_debug("Unregistering NTFS driver."); |
3194 | 3194 | ||
3195 | unregister_filesystem(&ntfs_fs_type); | 3195 | unregister_filesystem(&ntfs_fs_type); |
3196 | |||
3197 | /* | ||
3198 | * Make sure all delayed rcu free inodes are flushed before we | ||
3199 | * destroy cache. | ||
3200 | */ | ||
3201 | rcu_barrier(); | ||
3196 | kmem_cache_destroy(ntfs_big_inode_cache); | 3202 | kmem_cache_destroy(ntfs_big_inode_cache); |
3197 | kmem_cache_destroy(ntfs_inode_cache); | 3203 | kmem_cache_destroy(ntfs_inode_cache); |
3198 | kmem_cache_destroy(ntfs_name_cache); | 3204 | kmem_cache_destroy(ntfs_name_cache); |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index a4e855e3690e..f7c648d7d6bf 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -1746,8 +1746,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1746 | long fd; | 1746 | long fd; |
1747 | int sectsize; | 1747 | int sectsize; |
1748 | char *p = (char *)page; | 1748 | char *p = (char *)page; |
1749 | struct file *filp = NULL; | 1749 | struct fd f; |
1750 | struct inode *inode = NULL; | 1750 | struct inode *inode; |
1751 | ssize_t ret = -EINVAL; | 1751 | ssize_t ret = -EINVAL; |
1752 | int live_threshold; | 1752 | int live_threshold; |
1753 | 1753 | ||
@@ -1766,26 +1766,26 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1766 | if (fd < 0 || fd >= INT_MAX) | 1766 | if (fd < 0 || fd >= INT_MAX) |
1767 | goto out; | 1767 | goto out; |
1768 | 1768 | ||
1769 | filp = fget(fd); | 1769 | f = fdget(fd); |
1770 | if (filp == NULL) | 1770 | if (f.file == NULL) |
1771 | goto out; | 1771 | goto out; |
1772 | 1772 | ||
1773 | if (reg->hr_blocks == 0 || reg->hr_start_block == 0 || | 1773 | if (reg->hr_blocks == 0 || reg->hr_start_block == 0 || |
1774 | reg->hr_block_bytes == 0) | 1774 | reg->hr_block_bytes == 0) |
1775 | goto out; | 1775 | goto out2; |
1776 | 1776 | ||
1777 | inode = igrab(filp->f_mapping->host); | 1777 | inode = igrab(f.file->f_mapping->host); |
1778 | if (inode == NULL) | 1778 | if (inode == NULL) |
1779 | goto out; | 1779 | goto out2; |
1780 | 1780 | ||
1781 | if (!S_ISBLK(inode->i_mode)) | 1781 | if (!S_ISBLK(inode->i_mode)) |
1782 | goto out; | 1782 | goto out3; |
1783 | 1783 | ||
1784 | reg->hr_bdev = I_BDEV(filp->f_mapping->host); | 1784 | reg->hr_bdev = I_BDEV(f.file->f_mapping->host); |
1785 | ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); | 1785 | ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); |
1786 | if (ret) { | 1786 | if (ret) { |
1787 | reg->hr_bdev = NULL; | 1787 | reg->hr_bdev = NULL; |
1788 | goto out; | 1788 | goto out3; |
1789 | } | 1789 | } |
1790 | inode = NULL; | 1790 | inode = NULL; |
1791 | 1791 | ||
@@ -1797,7 +1797,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1797 | "blocksize %u incorrect for device, expected %d", | 1797 | "blocksize %u incorrect for device, expected %d", |
1798 | reg->hr_block_bytes, sectsize); | 1798 | reg->hr_block_bytes, sectsize); |
1799 | ret = -EINVAL; | 1799 | ret = -EINVAL; |
1800 | goto out; | 1800 | goto out3; |
1801 | } | 1801 | } |
1802 | 1802 | ||
1803 | o2hb_init_region_params(reg); | 1803 | o2hb_init_region_params(reg); |
@@ -1811,13 +1811,13 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1811 | ret = o2hb_map_slot_data(reg); | 1811 | ret = o2hb_map_slot_data(reg); |
1812 | if (ret) { | 1812 | if (ret) { |
1813 | mlog_errno(ret); | 1813 | mlog_errno(ret); |
1814 | goto out; | 1814 | goto out3; |
1815 | } | 1815 | } |
1816 | 1816 | ||
1817 | ret = o2hb_populate_slot_data(reg); | 1817 | ret = o2hb_populate_slot_data(reg); |
1818 | if (ret) { | 1818 | if (ret) { |
1819 | mlog_errno(ret); | 1819 | mlog_errno(ret); |
1820 | goto out; | 1820 | goto out3; |
1821 | } | 1821 | } |
1822 | 1822 | ||
1823 | INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout); | 1823 | INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout); |
@@ -1847,7 +1847,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1847 | if (IS_ERR(hb_task)) { | 1847 | if (IS_ERR(hb_task)) { |
1848 | ret = PTR_ERR(hb_task); | 1848 | ret = PTR_ERR(hb_task); |
1849 | mlog_errno(ret); | 1849 | mlog_errno(ret); |
1850 | goto out; | 1850 | goto out3; |
1851 | } | 1851 | } |
1852 | 1852 | ||
1853 | spin_lock(&o2hb_live_lock); | 1853 | spin_lock(&o2hb_live_lock); |
@@ -1863,7 +1863,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1863 | 1863 | ||
1864 | if (reg->hr_aborted_start) { | 1864 | if (reg->hr_aborted_start) { |
1865 | ret = -EIO; | 1865 | ret = -EIO; |
1866 | goto out; | 1866 | goto out3; |
1867 | } | 1867 | } |
1868 | 1868 | ||
1869 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ | 1869 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ |
@@ -1882,11 +1882,11 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1882 | printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", | 1882 | printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", |
1883 | config_item_name(&reg->hr_item), reg->hr_dev_name); | 1883 | config_item_name(&reg->hr_item), reg->hr_dev_name); |
1884 | 1884 | ||
1885 | out3: | ||
1886 | iput(inode); | ||
1887 | out2: | ||
1888 | fdput(f); | ||
1885 | out: | 1889 | out: |
1886 | if (filp) | ||
1887 | fput(filp); | ||
1888 | if (inode) | ||
1889 | iput(inode); | ||
1890 | if (ret < 0) { | 1890 | if (ret < 0) { |
1891 | if (reg->hr_bdev) { | 1891 | if (reg->hr_bdev) { |
1892 | blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); | 1892 | blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); |
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 83b6f98e0665..16b712d260d4 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
@@ -691,6 +691,11 @@ static void __exit exit_dlmfs_fs(void) | |||
691 | flush_workqueue(user_dlm_worker); | 691 | flush_workqueue(user_dlm_worker); |
692 | destroy_workqueue(user_dlm_worker); | 692 | destroy_workqueue(user_dlm_worker); |
693 | 693 | ||
694 | /* | ||
695 | * Make sure all delayed rcu free inodes are flushed before we | ||
696 | * destroy cache. | ||
697 | */ | ||
698 | rcu_barrier(); | ||
694 | kmem_cache_destroy(dlmfs_inode_cache); | 699 | kmem_cache_destroy(dlmfs_inode_cache); |
695 | 700 | ||
696 | bdi_destroy(&dlmfs_backing_dev_info); | 701 | bdi_destroy(&dlmfs_backing_dev_info); |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 68f4541c2db9..0e91ec22a940 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1818,6 +1818,11 @@ static int ocfs2_initialize_mem_caches(void) | |||
1818 | 1818 | ||
1819 | static void ocfs2_free_mem_caches(void) | 1819 | static void ocfs2_free_mem_caches(void) |
1820 | { | 1820 | { |
1821 | /* | ||
1822 | * Make sure all delayed rcu free inodes are flushed before we | ||
1823 | * destroy cache. | ||
1824 | */ | ||
1825 | rcu_barrier(); | ||
1821 | if (ocfs2_inode_cachep) | 1826 | if (ocfs2_inode_cachep) |
1822 | kmem_cache_destroy(ocfs2_inode_cachep); | 1827 | kmem_cache_destroy(ocfs2_inode_cachep); |
1823 | ocfs2_inode_cachep = NULL; | 1828 | ocfs2_inode_cachep = NULL; |
@@ -132,27 +132,27 @@ SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) | |||
132 | 132 | ||
133 | static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) | 133 | static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) |
134 | { | 134 | { |
135 | struct inode * inode; | 135 | struct inode *inode; |
136 | struct dentry *dentry; | 136 | struct dentry *dentry; |
137 | struct file * file; | 137 | struct fd f; |
138 | int error; | 138 | int error; |
139 | 139 | ||
140 | error = -EINVAL; | 140 | error = -EINVAL; |
141 | if (length < 0) | 141 | if (length < 0) |
142 | goto out; | 142 | goto out; |
143 | error = -EBADF; | 143 | error = -EBADF; |
144 | file = fget(fd); | 144 | f = fdget(fd); |
145 | if (!file) | 145 | if (!f.file) |
146 | goto out; | 146 | goto out; |
147 | 147 | ||
148 | /* explicitly opened as large or we are on 64-bit box */ | 148 | /* explicitly opened as large or we are on 64-bit box */ |
149 | if (file->f_flags & O_LARGEFILE) | 149 | if (f.file->f_flags & O_LARGEFILE) |
150 | small = 0; | 150 | small = 0; |
151 | 151 | ||
152 | dentry = file->f_path.dentry; | 152 | dentry = f.file->f_path.dentry; |
153 | inode = dentry->d_inode; | 153 | inode = dentry->d_inode; |
154 | error = -EINVAL; | 154 | error = -EINVAL; |
155 | if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) | 155 | if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) |
156 | goto out_putf; | 156 | goto out_putf; |
157 | 157 | ||
158 | error = -EINVAL; | 158 | error = -EINVAL; |
@@ -165,14 +165,14 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) | |||
165 | goto out_putf; | 165 | goto out_putf; |
166 | 166 | ||
167 | sb_start_write(inode->i_sb); | 167 | sb_start_write(inode->i_sb); |
168 | error = locks_verify_truncate(inode, file, length); | 168 | error = locks_verify_truncate(inode, f.file, length); |
169 | if (!error) | 169 | if (!error) |
170 | error = security_path_truncate(&file->f_path); | 170 | error = security_path_truncate(&f.file->f_path); |
171 | if (!error) | 171 | if (!error) |
172 | error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); | 172 | error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); |
173 | sb_end_write(inode->i_sb); | 173 | sb_end_write(inode->i_sb); |
174 | out_putf: | 174 | out_putf: |
175 | fput(file); | 175 | fdput(f); |
176 | out: | 176 | out: |
177 | return error; | 177 | return error; |
178 | } | 178 | } |
@@ -276,15 +276,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
276 | 276 | ||
277 | SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) | 277 | SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) |
278 | { | 278 | { |
279 | struct file *file; | 279 | struct fd f = fdget(fd); |
280 | int error = -EBADF; | 280 | int error = -EBADF; |
281 | 281 | ||
282 | file = fget(fd); | 282 | if (f.file) { |
283 | if (file) { | 283 | error = do_fallocate(f.file, mode, offset, len); |
284 | error = do_fallocate(file, mode, offset, len); | 284 | fdput(f); |
285 | fput(file); | ||
286 | } | 285 | } |
287 | |||
288 | return error; | 286 | return error; |
289 | } | 287 | } |
290 | 288 | ||
@@ -400,16 +398,15 @@ out: | |||
400 | 398 | ||
401 | SYSCALL_DEFINE1(fchdir, unsigned int, fd) | 399 | SYSCALL_DEFINE1(fchdir, unsigned int, fd) |
402 | { | 400 | { |
403 | struct file *file; | 401 | struct fd f = fdget_raw(fd); |
404 | struct inode *inode; | 402 | struct inode *inode; |
405 | int error, fput_needed; | 403 | int error = -EBADF; |
406 | 404 | ||
407 | error = -EBADF; | 405 | error = -EBADF; |
408 | file = fget_raw_light(fd, &fput_needed); | 406 | if (!f.file) |
409 | if (!file) | ||
410 | goto out; | 407 | goto out; |
411 | 408 | ||
412 | inode = file->f_path.dentry->d_inode; | 409 | inode = f.file->f_path.dentry->d_inode; |
413 | 410 | ||
414 | error = -ENOTDIR; | 411 | error = -ENOTDIR; |
415 | if (!S_ISDIR(inode->i_mode)) | 412 | if (!S_ISDIR(inode->i_mode)) |
@@ -417,9 +414,9 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) | |||
417 | 414 | ||
418 | error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); | 415 | error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); |
419 | if (!error) | 416 | if (!error) |
420 | set_fs_pwd(current->fs, &file->f_path); | 417 | set_fs_pwd(current->fs, &f.file->f_path); |
421 | out_putf: | 418 | out_putf: |
422 | fput_light(file, fput_needed); | 419 | fdput(f); |
423 | out: | 420 | out: |
424 | return error; | 421 | return error; |
425 | } | 422 | } |
@@ -582,23 +579,20 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group | |||
582 | 579 | ||
583 | SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) | 580 | SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) |
584 | { | 581 | { |
585 | struct file * file; | 582 | struct fd f = fdget(fd); |
586 | int error = -EBADF; | 583 | int error = -EBADF; |
587 | struct dentry * dentry; | ||
588 | 584 | ||
589 | file = fget(fd); | 585 | if (!f.file) |
590 | if (!file) | ||
591 | goto out; | 586 | goto out; |
592 | 587 | ||
593 | error = mnt_want_write_file(file); | 588 | error = mnt_want_write_file(f.file); |
594 | if (error) | 589 | if (error) |
595 | goto out_fput; | 590 | goto out_fput; |
596 | dentry = file->f_path.dentry; | 591 | audit_inode(NULL, f.file->f_path.dentry); |
597 | audit_inode(NULL, dentry); | 592 | error = chown_common(&f.file->f_path, user, group); |
598 | error = chown_common(&file->f_path, user, group); | 593 | mnt_drop_write_file(f.file); |
599 | mnt_drop_write_file(file); | ||
600 | out_fput: | 594 | out_fput: |
601 | fput(file); | 595 | fdput(f); |
602 | out: | 596 | out: |
603 | return error; | 597 | return error; |
604 | } | 598 | } |
@@ -803,50 +797,6 @@ struct file *dentry_open(const struct path *path, int flags, | |||
803 | } | 797 | } |
804 | EXPORT_SYMBOL(dentry_open); | 798 | EXPORT_SYMBOL(dentry_open); |
805 | 799 | ||
806 | static void __put_unused_fd(struct files_struct *files, unsigned int fd) | ||
807 | { | ||
808 | struct fdtable *fdt = files_fdtable(files); | ||
809 | __clear_open_fd(fd, fdt); | ||
810 | if (fd < files->next_fd) | ||
811 | files->next_fd = fd; | ||
812 | } | ||
813 | |||
814 | void put_unused_fd(unsigned int fd) | ||
815 | { | ||
816 | struct files_struct *files = current->files; | ||
817 | spin_lock(&files->file_lock); | ||
818 | __put_unused_fd(files, fd); | ||
819 | spin_unlock(&files->file_lock); | ||
820 | } | ||
821 | |||
822 | EXPORT_SYMBOL(put_unused_fd); | ||
823 | |||
824 | /* | ||
825 | * Install a file pointer in the fd array. | ||
826 | * | ||
827 | * The VFS is full of places where we drop the files lock between | ||
828 | * setting the open_fds bitmap and installing the file in the file | ||
829 | * array. At any such point, we are vulnerable to a dup2() race | ||
830 | * installing a file in the array before us. We need to detect this and | ||
831 | * fput() the struct file we are about to overwrite in this case. | ||
832 | * | ||
833 | * It should never happen - if we allow dup2() do it, _really_ bad things | ||
834 | * will follow. | ||
835 | */ | ||
836 | |||
837 | void fd_install(unsigned int fd, struct file *file) | ||
838 | { | ||
839 | struct files_struct *files = current->files; | ||
840 | struct fdtable *fdt; | ||
841 | spin_lock(&files->file_lock); | ||
842 | fdt = files_fdtable(files); | ||
843 | BUG_ON(fdt->fd[fd] != NULL); | ||
844 | rcu_assign_pointer(fdt->fd[fd], file); | ||
845 | spin_unlock(&files->file_lock); | ||
846 | } | ||
847 | |||
848 | EXPORT_SYMBOL(fd_install); | ||
849 | |||
850 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) | 800 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) |
851 | { | 801 | { |
852 | int lookup_flags = 0; | 802 | int lookup_flags = 0; |
@@ -858,7 +808,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o | |||
858 | op->mode = 0; | 808 | op->mode = 0; |
859 | 809 | ||
860 | /* Must never be set by userspace */ | 810 | /* Must never be set by userspace */ |
861 | flags &= ~FMODE_NONOTIFY; | 811 | flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC; |
862 | 812 | ||
863 | /* | 813 | /* |
864 | * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only | 814 | * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only |
@@ -1038,23 +988,7 @@ EXPORT_SYMBOL(filp_close); | |||
1038 | */ | 988 | */ |
1039 | SYSCALL_DEFINE1(close, unsigned int, fd) | 989 | SYSCALL_DEFINE1(close, unsigned int, fd) |
1040 | { | 990 | { |
1041 | struct file * filp; | 991 | int retval = __close_fd(current->files, fd); |
1042 | struct files_struct *files = current->files; | ||
1043 | struct fdtable *fdt; | ||
1044 | int retval; | ||
1045 | |||
1046 | spin_lock(&files->file_lock); | ||
1047 | fdt = files_fdtable(files); | ||
1048 | if (fd >= fdt->max_fds) | ||
1049 | goto out_unlock; | ||
1050 | filp = fdt->fd[fd]; | ||
1051 | if (!filp) | ||
1052 | goto out_unlock; | ||
1053 | rcu_assign_pointer(fdt->fd[fd], NULL); | ||
1054 | __clear_close_on_exec(fd, fdt); | ||
1055 | __put_unused_fd(files, fd); | ||
1056 | spin_unlock(&files->file_lock); | ||
1057 | retval = filp_close(filp, files); | ||
1058 | 992 | ||
1059 | /* can't restart close syscall because file table entry was cleared */ | 993 | /* can't restart close syscall because file table entry was cleared */ |
1060 | if (unlikely(retval == -ERESTARTSYS || | 994 | if (unlikely(retval == -ERESTARTSYS || |
@@ -1064,10 +998,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd) | |||
1064 | retval = -EINTR; | 998 | retval = -EINTR; |
1065 | 999 | ||
1066 | return retval; | 1000 | return retval; |
1067 | |||
1068 | out_unlock: | ||
1069 | spin_unlock(&files->file_lock); | ||
1070 | return -EBADF; | ||
1071 | } | 1001 | } |
1072 | EXPORT_SYMBOL(sys_close); | 1002 | EXPORT_SYMBOL(sys_close); |
1073 | 1003 | ||
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 4a3477949bca..2ad080faca34 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c | |||
@@ -463,6 +463,11 @@ static int __init init_openprom_fs(void) | |||
463 | static void __exit exit_openprom_fs(void) | 463 | static void __exit exit_openprom_fs(void) |
464 | { | 464 | { |
465 | unregister_filesystem(&openprom_fs_type); | 465 | unregister_filesystem(&openprom_fs_type); |
466 | /* | ||
467 | * Make sure all delayed rcu free inodes are flushed before we | ||
468 | * destroy cache. | ||
469 | */ | ||
470 | rcu_barrier(); | ||
466 | kmem_cache_destroy(op_inode_cachep); | 471 | kmem_cache_destroy(op_inode_cachep); |
467 | } | 472 | } |
468 | 473 | ||
@@ -1064,9 +1064,8 @@ err_inode: | |||
1064 | return err; | 1064 | return err; |
1065 | } | 1065 | } |
1066 | 1066 | ||
1067 | int do_pipe_flags(int *fd, int flags) | 1067 | static int __do_pipe_flags(int *fd, struct file **files, int flags) |
1068 | { | 1068 | { |
1069 | struct file *files[2]; | ||
1070 | int error; | 1069 | int error; |
1071 | int fdw, fdr; | 1070 | int fdw, fdr; |
1072 | 1071 | ||
@@ -1088,11 +1087,8 @@ int do_pipe_flags(int *fd, int flags) | |||
1088 | fdw = error; | 1087 | fdw = error; |
1089 | 1088 | ||
1090 | audit_fd_pair(fdr, fdw); | 1089 | audit_fd_pair(fdr, fdw); |
1091 | fd_install(fdr, files[0]); | ||
1092 | fd_install(fdw, files[1]); | ||
1093 | fd[0] = fdr; | 1090 | fd[0] = fdr; |
1094 | fd[1] = fdw; | 1091 | fd[1] = fdw; |
1095 | |||
1096 | return 0; | 1092 | return 0; |
1097 | 1093 | ||
1098 | err_fdr: | 1094 | err_fdr: |
@@ -1103,21 +1099,38 @@ int do_pipe_flags(int *fd, int flags) | |||
1103 | return error; | 1099 | return error; |
1104 | } | 1100 | } |
1105 | 1101 | ||
1102 | int do_pipe_flags(int *fd, int flags) | ||
1103 | { | ||
1104 | struct file *files[2]; | ||
1105 | int error = __do_pipe_flags(fd, files, flags); | ||
1106 | if (!error) { | ||
1107 | fd_install(fd[0], files[0]); | ||
1108 | fd_install(fd[1], files[1]); | ||
1109 | } | ||
1110 | return error; | ||
1111 | } | ||
1112 | |||
1106 | /* | 1113 | /* |
1107 | * sys_pipe() is the normal C calling standard for creating | 1114 | * sys_pipe() is the normal C calling standard for creating |
1108 | * a pipe. It's not the way Unix traditionally does this, though. | 1115 | * a pipe. It's not the way Unix traditionally does this, though. |
1109 | */ | 1116 | */ |
1110 | SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) | 1117 | SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) |
1111 | { | 1118 | { |
1119 | struct file *files[2]; | ||
1112 | int fd[2]; | 1120 | int fd[2]; |
1113 | int error; | 1121 | int error; |
1114 | 1122 | ||
1115 | error = do_pipe_flags(fd, flags); | 1123 | error = __do_pipe_flags(fd, files, flags); |
1116 | if (!error) { | 1124 | if (!error) { |
1117 | if (copy_to_user(fildes, fd, sizeof(fd))) { | 1125 | if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) { |
1118 | sys_close(fd[0]); | 1126 | fput(files[0]); |
1119 | sys_close(fd[1]); | 1127 | fput(files[1]); |
1128 | put_unused_fd(fd[0]); | ||
1129 | put_unused_fd(fd[1]); | ||
1120 | error = -EFAULT; | 1130 | error = -EFAULT; |
1131 | } else { | ||
1132 | fd_install(fd[0], files[0]); | ||
1133 | fd_install(fd[1], files[1]); | ||
1121 | } | 1134 | } |
1122 | } | 1135 | } |
1123 | return error; | 1136 | return error; |
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index c1c729335924..99349efbbc2b 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile | |||
@@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o | |||
8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o | 8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o |
9 | 9 | ||
10 | proc-y += inode.o root.o base.o generic.o array.o \ | 10 | proc-y += inode.o root.o base.o generic.o array.o \ |
11 | proc_tty.o | 11 | proc_tty.o fd.o |
12 | proc-y += cmdline.o | 12 | proc-y += cmdline.o |
13 | proc-y += consoles.o | 13 | proc-y += consoles.o |
14 | proc-y += cpuinfo.o | 14 | proc-y += cpuinfo.o |
diff --git a/fs/proc/base.c b/fs/proc/base.c index acd1960c28a2..d295af993677 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -90,6 +90,7 @@ | |||
90 | #endif | 90 | #endif |
91 | #include <trace/events/oom.h> | 91 | #include <trace/events/oom.h> |
92 | #include "internal.h" | 92 | #include "internal.h" |
93 | #include "fd.h" | ||
93 | 94 | ||
94 | /* NOTE: | 95 | /* NOTE: |
95 | * Implementing inode permission operations in /proc is almost | 96 | * Implementing inode permission operations in /proc is almost |
@@ -136,8 +137,6 @@ struct pid_entry { | |||
136 | NULL, &proc_single_file_operations, \ | 137 | NULL, &proc_single_file_operations, \ |
137 | { .proc_show = show } ) | 138 | { .proc_show = show } ) |
138 | 139 | ||
139 | static int proc_fd_permission(struct inode *inode, int mask); | ||
140 | |||
141 | /* | 140 | /* |
142 | * Count the number of hardlinks for the pid_entry table, excluding the . | 141 | * Count the number of hardlinks for the pid_entry table, excluding the . |
143 | * and .. links. | 142 | * and .. links. |
@@ -1500,7 +1499,7 @@ out: | |||
1500 | return error; | 1499 | return error; |
1501 | } | 1500 | } |
1502 | 1501 | ||
1503 | static const struct inode_operations proc_pid_link_inode_operations = { | 1502 | const struct inode_operations proc_pid_link_inode_operations = { |
1504 | .readlink = proc_pid_readlink, | 1503 | .readlink = proc_pid_readlink, |
1505 | .follow_link = proc_pid_follow_link, | 1504 | .follow_link = proc_pid_follow_link, |
1506 | .setattr = proc_setattr, | 1505 | .setattr = proc_setattr, |
@@ -1509,21 +1508,6 @@ static const struct inode_operations proc_pid_link_inode_operations = { | |||
1509 | 1508 | ||
1510 | /* building an inode */ | 1509 | /* building an inode */ |
1511 | 1510 | ||
1512 | static int task_dumpable(struct task_struct *task) | ||
1513 | { | ||
1514 | int dumpable = 0; | ||
1515 | struct mm_struct *mm; | ||
1516 | |||
1517 | task_lock(task); | ||
1518 | mm = task->mm; | ||
1519 | if (mm) | ||
1520 | dumpable = get_dumpable(mm); | ||
1521 | task_unlock(task); | ||
1522 | if(dumpable == 1) | ||
1523 | return 1; | ||
1524 | return 0; | ||
1525 | } | ||
1526 | |||
1527 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) | 1511 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) |
1528 | { | 1512 | { |
1529 | struct inode * inode; | 1513 | struct inode * inode; |
@@ -1649,15 +1633,6 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) | |||
1649 | return 0; | 1633 | return 0; |
1650 | } | 1634 | } |
1651 | 1635 | ||
1652 | static int pid_delete_dentry(const struct dentry * dentry) | ||
1653 | { | ||
1654 | /* Is the task we represent dead? | ||
1655 | * If so, then don't put the dentry on the lru list, | ||
1656 | * kill it immediately. | ||
1657 | */ | ||
1658 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | ||
1659 | } | ||
1660 | |||
1661 | const struct dentry_operations pid_dentry_operations = | 1636 | const struct dentry_operations pid_dentry_operations = |
1662 | { | 1637 | { |
1663 | .d_revalidate = pid_revalidate, | 1638 | .d_revalidate = pid_revalidate, |
@@ -1720,289 +1695,6 @@ end_instantiate: | |||
1720 | return filldir(dirent, name, len, filp->f_pos, ino, type); | 1695 | return filldir(dirent, name, len, filp->f_pos, ino, type); |
1721 | } | 1696 | } |
1722 | 1697 | ||
1723 | static unsigned name_to_int(struct dentry *dentry) | ||
1724 | { | ||
1725 | const char *name = dentry->d_name.name; | ||
1726 | int len = dentry->d_name.len; | ||
1727 | unsigned n = 0; | ||
1728 | |||
1729 | if (len > 1 && *name == '0') | ||
1730 | goto out; | ||
1731 | while (len-- > 0) { | ||
1732 | unsigned c = *name++ - '0'; | ||
1733 | if (c > 9) | ||
1734 | goto out; | ||
1735 | if (n >= (~0U-9)/10) | ||
1736 | goto out; | ||
1737 | n *= 10; | ||
1738 | n += c; | ||
1739 | } | ||
1740 | return n; | ||
1741 | out: | ||
1742 | return ~0U; | ||
1743 | } | ||
1744 | |||
1745 | #define PROC_FDINFO_MAX 64 | ||
1746 | |||
1747 | static int proc_fd_info(struct inode *inode, struct path *path, char *info) | ||
1748 | { | ||
1749 | struct task_struct *task = get_proc_task(inode); | ||
1750 | struct files_struct *files = NULL; | ||
1751 | struct file *file; | ||
1752 | int fd = proc_fd(inode); | ||
1753 | |||
1754 | if (task) { | ||
1755 | files = get_files_struct(task); | ||
1756 | put_task_struct(task); | ||
1757 | } | ||
1758 | if (files) { | ||
1759 | /* | ||
1760 | * We are not taking a ref to the file structure, so we must | ||
1761 | * hold ->file_lock. | ||
1762 | */ | ||
1763 | spin_lock(&files->file_lock); | ||
1764 | file = fcheck_files(files, fd); | ||
1765 | if (file) { | ||
1766 | unsigned int f_flags; | ||
1767 | struct fdtable *fdt; | ||
1768 | |||
1769 | fdt = files_fdtable(files); | ||
1770 | f_flags = file->f_flags & ~O_CLOEXEC; | ||
1771 | if (close_on_exec(fd, fdt)) | ||
1772 | f_flags |= O_CLOEXEC; | ||
1773 | |||
1774 | if (path) { | ||
1775 | *path = file->f_path; | ||
1776 | path_get(&file->f_path); | ||
1777 | } | ||
1778 | if (info) | ||
1779 | snprintf(info, PROC_FDINFO_MAX, | ||
1780 | "pos:\t%lli\n" | ||
1781 | "flags:\t0%o\n", | ||
1782 | (long long) file->f_pos, | ||
1783 | f_flags); | ||
1784 | spin_unlock(&files->file_lock); | ||
1785 | put_files_struct(files); | ||
1786 | return 0; | ||
1787 | } | ||
1788 | spin_unlock(&files->file_lock); | ||
1789 | put_files_struct(files); | ||
1790 | } | ||
1791 | return -ENOENT; | ||
1792 | } | ||
1793 | |||
1794 | static int proc_fd_link(struct dentry *dentry, struct path *path) | ||
1795 | { | ||
1796 | return proc_fd_info(dentry->d_inode, path, NULL); | ||
1797 | } | ||
1798 | |||
1799 | static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) | ||
1800 | { | ||
1801 | struct inode *inode; | ||
1802 | struct task_struct *task; | ||
1803 | int fd; | ||
1804 | struct files_struct *files; | ||
1805 | const struct cred *cred; | ||
1806 | |||
1807 | if (flags & LOOKUP_RCU) | ||
1808 | return -ECHILD; | ||
1809 | |||
1810 | inode = dentry->d_inode; | ||
1811 | task = get_proc_task(inode); | ||
1812 | fd = proc_fd(inode); | ||
1813 | |||
1814 | if (task) { | ||
1815 | files = get_files_struct(task); | ||
1816 | if (files) { | ||
1817 | struct file *file; | ||
1818 | rcu_read_lock(); | ||
1819 | file = fcheck_files(files, fd); | ||
1820 | if (file) { | ||
1821 | unsigned f_mode = file->f_mode; | ||
1822 | |||
1823 | rcu_read_unlock(); | ||
1824 | put_files_struct(files); | ||
1825 | |||
1826 | if (task_dumpable(task)) { | ||
1827 | rcu_read_lock(); | ||
1828 | cred = __task_cred(task); | ||
1829 | inode->i_uid = cred->euid; | ||
1830 | inode->i_gid = cred->egid; | ||
1831 | rcu_read_unlock(); | ||
1832 | } else { | ||
1833 | inode->i_uid = GLOBAL_ROOT_UID; | ||
1834 | inode->i_gid = GLOBAL_ROOT_GID; | ||
1835 | } | ||
1836 | |||
1837 | if (S_ISLNK(inode->i_mode)) { | ||
1838 | unsigned i_mode = S_IFLNK; | ||
1839 | if (f_mode & FMODE_READ) | ||
1840 | i_mode |= S_IRUSR | S_IXUSR; | ||
1841 | if (f_mode & FMODE_WRITE) | ||
1842 | i_mode |= S_IWUSR | S_IXUSR; | ||
1843 | inode->i_mode = i_mode; | ||
1844 | } | ||
1845 | |||
1846 | security_task_to_inode(task, inode); | ||
1847 | put_task_struct(task); | ||
1848 | return 1; | ||
1849 | } | ||
1850 | rcu_read_unlock(); | ||
1851 | put_files_struct(files); | ||
1852 | } | ||
1853 | put_task_struct(task); | ||
1854 | } | ||
1855 | d_drop(dentry); | ||
1856 | return 0; | ||
1857 | } | ||
1858 | |||
1859 | static const struct dentry_operations tid_fd_dentry_operations = | ||
1860 | { | ||
1861 | .d_revalidate = tid_fd_revalidate, | ||
1862 | .d_delete = pid_delete_dentry, | ||
1863 | }; | ||
1864 | |||
1865 | static struct dentry *proc_fd_instantiate(struct inode *dir, | ||
1866 | struct dentry *dentry, struct task_struct *task, const void *ptr) | ||
1867 | { | ||
1868 | unsigned fd = (unsigned long)ptr; | ||
1869 | struct inode *inode; | ||
1870 | struct proc_inode *ei; | ||
1871 | struct dentry *error = ERR_PTR(-ENOENT); | ||
1872 | |||
1873 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
1874 | if (!inode) | ||
1875 | goto out; | ||
1876 | ei = PROC_I(inode); | ||
1877 | ei->fd = fd; | ||
1878 | |||
1879 | inode->i_mode = S_IFLNK; | ||
1880 | inode->i_op = &proc_pid_link_inode_operations; | ||
1881 | inode->i_size = 64; | ||
1882 | ei->op.proc_get_link = proc_fd_link; | ||
1883 | d_set_d_op(dentry, &tid_fd_dentry_operations); | ||
1884 | d_add(dentry, inode); | ||
1885 | /* Close the race of the process dying before we return the dentry */ | ||
1886 | if (tid_fd_revalidate(dentry, 0)) | ||
1887 | error = NULL; | ||
1888 | |||
1889 | out: | ||
1890 | return error; | ||
1891 | } | ||
1892 | |||
1893 | static struct dentry *proc_lookupfd_common(struct inode *dir, | ||
1894 | struct dentry *dentry, | ||
1895 | instantiate_t instantiate) | ||
1896 | { | ||
1897 | struct task_struct *task = get_proc_task(dir); | ||
1898 | unsigned fd = name_to_int(dentry); | ||
1899 | struct dentry *result = ERR_PTR(-ENOENT); | ||
1900 | |||
1901 | if (!task) | ||
1902 | goto out_no_task; | ||
1903 | if (fd == ~0U) | ||
1904 | goto out; | ||
1905 | |||
1906 | result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); | ||
1907 | out: | ||
1908 | put_task_struct(task); | ||
1909 | out_no_task: | ||
1910 | return result; | ||
1911 | } | ||
1912 | |||
1913 | static int proc_readfd_common(struct file * filp, void * dirent, | ||
1914 | filldir_t filldir, instantiate_t instantiate) | ||
1915 | { | ||
1916 | struct dentry *dentry = filp->f_path.dentry; | ||
1917 | struct inode *inode = dentry->d_inode; | ||
1918 | struct task_struct *p = get_proc_task(inode); | ||
1919 | unsigned int fd, ino; | ||
1920 | int retval; | ||
1921 | struct files_struct * files; | ||
1922 | |||
1923 | retval = -ENOENT; | ||
1924 | if (!p) | ||
1925 | goto out_no_task; | ||
1926 | retval = 0; | ||
1927 | |||
1928 | fd = filp->f_pos; | ||
1929 | switch (fd) { | ||
1930 | case 0: | ||
1931 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
1932 | goto out; | ||
1933 | filp->f_pos++; | ||
1934 | case 1: | ||
1935 | ino = parent_ino(dentry); | ||
1936 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
1937 | goto out; | ||
1938 | filp->f_pos++; | ||
1939 | default: | ||
1940 | files = get_files_struct(p); | ||
1941 | if (!files) | ||
1942 | goto out; | ||
1943 | rcu_read_lock(); | ||
1944 | for (fd = filp->f_pos-2; | ||
1945 | fd < files_fdtable(files)->max_fds; | ||
1946 | fd++, filp->f_pos++) { | ||
1947 | char name[PROC_NUMBUF]; | ||
1948 | int len; | ||
1949 | int rv; | ||
1950 | |||
1951 | if (!fcheck_files(files, fd)) | ||
1952 | continue; | ||
1953 | rcu_read_unlock(); | ||
1954 | |||
1955 | len = snprintf(name, sizeof(name), "%d", fd); | ||
1956 | rv = proc_fill_cache(filp, dirent, filldir, | ||
1957 | name, len, instantiate, p, | ||
1958 | (void *)(unsigned long)fd); | ||
1959 | if (rv < 0) | ||
1960 | goto out_fd_loop; | ||
1961 | rcu_read_lock(); | ||
1962 | } | ||
1963 | rcu_read_unlock(); | ||
1964 | out_fd_loop: | ||
1965 | put_files_struct(files); | ||
1966 | } | ||
1967 | out: | ||
1968 | put_task_struct(p); | ||
1969 | out_no_task: | ||
1970 | return retval; | ||
1971 | } | ||
1972 | |||
1973 | static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, | ||
1974 | unsigned int flags) | ||
1975 | { | ||
1976 | return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); | ||
1977 | } | ||
1978 | |||
1979 | static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) | ||
1980 | { | ||
1981 | return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); | ||
1982 | } | ||
1983 | |||
1984 | static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, | ||
1985 | size_t len, loff_t *ppos) | ||
1986 | { | ||
1987 | char tmp[PROC_FDINFO_MAX]; | ||
1988 | int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); | ||
1989 | if (!err) | ||
1990 | err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); | ||
1991 | return err; | ||
1992 | } | ||
1993 | |||
1994 | static const struct file_operations proc_fdinfo_file_operations = { | ||
1995 | .open = nonseekable_open, | ||
1996 | .read = proc_fdinfo_read, | ||
1997 | .llseek = no_llseek, | ||
1998 | }; | ||
1999 | |||
2000 | static const struct file_operations proc_fd_operations = { | ||
2001 | .read = generic_read_dir, | ||
2002 | .readdir = proc_readfd, | ||
2003 | .llseek = default_llseek, | ||
2004 | }; | ||
2005 | |||
2006 | #ifdef CONFIG_CHECKPOINT_RESTORE | 1698 | #ifdef CONFIG_CHECKPOINT_RESTORE |
2007 | 1699 | ||
2008 | /* | 1700 | /* |
@@ -2121,7 +1813,7 @@ out: | |||
2121 | } | 1813 | } |
2122 | 1814 | ||
2123 | struct map_files_info { | 1815 | struct map_files_info { |
2124 | struct file *file; | 1816 | fmode_t mode; |
2125 | unsigned long len; | 1817 | unsigned long len; |
2126 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ | 1818 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ |
2127 | }; | 1819 | }; |
@@ -2130,13 +1822,10 @@ static struct dentry * | |||
2130 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | 1822 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, |
2131 | struct task_struct *task, const void *ptr) | 1823 | struct task_struct *task, const void *ptr) |
2132 | { | 1824 | { |
2133 | const struct file *file = ptr; | 1825 | fmode_t mode = (fmode_t)(unsigned long)ptr; |
2134 | struct proc_inode *ei; | 1826 | struct proc_inode *ei; |
2135 | struct inode *inode; | 1827 | struct inode *inode; |
2136 | 1828 | ||
2137 | if (!file) | ||
2138 | return ERR_PTR(-ENOENT); | ||
2139 | |||
2140 | inode = proc_pid_make_inode(dir->i_sb, task); | 1829 | inode = proc_pid_make_inode(dir->i_sb, task); |
2141 | if (!inode) | 1830 | if (!inode) |
2142 | return ERR_PTR(-ENOENT); | 1831 | return ERR_PTR(-ENOENT); |
@@ -2148,9 +1837,9 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | |||
2148 | inode->i_size = 64; | 1837 | inode->i_size = 64; |
2149 | inode->i_mode = S_IFLNK; | 1838 | inode->i_mode = S_IFLNK; |
2150 | 1839 | ||
2151 | if (file->f_mode & FMODE_READ) | 1840 | if (mode & FMODE_READ) |
2152 | inode->i_mode |= S_IRUSR; | 1841 | inode->i_mode |= S_IRUSR; |
2153 | if (file->f_mode & FMODE_WRITE) | 1842 | if (mode & FMODE_WRITE) |
2154 | inode->i_mode |= S_IWUSR; | 1843 | inode->i_mode |= S_IWUSR; |
2155 | 1844 | ||
2156 | d_set_d_op(dentry, &tid_map_files_dentry_operations); | 1845 | d_set_d_op(dentry, &tid_map_files_dentry_operations); |
@@ -2194,7 +1883,8 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, | |||
2194 | if (!vma) | 1883 | if (!vma) |
2195 | goto out_no_vma; | 1884 | goto out_no_vma; |
2196 | 1885 | ||
2197 | result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file); | 1886 | result = proc_map_files_instantiate(dir, dentry, task, |
1887 | (void *)(unsigned long)vma->vm_file->f_mode); | ||
2198 | 1888 | ||
2199 | out_no_vma: | 1889 | out_no_vma: |
2200 | up_read(&mm->mmap_sem); | 1890 | up_read(&mm->mmap_sem); |
@@ -2295,8 +1985,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
2295 | if (++pos <= filp->f_pos) | 1985 | if (++pos <= filp->f_pos) |
2296 | continue; | 1986 | continue; |
2297 | 1987 | ||
2298 | get_file(vma->vm_file); | 1988 | info.mode = vma->vm_file->f_mode; |
2299 | info.file = vma->vm_file; | ||
2300 | info.len = snprintf(info.name, | 1989 | info.len = snprintf(info.name, |
2301 | sizeof(info.name), "%lx-%lx", | 1990 | sizeof(info.name), "%lx-%lx", |
2302 | vma->vm_start, vma->vm_end); | 1991 | vma->vm_start, vma->vm_end); |
@@ -2311,19 +2000,11 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
2311 | ret = proc_fill_cache(filp, dirent, filldir, | 2000 | ret = proc_fill_cache(filp, dirent, filldir, |
2312 | p->name, p->len, | 2001 | p->name, p->len, |
2313 | proc_map_files_instantiate, | 2002 | proc_map_files_instantiate, |
2314 | task, p->file); | 2003 | task, |
2004 | (void *)(unsigned long)p->mode); | ||
2315 | if (ret) | 2005 | if (ret) |
2316 | break; | 2006 | break; |
2317 | filp->f_pos++; | 2007 | filp->f_pos++; |
2318 | fput(p->file); | ||
2319 | } | ||
2320 | for (; i < nr_files; i++) { | ||
2321 | /* | ||
2322 | * In case of error don't forget | ||
2323 | * to put rest of file refs. | ||
2324 | */ | ||
2325 | p = flex_array_get(fa, i); | ||
2326 | fput(p->file); | ||
2327 | } | 2008 | } |
2328 | if (fa) | 2009 | if (fa) |
2329 | flex_array_free(fa); | 2010 | flex_array_free(fa); |
@@ -2345,82 +2026,6 @@ static const struct file_operations proc_map_files_operations = { | |||
2345 | 2026 | ||
2346 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | 2027 | #endif /* CONFIG_CHECKPOINT_RESTORE */ |
2347 | 2028 | ||
2348 | /* | ||
2349 | * /proc/pid/fd needs a special permission handler so that a process can still | ||
2350 | * access /proc/self/fd after it has executed a setuid(). | ||
2351 | */ | ||
2352 | static int proc_fd_permission(struct inode *inode, int mask) | ||
2353 | { | ||
2354 | int rv = generic_permission(inode, mask); | ||
2355 | if (rv == 0) | ||
2356 | return 0; | ||
2357 | if (task_pid(current) == proc_pid(inode)) | ||
2358 | rv = 0; | ||
2359 | return rv; | ||
2360 | } | ||
2361 | |||
2362 | /* | ||
2363 | * proc directories can do almost nothing.. | ||
2364 | */ | ||
2365 | static const struct inode_operations proc_fd_inode_operations = { | ||
2366 | .lookup = proc_lookupfd, | ||
2367 | .permission = proc_fd_permission, | ||
2368 | .setattr = proc_setattr, | ||
2369 | }; | ||
2370 | |||
2371 | static struct dentry *proc_fdinfo_instantiate(struct inode *dir, | ||
2372 | struct dentry *dentry, struct task_struct *task, const void *ptr) | ||
2373 | { | ||
2374 | unsigned fd = (unsigned long)ptr; | ||
2375 | struct inode *inode; | ||
2376 | struct proc_inode *ei; | ||
2377 | struct dentry *error = ERR_PTR(-ENOENT); | ||
2378 | |||
2379 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
2380 | if (!inode) | ||
2381 | goto out; | ||
2382 | ei = PROC_I(inode); | ||
2383 | ei->fd = fd; | ||
2384 | inode->i_mode = S_IFREG | S_IRUSR; | ||
2385 | inode->i_fop = &proc_fdinfo_file_operations; | ||
2386 | d_set_d_op(dentry, &tid_fd_dentry_operations); | ||
2387 | d_add(dentry, inode); | ||
2388 | /* Close the race of the process dying before we return the dentry */ | ||
2389 | if (tid_fd_revalidate(dentry, 0)) | ||
2390 | error = NULL; | ||
2391 | |||
2392 | out: | ||
2393 | return error; | ||
2394 | } | ||
2395 | |||
2396 | static struct dentry *proc_lookupfdinfo(struct inode *dir, | ||
2397 | struct dentry *dentry, | ||
2398 | unsigned int flags) | ||
2399 | { | ||
2400 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); | ||
2401 | } | ||
2402 | |||
2403 | static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) | ||
2404 | { | ||
2405 | return proc_readfd_common(filp, dirent, filldir, | ||
2406 | proc_fdinfo_instantiate); | ||
2407 | } | ||
2408 | |||
2409 | static const struct file_operations proc_fdinfo_operations = { | ||
2410 | .read = generic_read_dir, | ||
2411 | .readdir = proc_readfdinfo, | ||
2412 | .llseek = default_llseek, | ||
2413 | }; | ||
2414 | |||
2415 | /* | ||
2416 | * proc directories can do almost nothing.. | ||
2417 | */ | ||
2418 | static const struct inode_operations proc_fdinfo_inode_operations = { | ||
2419 | .lookup = proc_lookupfdinfo, | ||
2420 | .setattr = proc_setattr, | ||
2421 | }; | ||
2422 | |||
2423 | |||
2424 | static struct dentry *proc_pident_instantiate(struct inode *dir, | 2029 | static struct dentry *proc_pident_instantiate(struct inode *dir, |
2425 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 2030 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
2426 | { | 2031 | { |
diff --git a/fs/proc/fd.c b/fs/proc/fd.c new file mode 100644 index 000000000000..f28a875f8779 --- /dev/null +++ b/fs/proc/fd.c | |||
@@ -0,0 +1,367 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/dcache.h> | ||
4 | #include <linux/path.h> | ||
5 | #include <linux/fdtable.h> | ||
6 | #include <linux/namei.h> | ||
7 | #include <linux/pid.h> | ||
8 | #include <linux/security.h> | ||
9 | #include <linux/file.h> | ||
10 | #include <linux/seq_file.h> | ||
11 | |||
12 | #include <linux/proc_fs.h> | ||
13 | |||
14 | #include "internal.h" | ||
15 | #include "fd.h" | ||
16 | |||
17 | static int seq_show(struct seq_file *m, void *v) | ||
18 | { | ||
19 | struct files_struct *files = NULL; | ||
20 | int f_flags = 0, ret = -ENOENT; | ||
21 | struct file *file = NULL; | ||
22 | struct task_struct *task; | ||
23 | |||
24 | task = get_proc_task(m->private); | ||
25 | if (!task) | ||
26 | return -ENOENT; | ||
27 | |||
28 | files = get_files_struct(task); | ||
29 | put_task_struct(task); | ||
30 | |||
31 | if (files) { | ||
32 | int fd = proc_fd(m->private); | ||
33 | |||
34 | spin_lock(&files->file_lock); | ||
35 | file = fcheck_files(files, fd); | ||
36 | if (file) { | ||
37 | struct fdtable *fdt = files_fdtable(files); | ||
38 | |||
39 | f_flags = file->f_flags; | ||
40 | if (close_on_exec(fd, fdt)) | ||
41 | f_flags |= O_CLOEXEC; | ||
42 | |||
43 | get_file(file); | ||
44 | ret = 0; | ||
45 | } | ||
46 | spin_unlock(&files->file_lock); | ||
47 | put_files_struct(files); | ||
48 | } | ||
49 | |||
50 | if (!ret) { | ||
51 | seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", | ||
52 | (long long)file->f_pos, f_flags); | ||
53 | fput(file); | ||
54 | } | ||
55 | |||
56 | return ret; | ||
57 | } | ||
58 | |||
59 | static int seq_fdinfo_open(struct inode *inode, struct file *file) | ||
60 | { | ||
61 | return single_open(file, seq_show, inode); | ||
62 | } | ||
63 | |||
64 | static const struct file_operations proc_fdinfo_file_operations = { | ||
65 | .open = seq_fdinfo_open, | ||
66 | .read = seq_read, | ||
67 | .llseek = seq_lseek, | ||
68 | .release = single_release, | ||
69 | }; | ||
70 | |||
71 | static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) | ||
72 | { | ||
73 | struct files_struct *files; | ||
74 | struct task_struct *task; | ||
75 | const struct cred *cred; | ||
76 | struct inode *inode; | ||
77 | int fd; | ||
78 | |||
79 | if (flags & LOOKUP_RCU) | ||
80 | return -ECHILD; | ||
81 | |||
82 | inode = dentry->d_inode; | ||
83 | task = get_proc_task(inode); | ||
84 | fd = proc_fd(inode); | ||
85 | |||
86 | if (task) { | ||
87 | files = get_files_struct(task); | ||
88 | if (files) { | ||
89 | struct file *file; | ||
90 | |||
91 | rcu_read_lock(); | ||
92 | file = fcheck_files(files, fd); | ||
93 | if (file) { | ||
94 | unsigned f_mode = file->f_mode; | ||
95 | |||
96 | rcu_read_unlock(); | ||
97 | put_files_struct(files); | ||
98 | |||
99 | if (task_dumpable(task)) { | ||
100 | rcu_read_lock(); | ||
101 | cred = __task_cred(task); | ||
102 | inode->i_uid = cred->euid; | ||
103 | inode->i_gid = cred->egid; | ||
104 | rcu_read_unlock(); | ||
105 | } else { | ||
106 | inode->i_uid = GLOBAL_ROOT_UID; | ||
107 | inode->i_gid = GLOBAL_ROOT_GID; | ||
108 | } | ||
109 | |||
110 | if (S_ISLNK(inode->i_mode)) { | ||
111 | unsigned i_mode = S_IFLNK; | ||
112 | if (f_mode & FMODE_READ) | ||
113 | i_mode |= S_IRUSR | S_IXUSR; | ||
114 | if (f_mode & FMODE_WRITE) | ||
115 | i_mode |= S_IWUSR | S_IXUSR; | ||
116 | inode->i_mode = i_mode; | ||
117 | } | ||
118 | |||
119 | security_task_to_inode(task, inode); | ||
120 | put_task_struct(task); | ||
121 | return 1; | ||
122 | } | ||
123 | rcu_read_unlock(); | ||
124 | put_files_struct(files); | ||
125 | } | ||
126 | put_task_struct(task); | ||
127 | } | ||
128 | |||
129 | d_drop(dentry); | ||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | static const struct dentry_operations tid_fd_dentry_operations = { | ||
134 | .d_revalidate = tid_fd_revalidate, | ||
135 | .d_delete = pid_delete_dentry, | ||
136 | }; | ||
137 | |||
138 | static int proc_fd_link(struct dentry *dentry, struct path *path) | ||
139 | { | ||
140 | struct files_struct *files = NULL; | ||
141 | struct task_struct *task; | ||
142 | int ret = -ENOENT; | ||
143 | |||
144 | task = get_proc_task(dentry->d_inode); | ||
145 | if (task) { | ||
146 | files = get_files_struct(task); | ||
147 | put_task_struct(task); | ||
148 | } | ||
149 | |||
150 | if (files) { | ||
151 | int fd = proc_fd(dentry->d_inode); | ||
152 | struct file *fd_file; | ||
153 | |||
154 | spin_lock(&files->file_lock); | ||
155 | fd_file = fcheck_files(files, fd); | ||
156 | if (fd_file) { | ||
157 | *path = fd_file->f_path; | ||
158 | path_get(&fd_file->f_path); | ||
159 | ret = 0; | ||
160 | } | ||
161 | spin_unlock(&files->file_lock); | ||
162 | put_files_struct(files); | ||
163 | } | ||
164 | |||
165 | return ret; | ||
166 | } | ||
167 | |||
168 | static struct dentry * | ||
169 | proc_fd_instantiate(struct inode *dir, struct dentry *dentry, | ||
170 | struct task_struct *task, const void *ptr) | ||
171 | { | ||
172 | struct dentry *error = ERR_PTR(-ENOENT); | ||
173 | unsigned fd = (unsigned long)ptr; | ||
174 | struct proc_inode *ei; | ||
175 | struct inode *inode; | ||
176 | |||
177 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
178 | if (!inode) | ||
179 | goto out; | ||
180 | |||
181 | ei = PROC_I(inode); | ||
182 | ei->fd = fd; | ||
183 | |||
184 | inode->i_mode = S_IFLNK; | ||
185 | inode->i_op = &proc_pid_link_inode_operations; | ||
186 | inode->i_size = 64; | ||
187 | |||
188 | ei->op.proc_get_link = proc_fd_link; | ||
189 | |||
190 | d_set_d_op(dentry, &tid_fd_dentry_operations); | ||
191 | d_add(dentry, inode); | ||
192 | |||
193 | /* Close the race of the process dying before we return the dentry */ | ||
194 | if (tid_fd_revalidate(dentry, 0)) | ||
195 | error = NULL; | ||
196 | out: | ||
197 | return error; | ||
198 | } | ||
199 | |||
200 | static struct dentry *proc_lookupfd_common(struct inode *dir, | ||
201 | struct dentry *dentry, | ||
202 | instantiate_t instantiate) | ||
203 | { | ||
204 | struct task_struct *task = get_proc_task(dir); | ||
205 | struct dentry *result = ERR_PTR(-ENOENT); | ||
206 | unsigned fd = name_to_int(dentry); | ||
207 | |||
208 | if (!task) | ||
209 | goto out_no_task; | ||
210 | if (fd == ~0U) | ||
211 | goto out; | ||
212 | |||
213 | result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); | ||
214 | out: | ||
215 | put_task_struct(task); | ||
216 | out_no_task: | ||
217 | return result; | ||
218 | } | ||
219 | |||
220 | static int proc_readfd_common(struct file * filp, void * dirent, | ||
221 | filldir_t filldir, instantiate_t instantiate) | ||
222 | { | ||
223 | struct dentry *dentry = filp->f_path.dentry; | ||
224 | struct inode *inode = dentry->d_inode; | ||
225 | struct task_struct *p = get_proc_task(inode); | ||
226 | struct files_struct *files; | ||
227 | unsigned int fd, ino; | ||
228 | int retval; | ||
229 | |||
230 | retval = -ENOENT; | ||
231 | if (!p) | ||
232 | goto out_no_task; | ||
233 | retval = 0; | ||
234 | |||
235 | fd = filp->f_pos; | ||
236 | switch (fd) { | ||
237 | case 0: | ||
238 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
239 | goto out; | ||
240 | filp->f_pos++; | ||
241 | case 1: | ||
242 | ino = parent_ino(dentry); | ||
243 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
244 | goto out; | ||
245 | filp->f_pos++; | ||
246 | default: | ||
247 | files = get_files_struct(p); | ||
248 | if (!files) | ||
249 | goto out; | ||
250 | rcu_read_lock(); | ||
251 | for (fd = filp->f_pos - 2; | ||
252 | fd < files_fdtable(files)->max_fds; | ||
253 | fd++, filp->f_pos++) { | ||
254 | char name[PROC_NUMBUF]; | ||
255 | int len; | ||
256 | int rv; | ||
257 | |||
258 | if (!fcheck_files(files, fd)) | ||
259 | continue; | ||
260 | rcu_read_unlock(); | ||
261 | |||
262 | len = snprintf(name, sizeof(name), "%d", fd); | ||
263 | rv = proc_fill_cache(filp, dirent, filldir, | ||
264 | name, len, instantiate, p, | ||
265 | (void *)(unsigned long)fd); | ||
266 | if (rv < 0) | ||
267 | goto out_fd_loop; | ||
268 | rcu_read_lock(); | ||
269 | } | ||
270 | rcu_read_unlock(); | ||
271 | out_fd_loop: | ||
272 | put_files_struct(files); | ||
273 | } | ||
274 | out: | ||
275 | put_task_struct(p); | ||
276 | out_no_task: | ||
277 | return retval; | ||
278 | } | ||
279 | |||
280 | static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) | ||
281 | { | ||
282 | return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); | ||
283 | } | ||
284 | |||
285 | const struct file_operations proc_fd_operations = { | ||
286 | .read = generic_read_dir, | ||
287 | .readdir = proc_readfd, | ||
288 | .llseek = default_llseek, | ||
289 | }; | ||
290 | |||
291 | static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, | ||
292 | unsigned int flags) | ||
293 | { | ||
294 | return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); | ||
295 | } | ||
296 | |||
297 | /* | ||
298 | * /proc/pid/fd needs a special permission handler so that a process can still | ||
299 | * access /proc/self/fd after it has executed a setuid(). | ||
300 | */ | ||
301 | int proc_fd_permission(struct inode *inode, int mask) | ||
302 | { | ||
303 | int rv = generic_permission(inode, mask); | ||
304 | if (rv == 0) | ||
305 | return 0; | ||
306 | if (task_pid(current) == proc_pid(inode)) | ||
307 | rv = 0; | ||
308 | return rv; | ||
309 | } | ||
310 | |||
311 | const struct inode_operations proc_fd_inode_operations = { | ||
312 | .lookup = proc_lookupfd, | ||
313 | .permission = proc_fd_permission, | ||
314 | .setattr = proc_setattr, | ||
315 | }; | ||
316 | |||
317 | static struct dentry * | ||
318 | proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, | ||
319 | struct task_struct *task, const void *ptr) | ||
320 | { | ||
321 | struct dentry *error = ERR_PTR(-ENOENT); | ||
322 | unsigned fd = (unsigned long)ptr; | ||
323 | struct proc_inode *ei; | ||
324 | struct inode *inode; | ||
325 | |||
326 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
327 | if (!inode) | ||
328 | goto out; | ||
329 | |||
330 | ei = PROC_I(inode); | ||
331 | ei->fd = fd; | ||
332 | |||
333 | inode->i_mode = S_IFREG | S_IRUSR; | ||
334 | inode->i_fop = &proc_fdinfo_file_operations; | ||
335 | |||
336 | d_set_d_op(dentry, &tid_fd_dentry_operations); | ||
337 | d_add(dentry, inode); | ||
338 | |||
339 | /* Close the race of the process dying before we return the dentry */ | ||
340 | if (tid_fd_revalidate(dentry, 0)) | ||
341 | error = NULL; | ||
342 | out: | ||
343 | return error; | ||
344 | } | ||
345 | |||
346 | static struct dentry * | ||
347 | proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags) | ||
348 | { | ||
349 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); | ||
350 | } | ||
351 | |||
352 | static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) | ||
353 | { | ||
354 | return proc_readfd_common(filp, dirent, filldir, | ||
355 | proc_fdinfo_instantiate); | ||
356 | } | ||
357 | |||
358 | const struct inode_operations proc_fdinfo_inode_operations = { | ||
359 | .lookup = proc_lookupfdinfo, | ||
360 | .setattr = proc_setattr, | ||
361 | }; | ||
362 | |||
363 | const struct file_operations proc_fdinfo_operations = { | ||
364 | .read = generic_read_dir, | ||
365 | .readdir = proc_readfdinfo, | ||
366 | .llseek = default_llseek, | ||
367 | }; | ||
diff --git a/fs/proc/fd.h b/fs/proc/fd.h new file mode 100644 index 000000000000..cbb1d47deda8 --- /dev/null +++ b/fs/proc/fd.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #ifndef __PROCFS_FD_H__ | ||
2 | #define __PROCFS_FD_H__ | ||
3 | |||
4 | #include <linux/fs.h> | ||
5 | |||
6 | extern const struct file_operations proc_fd_operations; | ||
7 | extern const struct inode_operations proc_fd_inode_operations; | ||
8 | |||
9 | extern const struct file_operations proc_fdinfo_operations; | ||
10 | extern const struct inode_operations proc_fdinfo_inode_operations; | ||
11 | |||
12 | extern int proc_fd_permission(struct inode *inode, int mask); | ||
13 | |||
14 | #endif /* __PROCFS_FD_H__ */ | ||
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index e1167a1c9126..67925a7bd8cb 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -9,6 +9,7 @@ | |||
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/sched.h> | ||
12 | #include <linux/proc_fs.h> | 13 | #include <linux/proc_fs.h> |
13 | struct ctl_table_header; | 14 | struct ctl_table_header; |
14 | 15 | ||
@@ -65,6 +66,7 @@ extern const struct file_operations proc_clear_refs_operations; | |||
65 | extern const struct file_operations proc_pagemap_operations; | 66 | extern const struct file_operations proc_pagemap_operations; |
66 | extern const struct file_operations proc_net_operations; | 67 | extern const struct file_operations proc_net_operations; |
67 | extern const struct inode_operations proc_net_inode_operations; | 68 | extern const struct inode_operations proc_net_inode_operations; |
69 | extern const struct inode_operations proc_pid_link_inode_operations; | ||
68 | 70 | ||
69 | struct proc_maps_private { | 71 | struct proc_maps_private { |
70 | struct pid *pid; | 72 | struct pid *pid; |
@@ -91,6 +93,52 @@ static inline int proc_fd(struct inode *inode) | |||
91 | return PROC_I(inode)->fd; | 93 | return PROC_I(inode)->fd; |
92 | } | 94 | } |
93 | 95 | ||
96 | static inline int task_dumpable(struct task_struct *task) | ||
97 | { | ||
98 | int dumpable = 0; | ||
99 | struct mm_struct *mm; | ||
100 | |||
101 | task_lock(task); | ||
102 | mm = task->mm; | ||
103 | if (mm) | ||
104 | dumpable = get_dumpable(mm); | ||
105 | task_unlock(task); | ||
106 | if(dumpable == 1) | ||
107 | return 1; | ||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | static inline int pid_delete_dentry(const struct dentry * dentry) | ||
112 | { | ||
113 | /* Is the task we represent dead? | ||
114 | * If so, then don't put the dentry on the lru list, | ||
115 | * kill it immediately. | ||
116 | */ | ||
117 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | ||
118 | } | ||
119 | |||
120 | static inline unsigned name_to_int(struct dentry *dentry) | ||
121 | { | ||
122 | const char *name = dentry->d_name.name; | ||
123 | int len = dentry->d_name.len; | ||
124 | unsigned n = 0; | ||
125 | |||
126 | if (len > 1 && *name == '0') | ||
127 | goto out; | ||
128 | while (len-- > 0) { | ||
129 | unsigned c = *name++ - '0'; | ||
130 | if (c > 9) | ||
131 | goto out; | ||
132 | if (n >= (~0U-9)/10) | ||
133 | goto out; | ||
134 | n *= 10; | ||
135 | n += c; | ||
136 | } | ||
137 | return n; | ||
138 | out: | ||
139 | return ~0U; | ||
140 | } | ||
141 | |||
94 | struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, | 142 | struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, |
95 | struct dentry *dentry); | 143 | struct dentry *dentry); |
96 | int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, | 144 | int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, |
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 5c3c7b02e17b..43098bb5723a 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c | |||
@@ -391,6 +391,11 @@ static int init_inodecache(void) | |||
391 | 391 | ||
392 | static void destroy_inodecache(void) | 392 | static void destroy_inodecache(void) |
393 | { | 393 | { |
394 | /* | ||
395 | * Make sure all delayed rcu free inodes are flushed before we | ||
396 | * destroy cache. | ||
397 | */ | ||
398 | rcu_barrier(); | ||
394 | kmem_cache_destroy(qnx4_inode_cachep); | 399 | kmem_cache_destroy(qnx4_inode_cachep); |
395 | } | 400 | } |
396 | 401 | ||
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index f4eef0b5e7b5..b6addf560483 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c | |||
@@ -651,6 +651,11 @@ static int init_inodecache(void) | |||
651 | 651 | ||
652 | static void destroy_inodecache(void) | 652 | static void destroy_inodecache(void) |
653 | { | 653 | { |
654 | /* | ||
655 | * Make sure all delayed rcu free inodes are flushed before we | ||
656 | * destroy cache. | ||
657 | */ | ||
658 | rcu_barrier(); | ||
654 | kmem_cache_destroy(qnx6_inode_cachep); | 659 | kmem_cache_destroy(qnx6_inode_cachep); |
655 | } | 660 | } |
656 | 661 | ||
diff --git a/fs/read_write.c b/fs/read_write.c index 1adfb691e4f1..d06534857e9e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -232,23 +232,18 @@ EXPORT_SYMBOL(vfs_llseek); | |||
232 | SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) | 232 | SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) |
233 | { | 233 | { |
234 | off_t retval; | 234 | off_t retval; |
235 | struct file * file; | 235 | struct fd f = fdget(fd); |
236 | int fput_needed; | 236 | if (!f.file) |
237 | 237 | return -EBADF; | |
238 | retval = -EBADF; | ||
239 | file = fget_light(fd, &fput_needed); | ||
240 | if (!file) | ||
241 | goto bad; | ||
242 | 238 | ||
243 | retval = -EINVAL; | 239 | retval = -EINVAL; |
244 | if (origin <= SEEK_MAX) { | 240 | if (origin <= SEEK_MAX) { |
245 | loff_t res = vfs_llseek(file, offset, origin); | 241 | loff_t res = vfs_llseek(f.file, offset, origin); |
246 | retval = res; | 242 | retval = res; |
247 | if (res != (loff_t)retval) | 243 | if (res != (loff_t)retval) |
248 | retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ | 244 | retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ |
249 | } | 245 | } |
250 | fput_light(file, fput_needed); | 246 | fdput(f); |
251 | bad: | ||
252 | return retval; | 247 | return retval; |
253 | } | 248 | } |
254 | 249 | ||
@@ -258,20 +253,17 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, | |||
258 | unsigned int, origin) | 253 | unsigned int, origin) |
259 | { | 254 | { |
260 | int retval; | 255 | int retval; |
261 | struct file * file; | 256 | struct fd f = fdget(fd); |
262 | loff_t offset; | 257 | loff_t offset; |
263 | int fput_needed; | ||
264 | 258 | ||
265 | retval = -EBADF; | 259 | if (!f.file) |
266 | file = fget_light(fd, &fput_needed); | 260 | return -EBADF; |
267 | if (!file) | ||
268 | goto bad; | ||
269 | 261 | ||
270 | retval = -EINVAL; | 262 | retval = -EINVAL; |
271 | if (origin > SEEK_MAX) | 263 | if (origin > SEEK_MAX) |
272 | goto out_putf; | 264 | goto out_putf; |
273 | 265 | ||
274 | offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, | 266 | offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, |
275 | origin); | 267 | origin); |
276 | 268 | ||
277 | retval = (int)offset; | 269 | retval = (int)offset; |
@@ -281,8 +273,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, | |||
281 | retval = 0; | 273 | retval = 0; |
282 | } | 274 | } |
283 | out_putf: | 275 | out_putf: |
284 | fput_light(file, fput_needed); | 276 | fdput(f); |
285 | bad: | ||
286 | return retval; | 277 | return retval; |
287 | } | 278 | } |
288 | #endif | 279 | #endif |
@@ -461,34 +452,29 @@ static inline void file_pos_write(struct file *file, loff_t pos) | |||
461 | 452 | ||
462 | SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) | 453 | SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) |
463 | { | 454 | { |
464 | struct file *file; | 455 | struct fd f = fdget(fd); |
465 | ssize_t ret = -EBADF; | 456 | ssize_t ret = -EBADF; |
466 | int fput_needed; | ||
467 | 457 | ||
468 | file = fget_light(fd, &fput_needed); | 458 | if (f.file) { |
469 | if (file) { | 459 | loff_t pos = file_pos_read(f.file); |
470 | loff_t pos = file_pos_read(file); | 460 | ret = vfs_read(f.file, buf, count, &pos); |
471 | ret = vfs_read(file, buf, count, &pos); | 461 | file_pos_write(f.file, pos); |
472 | file_pos_write(file, pos); | 462 | fdput(f); |
473 | fput_light(file, fput_needed); | ||
474 | } | 463 | } |
475 | |||
476 | return ret; | 464 | return ret; |
477 | } | 465 | } |
478 | 466 | ||
479 | SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, | 467 | SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, |
480 | size_t, count) | 468 | size_t, count) |
481 | { | 469 | { |
482 | struct file *file; | 470 | struct fd f = fdget(fd); |
483 | ssize_t ret = -EBADF; | 471 | ssize_t ret = -EBADF; |
484 | int fput_needed; | ||
485 | 472 | ||
486 | file = fget_light(fd, &fput_needed); | 473 | if (f.file) { |
487 | if (file) { | 474 | loff_t pos = file_pos_read(f.file); |
488 | loff_t pos = file_pos_read(file); | 475 | ret = vfs_write(f.file, buf, count, &pos); |
489 | ret = vfs_write(file, buf, count, &pos); | 476 | file_pos_write(f.file, pos); |
490 | file_pos_write(file, pos); | 477 | fdput(f); |
491 | fput_light(file, fput_needed); | ||
492 | } | 478 | } |
493 | 479 | ||
494 | return ret; | 480 | return ret; |
@@ -497,19 +483,18 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, | |||
497 | SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, | 483 | SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, |
498 | size_t count, loff_t pos) | 484 | size_t count, loff_t pos) |
499 | { | 485 | { |
500 | struct file *file; | 486 | struct fd f; |
501 | ssize_t ret = -EBADF; | 487 | ssize_t ret = -EBADF; |
502 | int fput_needed; | ||
503 | 488 | ||
504 | if (pos < 0) | 489 | if (pos < 0) |
505 | return -EINVAL; | 490 | return -EINVAL; |
506 | 491 | ||
507 | file = fget_light(fd, &fput_needed); | 492 | f = fdget(fd); |
508 | if (file) { | 493 | if (f.file) { |
509 | ret = -ESPIPE; | 494 | ret = -ESPIPE; |
510 | if (file->f_mode & FMODE_PREAD) | 495 | if (f.file->f_mode & FMODE_PREAD) |
511 | ret = vfs_read(file, buf, count, &pos); | 496 | ret = vfs_read(f.file, buf, count, &pos); |
512 | fput_light(file, fput_needed); | 497 | fdput(f); |
513 | } | 498 | } |
514 | 499 | ||
515 | return ret; | 500 | return ret; |
@@ -526,19 +511,18 @@ SYSCALL_ALIAS(sys_pread64, SyS_pread64); | |||
526 | SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, | 511 | SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, |
527 | size_t count, loff_t pos) | 512 | size_t count, loff_t pos) |
528 | { | 513 | { |
529 | struct file *file; | 514 | struct fd f; |
530 | ssize_t ret = -EBADF; | 515 | ssize_t ret = -EBADF; |
531 | int fput_needed; | ||
532 | 516 | ||
533 | if (pos < 0) | 517 | if (pos < 0) |
534 | return -EINVAL; | 518 | return -EINVAL; |
535 | 519 | ||
536 | file = fget_light(fd, &fput_needed); | 520 | f = fdget(fd); |
537 | if (file) { | 521 | if (f.file) { |
538 | ret = -ESPIPE; | 522 | ret = -ESPIPE; |
539 | if (file->f_mode & FMODE_PWRITE) | 523 | if (f.file->f_mode & FMODE_PWRITE) |
540 | ret = vfs_write(file, buf, count, &pos); | 524 | ret = vfs_write(f.file, buf, count, &pos); |
541 | fput_light(file, fput_needed); | 525 | fdput(f); |
542 | } | 526 | } |
543 | 527 | ||
544 | return ret; | 528 | return ret; |
@@ -789,16 +773,14 @@ EXPORT_SYMBOL(vfs_writev); | |||
789 | SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, | 773 | SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, |
790 | unsigned long, vlen) | 774 | unsigned long, vlen) |
791 | { | 775 | { |
792 | struct file *file; | 776 | struct fd f = fdget(fd); |
793 | ssize_t ret = -EBADF; | 777 | ssize_t ret = -EBADF; |
794 | int fput_needed; | ||
795 | 778 | ||
796 | file = fget_light(fd, &fput_needed); | 779 | if (f.file) { |
797 | if (file) { | 780 | loff_t pos = file_pos_read(f.file); |
798 | loff_t pos = file_pos_read(file); | 781 | ret = vfs_readv(f.file, vec, vlen, &pos); |
799 | ret = vfs_readv(file, vec, vlen, &pos); | 782 | file_pos_write(f.file, pos); |
800 | file_pos_write(file, pos); | 783 | fdput(f); |
801 | fput_light(file, fput_needed); | ||
802 | } | 784 | } |
803 | 785 | ||
804 | if (ret > 0) | 786 | if (ret > 0) |
@@ -810,16 +792,14 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, | |||
810 | SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, | 792 | SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, |
811 | unsigned long, vlen) | 793 | unsigned long, vlen) |
812 | { | 794 | { |
813 | struct file *file; | 795 | struct fd f = fdget(fd); |
814 | ssize_t ret = -EBADF; | 796 | ssize_t ret = -EBADF; |
815 | int fput_needed; | ||
816 | 797 | ||
817 | file = fget_light(fd, &fput_needed); | 798 | if (f.file) { |
818 | if (file) { | 799 | loff_t pos = file_pos_read(f.file); |
819 | loff_t pos = file_pos_read(file); | 800 | ret = vfs_writev(f.file, vec, vlen, &pos); |
820 | ret = vfs_writev(file, vec, vlen, &pos); | 801 | file_pos_write(f.file, pos); |
821 | file_pos_write(file, pos); | 802 | fdput(f); |
822 | fput_light(file, fput_needed); | ||
823 | } | 803 | } |
824 | 804 | ||
825 | if (ret > 0) | 805 | if (ret > 0) |
@@ -838,19 +818,18 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, | |||
838 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) | 818 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) |
839 | { | 819 | { |
840 | loff_t pos = pos_from_hilo(pos_h, pos_l); | 820 | loff_t pos = pos_from_hilo(pos_h, pos_l); |
841 | struct file *file; | 821 | struct fd f; |
842 | ssize_t ret = -EBADF; | 822 | ssize_t ret = -EBADF; |
843 | int fput_needed; | ||
844 | 823 | ||
845 | if (pos < 0) | 824 | if (pos < 0) |
846 | return -EINVAL; | 825 | return -EINVAL; |
847 | 826 | ||
848 | file = fget_light(fd, &fput_needed); | 827 | f = fdget(fd); |
849 | if (file) { | 828 | if (f.file) { |
850 | ret = -ESPIPE; | 829 | ret = -ESPIPE; |
851 | if (file->f_mode & FMODE_PREAD) | 830 | if (f.file->f_mode & FMODE_PREAD) |
852 | ret = vfs_readv(file, vec, vlen, &pos); | 831 | ret = vfs_readv(f.file, vec, vlen, &pos); |
853 | fput_light(file, fput_needed); | 832 | fdput(f); |
854 | } | 833 | } |
855 | 834 | ||
856 | if (ret > 0) | 835 | if (ret > 0) |
@@ -863,19 +842,18 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, | |||
863 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) | 842 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) |
864 | { | 843 | { |
865 | loff_t pos = pos_from_hilo(pos_h, pos_l); | 844 | loff_t pos = pos_from_hilo(pos_h, pos_l); |
866 | struct file *file; | 845 | struct fd f; |
867 | ssize_t ret = -EBADF; | 846 | ssize_t ret = -EBADF; |
868 | int fput_needed; | ||
869 | 847 | ||
870 | if (pos < 0) | 848 | if (pos < 0) |
871 | return -EINVAL; | 849 | return -EINVAL; |
872 | 850 | ||
873 | file = fget_light(fd, &fput_needed); | 851 | f = fdget(fd); |
874 | if (file) { | 852 | if (f.file) { |
875 | ret = -ESPIPE; | 853 | ret = -ESPIPE; |
876 | if (file->f_mode & FMODE_PWRITE) | 854 | if (f.file->f_mode & FMODE_PWRITE) |
877 | ret = vfs_writev(file, vec, vlen, &pos); | 855 | ret = vfs_writev(f.file, vec, vlen, &pos); |
878 | fput_light(file, fput_needed); | 856 | fdput(f); |
879 | } | 857 | } |
880 | 858 | ||
881 | if (ret > 0) | 859 | if (ret > 0) |
@@ -884,31 +862,31 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, | |||
884 | return ret; | 862 | return ret; |
885 | } | 863 | } |
886 | 864 | ||
887 | static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | 865 | ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, |
888 | size_t count, loff_t max) | 866 | loff_t max) |
889 | { | 867 | { |
890 | struct file * in_file, * out_file; | 868 | struct fd in, out; |
891 | struct inode * in_inode, * out_inode; | 869 | struct inode *in_inode, *out_inode; |
892 | loff_t pos; | 870 | loff_t pos; |
893 | ssize_t retval; | 871 | ssize_t retval; |
894 | int fput_needed_in, fput_needed_out, fl; | 872 | int fl; |
895 | 873 | ||
896 | /* | 874 | /* |
897 | * Get input file, and verify that it is ok.. | 875 | * Get input file, and verify that it is ok.. |
898 | */ | 876 | */ |
899 | retval = -EBADF; | 877 | retval = -EBADF; |
900 | in_file = fget_light(in_fd, &fput_needed_in); | 878 | in = fdget(in_fd); |
901 | if (!in_file) | 879 | if (!in.file) |
902 | goto out; | 880 | goto out; |
903 | if (!(in_file->f_mode & FMODE_READ)) | 881 | if (!(in.file->f_mode & FMODE_READ)) |
904 | goto fput_in; | 882 | goto fput_in; |
905 | retval = -ESPIPE; | 883 | retval = -ESPIPE; |
906 | if (!ppos) | 884 | if (!ppos) |
907 | ppos = &in_file->f_pos; | 885 | ppos = &in.file->f_pos; |
908 | else | 886 | else |
909 | if (!(in_file->f_mode & FMODE_PREAD)) | 887 | if (!(in.file->f_mode & FMODE_PREAD)) |
910 | goto fput_in; | 888 | goto fput_in; |
911 | retval = rw_verify_area(READ, in_file, ppos, count); | 889 | retval = rw_verify_area(READ, in.file, ppos, count); |
912 | if (retval < 0) | 890 | if (retval < 0) |
913 | goto fput_in; | 891 | goto fput_in; |
914 | count = retval; | 892 | count = retval; |
@@ -917,15 +895,15 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
917 | * Get output file, and verify that it is ok.. | 895 | * Get output file, and verify that it is ok.. |
918 | */ | 896 | */ |
919 | retval = -EBADF; | 897 | retval = -EBADF; |
920 | out_file = fget_light(out_fd, &fput_needed_out); | 898 | out = fdget(out_fd); |
921 | if (!out_file) | 899 | if (!out.file) |
922 | goto fput_in; | 900 | goto fput_in; |
923 | if (!(out_file->f_mode & FMODE_WRITE)) | 901 | if (!(out.file->f_mode & FMODE_WRITE)) |
924 | goto fput_out; | 902 | goto fput_out; |
925 | retval = -EINVAL; | 903 | retval = -EINVAL; |
926 | in_inode = in_file->f_path.dentry->d_inode; | 904 | in_inode = in.file->f_path.dentry->d_inode; |
927 | out_inode = out_file->f_path.dentry->d_inode; | 905 | out_inode = out.file->f_path.dentry->d_inode; |
928 | retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); | 906 | retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count); |
929 | if (retval < 0) | 907 | if (retval < 0) |
930 | goto fput_out; | 908 | goto fput_out; |
931 | count = retval; | 909 | count = retval; |
@@ -949,10 +927,10 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
949 | * and the application is arguably buggy if it doesn't expect | 927 | * and the application is arguably buggy if it doesn't expect |
950 | * EAGAIN on a non-blocking file descriptor. | 928 | * EAGAIN on a non-blocking file descriptor. |
951 | */ | 929 | */ |
952 | if (in_file->f_flags & O_NONBLOCK) | 930 | if (in.file->f_flags & O_NONBLOCK) |
953 | fl = SPLICE_F_NONBLOCK; | 931 | fl = SPLICE_F_NONBLOCK; |
954 | #endif | 932 | #endif |
955 | retval = do_splice_direct(in_file, ppos, out_file, count, fl); | 933 | retval = do_splice_direct(in.file, ppos, out.file, count, fl); |
956 | 934 | ||
957 | if (retval > 0) { | 935 | if (retval > 0) { |
958 | add_rchar(current, retval); | 936 | add_rchar(current, retval); |
@@ -965,9 +943,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
965 | retval = -EOVERFLOW; | 943 | retval = -EOVERFLOW; |
966 | 944 | ||
967 | fput_out: | 945 | fput_out: |
968 | fput_light(out_file, fput_needed_out); | 946 | fdput(out); |
969 | fput_in: | 947 | fput_in: |
970 | fput_light(in_file, fput_needed_in); | 948 | fdput(in); |
971 | out: | 949 | out: |
972 | return retval; | 950 | return retval; |
973 | } | 951 | } |
diff --git a/fs/read_write.h b/fs/read_write.h index d07b954c6e0c..d3e00ef67420 100644 --- a/fs/read_write.h +++ b/fs/read_write.h | |||
@@ -12,3 +12,5 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, | |||
12 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); | 12 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); |
13 | ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, | 13 | ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, |
14 | unsigned long nr_segs, loff_t *ppos, io_fn_t fn); | 14 | unsigned long nr_segs, loff_t *ppos, io_fn_t fn); |
15 | ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, | ||
16 | loff_t max); | ||
diff --git a/fs/readdir.c b/fs/readdir.c index 39e3370d79cf..5e69ef533b77 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
@@ -106,22 +106,20 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, | |||
106 | struct old_linux_dirent __user *, dirent, unsigned int, count) | 106 | struct old_linux_dirent __user *, dirent, unsigned int, count) |
107 | { | 107 | { |
108 | int error; | 108 | int error; |
109 | struct file * file; | 109 | struct fd f = fdget(fd); |
110 | struct readdir_callback buf; | 110 | struct readdir_callback buf; |
111 | int fput_needed; | ||
112 | 111 | ||
113 | file = fget_light(fd, &fput_needed); | 112 | if (!f.file) |
114 | if (!file) | ||
115 | return -EBADF; | 113 | return -EBADF; |
116 | 114 | ||
117 | buf.result = 0; | 115 | buf.result = 0; |
118 | buf.dirent = dirent; | 116 | buf.dirent = dirent; |
119 | 117 | ||
120 | error = vfs_readdir(file, fillonedir, &buf); | 118 | error = vfs_readdir(f.file, fillonedir, &buf); |
121 | if (buf.result) | 119 | if (buf.result) |
122 | error = buf.result; | 120 | error = buf.result; |
123 | 121 | ||
124 | fput_light(file, fput_needed); | 122 | fdput(f); |
125 | return error; | 123 | return error; |
126 | } | 124 | } |
127 | 125 | ||
@@ -191,17 +189,16 @@ efault: | |||
191 | SYSCALL_DEFINE3(getdents, unsigned int, fd, | 189 | SYSCALL_DEFINE3(getdents, unsigned int, fd, |
192 | struct linux_dirent __user *, dirent, unsigned int, count) | 190 | struct linux_dirent __user *, dirent, unsigned int, count) |
193 | { | 191 | { |
194 | struct file * file; | 192 | struct fd f; |
195 | struct linux_dirent __user * lastdirent; | 193 | struct linux_dirent __user * lastdirent; |
196 | struct getdents_callback buf; | 194 | struct getdents_callback buf; |
197 | int fput_needed; | ||
198 | int error; | 195 | int error; |
199 | 196 | ||
200 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 197 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
201 | return -EFAULT; | 198 | return -EFAULT; |
202 | 199 | ||
203 | file = fget_light(fd, &fput_needed); | 200 | f = fdget(fd); |
204 | if (!file) | 201 | if (!f.file) |
205 | return -EBADF; | 202 | return -EBADF; |
206 | 203 | ||
207 | buf.current_dir = dirent; | 204 | buf.current_dir = dirent; |
@@ -209,17 +206,17 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, | |||
209 | buf.count = count; | 206 | buf.count = count; |
210 | buf.error = 0; | 207 | buf.error = 0; |
211 | 208 | ||
212 | error = vfs_readdir(file, filldir, &buf); | 209 | error = vfs_readdir(f.file, filldir, &buf); |
213 | if (error >= 0) | 210 | if (error >= 0) |
214 | error = buf.error; | 211 | error = buf.error; |
215 | lastdirent = buf.previous; | 212 | lastdirent = buf.previous; |
216 | if (lastdirent) { | 213 | if (lastdirent) { |
217 | if (put_user(file->f_pos, &lastdirent->d_off)) | 214 | if (put_user(f.file->f_pos, &lastdirent->d_off)) |
218 | error = -EFAULT; | 215 | error = -EFAULT; |
219 | else | 216 | else |
220 | error = count - buf.count; | 217 | error = count - buf.count; |
221 | } | 218 | } |
222 | fput_light(file, fput_needed); | 219 | fdput(f); |
223 | return error; | 220 | return error; |
224 | } | 221 | } |
225 | 222 | ||
@@ -272,17 +269,16 @@ efault: | |||
272 | SYSCALL_DEFINE3(getdents64, unsigned int, fd, | 269 | SYSCALL_DEFINE3(getdents64, unsigned int, fd, |
273 | struct linux_dirent64 __user *, dirent, unsigned int, count) | 270 | struct linux_dirent64 __user *, dirent, unsigned int, count) |
274 | { | 271 | { |
275 | struct file * file; | 272 | struct fd f; |
276 | struct linux_dirent64 __user * lastdirent; | 273 | struct linux_dirent64 __user * lastdirent; |
277 | struct getdents_callback64 buf; | 274 | struct getdents_callback64 buf; |
278 | int fput_needed; | ||
279 | int error; | 275 | int error; |
280 | 276 | ||
281 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 277 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
282 | return -EFAULT; | 278 | return -EFAULT; |
283 | 279 | ||
284 | file = fget_light(fd, &fput_needed); | 280 | f = fdget(fd); |
285 | if (!file) | 281 | if (!f.file) |
286 | return -EBADF; | 282 | return -EBADF; |
287 | 283 | ||
288 | buf.current_dir = dirent; | 284 | buf.current_dir = dirent; |
@@ -290,17 +286,17 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, | |||
290 | buf.count = count; | 286 | buf.count = count; |
291 | buf.error = 0; | 287 | buf.error = 0; |
292 | 288 | ||
293 | error = vfs_readdir(file, filldir64, &buf); | 289 | error = vfs_readdir(f.file, filldir64, &buf); |
294 | if (error >= 0) | 290 | if (error >= 0) |
295 | error = buf.error; | 291 | error = buf.error; |
296 | lastdirent = buf.previous; | 292 | lastdirent = buf.previous; |
297 | if (lastdirent) { | 293 | if (lastdirent) { |
298 | typeof(lastdirent->d_off) d_off = file->f_pos; | 294 | typeof(lastdirent->d_off) d_off = f.file->f_pos; |
299 | if (__put_user(d_off, &lastdirent->d_off)) | 295 | if (__put_user(d_off, &lastdirent->d_off)) |
300 | error = -EFAULT; | 296 | error = -EFAULT; |
301 | else | 297 | else |
302 | error = count - buf.count; | 298 | error = count - buf.count; |
303 | } | 299 | } |
304 | fput_light(file, fput_needed); | 300 | fdput(f); |
305 | return error; | 301 | return error; |
306 | } | 302 | } |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7a37dabf5a96..1078ae179993 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -608,6 +608,11 @@ static int init_inodecache(void) | |||
608 | 608 | ||
609 | static void destroy_inodecache(void) | 609 | static void destroy_inodecache(void) |
610 | { | 610 | { |
611 | /* | ||
612 | * Make sure all delayed rcu free inodes are flushed before we | ||
613 | * destroy cache. | ||
614 | */ | ||
615 | rcu_barrier(); | ||
611 | kmem_cache_destroy(reiserfs_inode_cachep); | 616 | kmem_cache_destroy(reiserfs_inode_cachep); |
612 | } | 617 | } |
613 | 618 | ||
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 77c5f2173983..fd7c5f60b46b 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
@@ -648,6 +648,11 @@ error_register: | |||
648 | static void __exit exit_romfs_fs(void) | 648 | static void __exit exit_romfs_fs(void) |
649 | { | 649 | { |
650 | unregister_filesystem(&romfs_fs_type); | 650 | unregister_filesystem(&romfs_fs_type); |
651 | /* | ||
652 | * Make sure all delayed rcu free inodes are flushed before we | ||
653 | * destroy cache. | ||
654 | */ | ||
655 | rcu_barrier(); | ||
651 | kmem_cache_destroy(romfs_inode_cachep); | 656 | kmem_cache_destroy(romfs_inode_cachep); |
652 | } | 657 | } |
653 | 658 | ||
diff --git a/fs/select.c b/fs/select.c index db14c781335e..2ef72d965036 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -220,8 +220,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, | |||
220 | struct poll_table_entry *entry = poll_get_entry(pwq); | 220 | struct poll_table_entry *entry = poll_get_entry(pwq); |
221 | if (!entry) | 221 | if (!entry) |
222 | return; | 222 | return; |
223 | get_file(filp); | 223 | entry->filp = get_file(filp); |
224 | entry->filp = filp; | ||
225 | entry->wait_address = wait_address; | 224 | entry->wait_address = wait_address; |
226 | entry->key = p->_key; | 225 | entry->key = p->_key; |
227 | init_waitqueue_func_entry(&entry->wait, pollwake); | 226 | init_waitqueue_func_entry(&entry->wait, pollwake); |
@@ -429,8 +428,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
429 | for (i = 0; i < n; ++rinp, ++routp, ++rexp) { | 428 | for (i = 0; i < n; ++rinp, ++routp, ++rexp) { |
430 | unsigned long in, out, ex, all_bits, bit = 1, mask, j; | 429 | unsigned long in, out, ex, all_bits, bit = 1, mask, j; |
431 | unsigned long res_in = 0, res_out = 0, res_ex = 0; | 430 | unsigned long res_in = 0, res_out = 0, res_ex = 0; |
432 | const struct file_operations *f_op = NULL; | ||
433 | struct file *file = NULL; | ||
434 | 431 | ||
435 | in = *inp++; out = *outp++; ex = *exp++; | 432 | in = *inp++; out = *outp++; ex = *exp++; |
436 | all_bits = in | out | ex; | 433 | all_bits = in | out | ex; |
@@ -440,20 +437,21 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
440 | } | 437 | } |
441 | 438 | ||
442 | for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { | 439 | for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { |
443 | int fput_needed; | 440 | struct fd f; |
444 | if (i >= n) | 441 | if (i >= n) |
445 | break; | 442 | break; |
446 | if (!(bit & all_bits)) | 443 | if (!(bit & all_bits)) |
447 | continue; | 444 | continue; |
448 | file = fget_light(i, &fput_needed); | 445 | f = fdget(i); |
449 | if (file) { | 446 | if (f.file) { |
450 | f_op = file->f_op; | 447 | const struct file_operations *f_op; |
448 | f_op = f.file->f_op; | ||
451 | mask = DEFAULT_POLLMASK; | 449 | mask = DEFAULT_POLLMASK; |
452 | if (f_op && f_op->poll) { | 450 | if (f_op && f_op->poll) { |
453 | wait_key_set(wait, in, out, bit); | 451 | wait_key_set(wait, in, out, bit); |
454 | mask = (*f_op->poll)(file, wait); | 452 | mask = (*f_op->poll)(f.file, wait); |
455 | } | 453 | } |
456 | fput_light(file, fput_needed); | 454 | fdput(f); |
457 | if ((mask & POLLIN_SET) && (in & bit)) { | 455 | if ((mask & POLLIN_SET) && (in & bit)) { |
458 | res_in |= bit; | 456 | res_in |= bit; |
459 | retval++; | 457 | retval++; |
@@ -726,20 +724,17 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) | |||
726 | mask = 0; | 724 | mask = 0; |
727 | fd = pollfd->fd; | 725 | fd = pollfd->fd; |
728 | if (fd >= 0) { | 726 | if (fd >= 0) { |
729 | int fput_needed; | 727 | struct fd f = fdget(fd); |
730 | struct file * file; | ||
731 | |||
732 | file = fget_light(fd, &fput_needed); | ||
733 | mask = POLLNVAL; | 728 | mask = POLLNVAL; |
734 | if (file != NULL) { | 729 | if (f.file) { |
735 | mask = DEFAULT_POLLMASK; | 730 | mask = DEFAULT_POLLMASK; |
736 | if (file->f_op && file->f_op->poll) { | 731 | if (f.file->f_op && f.file->f_op->poll) { |
737 | pwait->_key = pollfd->events|POLLERR|POLLHUP; | 732 | pwait->_key = pollfd->events|POLLERR|POLLHUP; |
738 | mask = file->f_op->poll(file, pwait); | 733 | mask = f.file->f_op->poll(f.file, pwait); |
739 | } | 734 | } |
740 | /* Mask out unneeded events. */ | 735 | /* Mask out unneeded events. */ |
741 | mask &= pollfd->events | POLLERR | POLLHUP; | 736 | mask &= pollfd->events | POLLERR | POLLHUP; |
742 | fput_light(file, fput_needed); | 737 | fdput(f); |
743 | } | 738 | } |
744 | } | 739 | } |
745 | pollfd->revents = mask; | 740 | pollfd->revents = mask; |
diff --git a/fs/signalfd.c b/fs/signalfd.c index 9f35a37173de..8bee4e570911 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c | |||
@@ -269,13 +269,12 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, | |||
269 | if (ufd < 0) | 269 | if (ufd < 0) |
270 | kfree(ctx); | 270 | kfree(ctx); |
271 | } else { | 271 | } else { |
272 | int fput_needed; | 272 | struct fd f = fdget(ufd); |
273 | struct file *file = fget_light(ufd, &fput_needed); | 273 | if (!f.file) |
274 | if (!file) | ||
275 | return -EBADF; | 274 | return -EBADF; |
276 | ctx = file->private_data; | 275 | ctx = f.file->private_data; |
277 | if (file->f_op != &signalfd_fops) { | 276 | if (f.file->f_op != &signalfd_fops) { |
278 | fput_light(file, fput_needed); | 277 | fdput(f); |
279 | return -EINVAL; | 278 | return -EINVAL; |
280 | } | 279 | } |
281 | spin_lock_irq(&current->sighand->siglock); | 280 | spin_lock_irq(&current->sighand->siglock); |
@@ -283,7 +282,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, | |||
283 | spin_unlock_irq(&current->sighand->siglock); | 282 | spin_unlock_irq(&current->sighand->siglock); |
284 | 283 | ||
285 | wake_up(&current->sighand->signalfd_wqh); | 284 | wake_up(&current->sighand->signalfd_wqh); |
286 | fput_light(file, fput_needed); | 285 | fdput(f); |
287 | } | 286 | } |
288 | 287 | ||
289 | return ufd; | 288 | return ufd; |
diff --git a/fs/splice.c b/fs/splice.c index 41514dd89462..13e5b4776e7a 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -1666,9 +1666,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, | |||
1666 | SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, | 1666 | SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, |
1667 | unsigned long, nr_segs, unsigned int, flags) | 1667 | unsigned long, nr_segs, unsigned int, flags) |
1668 | { | 1668 | { |
1669 | struct file *file; | 1669 | struct fd f; |
1670 | long error; | 1670 | long error; |
1671 | int fput; | ||
1672 | 1671 | ||
1673 | if (unlikely(nr_segs > UIO_MAXIOV)) | 1672 | if (unlikely(nr_segs > UIO_MAXIOV)) |
1674 | return -EINVAL; | 1673 | return -EINVAL; |
@@ -1676,14 +1675,14 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, | |||
1676 | return 0; | 1675 | return 0; |
1677 | 1676 | ||
1678 | error = -EBADF; | 1677 | error = -EBADF; |
1679 | file = fget_light(fd, &fput); | 1678 | f = fdget(fd); |
1680 | if (file) { | 1679 | if (f.file) { |
1681 | if (file->f_mode & FMODE_WRITE) | 1680 | if (f.file->f_mode & FMODE_WRITE) |
1682 | error = vmsplice_to_pipe(file, iov, nr_segs, flags); | 1681 | error = vmsplice_to_pipe(f.file, iov, nr_segs, flags); |
1683 | else if (file->f_mode & FMODE_READ) | 1682 | else if (f.file->f_mode & FMODE_READ) |
1684 | error = vmsplice_to_user(file, iov, nr_segs, flags); | 1683 | error = vmsplice_to_user(f.file, iov, nr_segs, flags); |
1685 | 1684 | ||
1686 | fput_light(file, fput); | 1685 | fdput(f); |
1687 | } | 1686 | } |
1688 | 1687 | ||
1689 | return error; | 1688 | return error; |
@@ -1693,30 +1692,27 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, | |||
1693 | int, fd_out, loff_t __user *, off_out, | 1692 | int, fd_out, loff_t __user *, off_out, |
1694 | size_t, len, unsigned int, flags) | 1693 | size_t, len, unsigned int, flags) |
1695 | { | 1694 | { |
1695 | struct fd in, out; | ||
1696 | long error; | 1696 | long error; |
1697 | struct file *in, *out; | ||
1698 | int fput_in, fput_out; | ||
1699 | 1697 | ||
1700 | if (unlikely(!len)) | 1698 | if (unlikely(!len)) |
1701 | return 0; | 1699 | return 0; |
1702 | 1700 | ||
1703 | error = -EBADF; | 1701 | error = -EBADF; |
1704 | in = fget_light(fd_in, &fput_in); | 1702 | in = fdget(fd_in); |
1705 | if (in) { | 1703 | if (in.file) { |
1706 | if (in->f_mode & FMODE_READ) { | 1704 | if (in.file->f_mode & FMODE_READ) { |
1707 | out = fget_light(fd_out, &fput_out); | 1705 | out = fdget(fd_out); |
1708 | if (out) { | 1706 | if (out.file) { |
1709 | if (out->f_mode & FMODE_WRITE) | 1707 | if (out.file->f_mode & FMODE_WRITE) |
1710 | error = do_splice(in, off_in, | 1708 | error = do_splice(in.file, off_in, |
1711 | out, off_out, | 1709 | out.file, off_out, |
1712 | len, flags); | 1710 | len, flags); |
1713 | fput_light(out, fput_out); | 1711 | fdput(out); |
1714 | } | 1712 | } |
1715 | } | 1713 | } |
1716 | 1714 | fdput(in); | |
1717 | fput_light(in, fput_in); | ||
1718 | } | 1715 | } |
1719 | |||
1720 | return error; | 1716 | return error; |
1721 | } | 1717 | } |
1722 | 1718 | ||
@@ -2027,26 +2023,25 @@ static long do_tee(struct file *in, struct file *out, size_t len, | |||
2027 | 2023 | ||
2028 | SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) | 2024 | SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) |
2029 | { | 2025 | { |
2030 | struct file *in; | 2026 | struct fd in; |
2031 | int error, fput_in; | 2027 | int error; |
2032 | 2028 | ||
2033 | if (unlikely(!len)) | 2029 | if (unlikely(!len)) |
2034 | return 0; | 2030 | return 0; |
2035 | 2031 | ||
2036 | error = -EBADF; | 2032 | error = -EBADF; |
2037 | in = fget_light(fdin, &fput_in); | 2033 | in = fdget(fdin); |
2038 | if (in) { | 2034 | if (in.file) { |
2039 | if (in->f_mode & FMODE_READ) { | 2035 | if (in.file->f_mode & FMODE_READ) { |
2040 | int fput_out; | 2036 | struct fd out = fdget(fdout); |
2041 | struct file *out = fget_light(fdout, &fput_out); | 2037 | if (out.file) { |
2042 | 2038 | if (out.file->f_mode & FMODE_WRITE) | |
2043 | if (out) { | 2039 | error = do_tee(in.file, out.file, |
2044 | if (out->f_mode & FMODE_WRITE) | 2040 | len, flags); |
2045 | error = do_tee(in, out, len, flags); | 2041 | fdput(out); |
2046 | fput_light(out, fput_out); | ||
2047 | } | 2042 | } |
2048 | } | 2043 | } |
2049 | fput_light(in, fput_in); | 2044 | fdput(in); |
2050 | } | 2045 | } |
2051 | 2046 | ||
2052 | return error; | 2047 | return error; |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 29cd014ed3a1..260e3928d4f5 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -425,6 +425,11 @@ static int __init init_inodecache(void) | |||
425 | 425 | ||
426 | static void destroy_inodecache(void) | 426 | static void destroy_inodecache(void) |
427 | { | 427 | { |
428 | /* | ||
429 | * Make sure all delayed rcu free inodes are flushed before we | ||
430 | * destroy cache. | ||
431 | */ | ||
432 | rcu_barrier(); | ||
428 | kmem_cache_destroy(squashfs_inode_cachep); | 433 | kmem_cache_destroy(squashfs_inode_cachep); |
429 | } | 434 | } |
430 | 435 | ||
@@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr); | |||
57 | 57 | ||
58 | int vfs_fstat(unsigned int fd, struct kstat *stat) | 58 | int vfs_fstat(unsigned int fd, struct kstat *stat) |
59 | { | 59 | { |
60 | int fput_needed; | 60 | struct fd f = fdget_raw(fd); |
61 | struct file *f = fget_raw_light(fd, &fput_needed); | ||
62 | int error = -EBADF; | 61 | int error = -EBADF; |
63 | 62 | ||
64 | if (f) { | 63 | if (f.file) { |
65 | error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); | 64 | error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry, |
66 | fput_light(f, fput_needed); | 65 | stat); |
66 | fdput(f); | ||
67 | } | 67 | } |
68 | return error; | 68 | return error; |
69 | } | 69 | } |
diff --git a/fs/statfs.c b/fs/statfs.c index 95ad5c0e586c..f8e832e6f0a2 100644 --- a/fs/statfs.c +++ b/fs/statfs.c | |||
@@ -87,12 +87,11 @@ int user_statfs(const char __user *pathname, struct kstatfs *st) | |||
87 | 87 | ||
88 | int fd_statfs(int fd, struct kstatfs *st) | 88 | int fd_statfs(int fd, struct kstatfs *st) |
89 | { | 89 | { |
90 | int fput_needed; | 90 | struct fd f = fdget(fd); |
91 | struct file *file = fget_light(fd, &fput_needed); | ||
92 | int error = -EBADF; | 91 | int error = -EBADF; |
93 | if (file) { | 92 | if (f.file) { |
94 | error = vfs_statfs(&file->f_path, st); | 93 | error = vfs_statfs(&f.file->f_path, st); |
95 | fput_light(file, fput_needed); | 94 | fdput(f); |
96 | } | 95 | } |
97 | return error; | 96 | return error; |
98 | } | 97 | } |
diff --git a/fs/super.c b/fs/super.c index 0902cfa6a12e..5fdf7ff32c4e 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -307,12 +307,6 @@ void deactivate_locked_super(struct super_block *s) | |||
307 | 307 | ||
308 | /* caches are now gone, we can safely kill the shrinker now */ | 308 | /* caches are now gone, we can safely kill the shrinker now */ |
309 | unregister_shrinker(&s->s_shrink); | 309 | unregister_shrinker(&s->s_shrink); |
310 | |||
311 | /* | ||
312 | * We need to call rcu_barrier so all the delayed rcu free | ||
313 | * inodes are flushed before we release the fs module. | ||
314 | */ | ||
315 | rcu_barrier(); | ||
316 | put_filesystem(fs); | 310 | put_filesystem(fs); |
317 | put_super(s); | 311 | put_super(s); |
318 | } else { | 312 | } else { |
@@ -148,21 +148,19 @@ void emergency_sync(void) | |||
148 | */ | 148 | */ |
149 | SYSCALL_DEFINE1(syncfs, int, fd) | 149 | SYSCALL_DEFINE1(syncfs, int, fd) |
150 | { | 150 | { |
151 | struct file *file; | 151 | struct fd f = fdget(fd); |
152 | struct super_block *sb; | 152 | struct super_block *sb; |
153 | int ret; | 153 | int ret; |
154 | int fput_needed; | ||
155 | 154 | ||
156 | file = fget_light(fd, &fput_needed); | 155 | if (!f.file) |
157 | if (!file) | ||
158 | return -EBADF; | 156 | return -EBADF; |
159 | sb = file->f_dentry->d_sb; | 157 | sb = f.file->f_dentry->d_sb; |
160 | 158 | ||
161 | down_read(&sb->s_umount); | 159 | down_read(&sb->s_umount); |
162 | ret = sync_filesystem(sb); | 160 | ret = sync_filesystem(sb); |
163 | up_read(&sb->s_umount); | 161 | up_read(&sb->s_umount); |
164 | 162 | ||
165 | fput_light(file, fput_needed); | 163 | fdput(f); |
166 | return ret; | 164 | return ret; |
167 | } | 165 | } |
168 | 166 | ||
@@ -201,14 +199,12 @@ EXPORT_SYMBOL(vfs_fsync); | |||
201 | 199 | ||
202 | static int do_fsync(unsigned int fd, int datasync) | 200 | static int do_fsync(unsigned int fd, int datasync) |
203 | { | 201 | { |
204 | struct file *file; | 202 | struct fd f = fdget(fd); |
205 | int ret = -EBADF; | 203 | int ret = -EBADF; |
206 | int fput_needed; | ||
207 | 204 | ||
208 | file = fget_light(fd, &fput_needed); | 205 | if (f.file) { |
209 | if (file) { | 206 | ret = vfs_fsync(f.file, datasync); |
210 | ret = vfs_fsync(file, datasync); | 207 | fdput(f); |
211 | fput_light(file, fput_needed); | ||
212 | } | 208 | } |
213 | return ret; | 209 | return ret; |
214 | } | 210 | } |
@@ -291,10 +287,9 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, | |||
291 | unsigned int flags) | 287 | unsigned int flags) |
292 | { | 288 | { |
293 | int ret; | 289 | int ret; |
294 | struct file *file; | 290 | struct fd f; |
295 | struct address_space *mapping; | 291 | struct address_space *mapping; |
296 | loff_t endbyte; /* inclusive */ | 292 | loff_t endbyte; /* inclusive */ |
297 | int fput_needed; | ||
298 | umode_t i_mode; | 293 | umode_t i_mode; |
299 | 294 | ||
300 | ret = -EINVAL; | 295 | ret = -EINVAL; |
@@ -333,17 +328,17 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, | |||
333 | endbyte--; /* inclusive */ | 328 | endbyte--; /* inclusive */ |
334 | 329 | ||
335 | ret = -EBADF; | 330 | ret = -EBADF; |
336 | file = fget_light(fd, &fput_needed); | 331 | f = fdget(fd); |
337 | if (!file) | 332 | if (!f.file) |
338 | goto out; | 333 | goto out; |
339 | 334 | ||
340 | i_mode = file->f_path.dentry->d_inode->i_mode; | 335 | i_mode = f.file->f_path.dentry->d_inode->i_mode; |
341 | ret = -ESPIPE; | 336 | ret = -ESPIPE; |
342 | if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && | 337 | if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && |
343 | !S_ISLNK(i_mode)) | 338 | !S_ISLNK(i_mode)) |
344 | goto out_put; | 339 | goto out_put; |
345 | 340 | ||
346 | mapping = file->f_mapping; | 341 | mapping = f.file->f_mapping; |
347 | if (!mapping) { | 342 | if (!mapping) { |
348 | ret = -EINVAL; | 343 | ret = -EINVAL; |
349 | goto out_put; | 344 | goto out_put; |
@@ -366,7 +361,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, | |||
366 | ret = filemap_fdatawait_range(mapping, offset, endbyte); | 361 | ret = filemap_fdatawait_range(mapping, offset, endbyte); |
367 | 362 | ||
368 | out_put: | 363 | out_put: |
369 | fput_light(file, fput_needed); | 364 | fdput(f); |
370 | out: | 365 | out: |
371 | return ret; | 366 | return ret; |
372 | } | 367 | } |
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index b23ab736685d..d33e506c1eac 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c | |||
@@ -360,5 +360,10 @@ int __init sysv_init_icache(void) | |||
360 | 360 | ||
361 | void sysv_destroy_icache(void) | 361 | void sysv_destroy_icache(void) |
362 | { | 362 | { |
363 | /* | ||
364 | * Make sure all delayed rcu free inodes are flushed before we | ||
365 | * destroy cache. | ||
366 | */ | ||
367 | rcu_barrier(); | ||
363 | kmem_cache_destroy(sysv_inode_cachep); | 368 | kmem_cache_destroy(sysv_inode_cachep); |
364 | } | 369 | } |
diff --git a/fs/timerfd.c b/fs/timerfd.c index dffeb3795af1..d03822bbf190 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
@@ -234,19 +234,17 @@ static const struct file_operations timerfd_fops = { | |||
234 | .llseek = noop_llseek, | 234 | .llseek = noop_llseek, |
235 | }; | 235 | }; |
236 | 236 | ||
237 | static struct file *timerfd_fget(int fd) | 237 | static int timerfd_fget(int fd, struct fd *p) |
238 | { | 238 | { |
239 | struct file *file; | 239 | struct fd f = fdget(fd); |
240 | 240 | if (!f.file) | |
241 | file = fget(fd); | 241 | return -EBADF; |
242 | if (!file) | 242 | if (f.file->f_op != &timerfd_fops) { |
243 | return ERR_PTR(-EBADF); | 243 | fdput(f); |
244 | if (file->f_op != &timerfd_fops) { | 244 | return -EINVAL; |
245 | fput(file); | ||
246 | return ERR_PTR(-EINVAL); | ||
247 | } | 245 | } |
248 | 246 | *p = f; | |
249 | return file; | 247 | return 0; |
250 | } | 248 | } |
251 | 249 | ||
252 | SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) | 250 | SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) |
@@ -284,7 +282,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, | |||
284 | const struct itimerspec __user *, utmr, | 282 | const struct itimerspec __user *, utmr, |
285 | struct itimerspec __user *, otmr) | 283 | struct itimerspec __user *, otmr) |
286 | { | 284 | { |
287 | struct file *file; | 285 | struct fd f; |
288 | struct timerfd_ctx *ctx; | 286 | struct timerfd_ctx *ctx; |
289 | struct itimerspec ktmr, kotmr; | 287 | struct itimerspec ktmr, kotmr; |
290 | int ret; | 288 | int ret; |
@@ -297,10 +295,10 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, | |||
297 | !timespec_valid(&ktmr.it_interval)) | 295 | !timespec_valid(&ktmr.it_interval)) |
298 | return -EINVAL; | 296 | return -EINVAL; |
299 | 297 | ||
300 | file = timerfd_fget(ufd); | 298 | ret = timerfd_fget(ufd, &f); |
301 | if (IS_ERR(file)) | 299 | if (ret) |
302 | return PTR_ERR(file); | 300 | return ret; |
303 | ctx = file->private_data; | 301 | ctx = f.file->private_data; |
304 | 302 | ||
305 | timerfd_setup_cancel(ctx, flags); | 303 | timerfd_setup_cancel(ctx, flags); |
306 | 304 | ||
@@ -334,7 +332,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, | |||
334 | ret = timerfd_setup(ctx, flags, &ktmr); | 332 | ret = timerfd_setup(ctx, flags, &ktmr); |
335 | 333 | ||
336 | spin_unlock_irq(&ctx->wqh.lock); | 334 | spin_unlock_irq(&ctx->wqh.lock); |
337 | fput(file); | 335 | fdput(f); |
338 | if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) | 336 | if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) |
339 | return -EFAULT; | 337 | return -EFAULT; |
340 | 338 | ||
@@ -343,14 +341,13 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, | |||
343 | 341 | ||
344 | SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) | 342 | SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) |
345 | { | 343 | { |
346 | struct file *file; | 344 | struct fd f; |
347 | struct timerfd_ctx *ctx; | 345 | struct timerfd_ctx *ctx; |
348 | struct itimerspec kotmr; | 346 | struct itimerspec kotmr; |
349 | 347 | int ret = timerfd_fget(ufd, &f); | |
350 | file = timerfd_fget(ufd); | 348 | if (ret) |
351 | if (IS_ERR(file)) | 349 | return ret; |
352 | return PTR_ERR(file); | 350 | ctx = f.file->private_data; |
353 | ctx = file->private_data; | ||
354 | 351 | ||
355 | spin_lock_irq(&ctx->wqh.lock); | 352 | spin_lock_irq(&ctx->wqh.lock); |
356 | if (ctx->expired && ctx->tintv.tv64) { | 353 | if (ctx->expired && ctx->tintv.tv64) { |
@@ -362,7 +359,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) | |||
362 | kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); | 359 | kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); |
363 | kotmr.it_interval = ktime_to_timespec(ctx->tintv); | 360 | kotmr.it_interval = ktime_to_timespec(ctx->tintv); |
364 | spin_unlock_irq(&ctx->wqh.lock); | 361 | spin_unlock_irq(&ctx->wqh.lock); |
365 | fput(file); | 362 | fdput(f); |
366 | 363 | ||
367 | return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; | 364 | return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; |
368 | } | 365 | } |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 681f3a942444..49825427a0e8 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -2298,6 +2298,12 @@ static void __exit ubifs_exit(void) | |||
2298 | dbg_debugfs_exit(); | 2298 | dbg_debugfs_exit(); |
2299 | ubifs_compressors_exit(); | 2299 | ubifs_compressors_exit(); |
2300 | unregister_shrinker(&ubifs_shrinker_info); | 2300 | unregister_shrinker(&ubifs_shrinker_info); |
2301 | |||
2302 | /* | ||
2303 | * Make sure all delayed rcu free inodes are flushed before we | ||
2304 | * destroy cache. | ||
2305 | */ | ||
2306 | rcu_barrier(); | ||
2301 | kmem_cache_destroy(ubifs_inode_slab); | 2307 | kmem_cache_destroy(ubifs_inode_slab); |
2302 | unregister_filesystem(&ubifs_fs_type); | 2308 | unregister_filesystem(&ubifs_fs_type); |
2303 | } | 2309 | } |
diff --git a/fs/udf/super.c b/fs/udf/super.c index 862741dddf27..d44fb568abe1 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -171,6 +171,11 @@ static int init_inodecache(void) | |||
171 | 171 | ||
172 | static void destroy_inodecache(void) | 172 | static void destroy_inodecache(void) |
173 | { | 173 | { |
174 | /* | ||
175 | * Make sure all delayed rcu free inodes are flushed before we | ||
176 | * destroy cache. | ||
177 | */ | ||
178 | rcu_barrier(); | ||
174 | kmem_cache_destroy(udf_inode_cachep); | 179 | kmem_cache_destroy(udf_inode_cachep); |
175 | } | 180 | } |
176 | 181 | ||
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 444927e5706b..f7cfecfe1cab 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -1466,6 +1466,11 @@ static int init_inodecache(void) | |||
1466 | 1466 | ||
1467 | static void destroy_inodecache(void) | 1467 | static void destroy_inodecache(void) |
1468 | { | 1468 | { |
1469 | /* | ||
1470 | * Make sure all delayed rcu free inodes are flushed before we | ||
1471 | * destroy cache. | ||
1472 | */ | ||
1473 | rcu_barrier(); | ||
1469 | kmem_cache_destroy(ufs_inode_cachep); | 1474 | kmem_cache_destroy(ufs_inode_cachep); |
1470 | } | 1475 | } |
1471 | 1476 | ||
diff --git a/fs/utimes.c b/fs/utimes.c index fa4dbe451e27..bb0696a41735 100644 --- a/fs/utimes.c +++ b/fs/utimes.c | |||
@@ -140,19 +140,18 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times, | |||
140 | goto out; | 140 | goto out; |
141 | 141 | ||
142 | if (filename == NULL && dfd != AT_FDCWD) { | 142 | if (filename == NULL && dfd != AT_FDCWD) { |
143 | int fput_needed; | 143 | struct fd f; |
144 | struct file *file; | ||
145 | 144 | ||
146 | if (flags & AT_SYMLINK_NOFOLLOW) | 145 | if (flags & AT_SYMLINK_NOFOLLOW) |
147 | goto out; | 146 | goto out; |
148 | 147 | ||
149 | file = fget_light(dfd, &fput_needed); | 148 | f = fdget(dfd); |
150 | error = -EBADF; | 149 | error = -EBADF; |
151 | if (!file) | 150 | if (!f.file) |
152 | goto out; | 151 | goto out; |
153 | 152 | ||
154 | error = utimes_common(&file->f_path, times); | 153 | error = utimes_common(&f.file->f_path, times); |
155 | fput_light(file, fput_needed); | 154 | fdput(f); |
156 | } else { | 155 | } else { |
157 | struct path path; | 156 | struct path path; |
158 | int lookup_flags = 0; | 157 | int lookup_flags = 0; |
diff --git a/fs/xattr.c b/fs/xattr.c index f7f7f09b0b41..ca15fbd391c8 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -403,22 +403,20 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, | |||
403 | SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, | 403 | SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, |
404 | const void __user *,value, size_t, size, int, flags) | 404 | const void __user *,value, size_t, size, int, flags) |
405 | { | 405 | { |
406 | int fput_needed; | 406 | struct fd f = fdget(fd); |
407 | struct file *f; | ||
408 | struct dentry *dentry; | 407 | struct dentry *dentry; |
409 | int error = -EBADF; | 408 | int error = -EBADF; |
410 | 409 | ||
411 | f = fget_light(fd, &fput_needed); | 410 | if (!f.file) |
412 | if (!f) | ||
413 | return error; | 411 | return error; |
414 | dentry = f->f_path.dentry; | 412 | dentry = f.file->f_path.dentry; |
415 | audit_inode(NULL, dentry); | 413 | audit_inode(NULL, dentry); |
416 | error = mnt_want_write_file(f); | 414 | error = mnt_want_write_file(f.file); |
417 | if (!error) { | 415 | if (!error) { |
418 | error = setxattr(dentry, name, value, size, flags); | 416 | error = setxattr(dentry, name, value, size, flags); |
419 | mnt_drop_write_file(f); | 417 | mnt_drop_write_file(f.file); |
420 | } | 418 | } |
421 | fput_light(f, fput_needed); | 419 | fdput(f); |
422 | return error; | 420 | return error; |
423 | } | 421 | } |
424 | 422 | ||
@@ -502,16 +500,14 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, | |||
502 | SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, | 500 | SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, |
503 | void __user *, value, size_t, size) | 501 | void __user *, value, size_t, size) |
504 | { | 502 | { |
505 | int fput_needed; | 503 | struct fd f = fdget(fd); |
506 | struct file *f; | ||
507 | ssize_t error = -EBADF; | 504 | ssize_t error = -EBADF; |
508 | 505 | ||
509 | f = fget_light(fd, &fput_needed); | 506 | if (!f.file) |
510 | if (!f) | ||
511 | return error; | 507 | return error; |
512 | audit_inode(NULL, f->f_path.dentry); | 508 | audit_inode(NULL, f.file->f_path.dentry); |
513 | error = getxattr(f->f_path.dentry, name, value, size); | 509 | error = getxattr(f.file->f_path.dentry, name, value, size); |
514 | fput_light(f, fput_needed); | 510 | fdput(f); |
515 | return error; | 511 | return error; |
516 | } | 512 | } |
517 | 513 | ||
@@ -583,16 +579,14 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list, | |||
583 | 579 | ||
584 | SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) | 580 | SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) |
585 | { | 581 | { |
586 | int fput_needed; | 582 | struct fd f = fdget(fd); |
587 | struct file *f; | ||
588 | ssize_t error = -EBADF; | 583 | ssize_t error = -EBADF; |
589 | 584 | ||
590 | f = fget_light(fd, &fput_needed); | 585 | if (!f.file) |
591 | if (!f) | ||
592 | return error; | 586 | return error; |
593 | audit_inode(NULL, f->f_path.dentry); | 587 | audit_inode(NULL, f.file->f_path.dentry); |
594 | error = listxattr(f->f_path.dentry, list, size); | 588 | error = listxattr(f.file->f_path.dentry, list, size); |
595 | fput_light(f, fput_needed); | 589 | fdput(f); |
596 | return error; | 590 | return error; |
597 | } | 591 | } |
598 | 592 | ||
@@ -652,22 +646,20 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, | |||
652 | 646 | ||
653 | SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) | 647 | SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) |
654 | { | 648 | { |
655 | int fput_needed; | 649 | struct fd f = fdget(fd); |
656 | struct file *f; | ||
657 | struct dentry *dentry; | 650 | struct dentry *dentry; |
658 | int error = -EBADF; | 651 | int error = -EBADF; |
659 | 652 | ||
660 | f = fget_light(fd, &fput_needed); | 653 | if (!f.file) |
661 | if (!f) | ||
662 | return error; | 654 | return error; |
663 | dentry = f->f_path.dentry; | 655 | dentry = f.file->f_path.dentry; |
664 | audit_inode(NULL, dentry); | 656 | audit_inode(NULL, dentry); |
665 | error = mnt_want_write_file(f); | 657 | error = mnt_want_write_file(f.file); |
666 | if (!error) { | 658 | if (!error) { |
667 | error = removexattr(dentry, name); | 659 | error = removexattr(dentry, name); |
668 | mnt_drop_write_file(f); | 660 | mnt_drop_write_file(f.file); |
669 | } | 661 | } |
670 | fput_light(f, fput_needed); | 662 | fdput(f); |
671 | return error; | 663 | return error; |
672 | } | 664 | } |
673 | 665 | ||
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index e00de08dc8ac..b9b8646e62db 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -48,44 +48,44 @@ xfs_swapext( | |||
48 | xfs_swapext_t *sxp) | 48 | xfs_swapext_t *sxp) |
49 | { | 49 | { |
50 | xfs_inode_t *ip, *tip; | 50 | xfs_inode_t *ip, *tip; |
51 | struct file *file, *tmp_file; | 51 | struct fd f, tmp; |
52 | int error = 0; | 52 | int error = 0; |
53 | 53 | ||
54 | /* Pull information for the target fd */ | 54 | /* Pull information for the target fd */ |
55 | file = fget((int)sxp->sx_fdtarget); | 55 | f = fdget((int)sxp->sx_fdtarget); |
56 | if (!file) { | 56 | if (!f.file) { |
57 | error = XFS_ERROR(EINVAL); | 57 | error = XFS_ERROR(EINVAL); |
58 | goto out; | 58 | goto out; |
59 | } | 59 | } |
60 | 60 | ||
61 | if (!(file->f_mode & FMODE_WRITE) || | 61 | if (!(f.file->f_mode & FMODE_WRITE) || |
62 | !(file->f_mode & FMODE_READ) || | 62 | !(f.file->f_mode & FMODE_READ) || |
63 | (file->f_flags & O_APPEND)) { | 63 | (f.file->f_flags & O_APPEND)) { |
64 | error = XFS_ERROR(EBADF); | 64 | error = XFS_ERROR(EBADF); |
65 | goto out_put_file; | 65 | goto out_put_file; |
66 | } | 66 | } |
67 | 67 | ||
68 | tmp_file = fget((int)sxp->sx_fdtmp); | 68 | tmp = fdget((int)sxp->sx_fdtmp); |
69 | if (!tmp_file) { | 69 | if (!tmp.file) { |
70 | error = XFS_ERROR(EINVAL); | 70 | error = XFS_ERROR(EINVAL); |
71 | goto out_put_file; | 71 | goto out_put_file; |
72 | } | 72 | } |
73 | 73 | ||
74 | if (!(tmp_file->f_mode & FMODE_WRITE) || | 74 | if (!(tmp.file->f_mode & FMODE_WRITE) || |
75 | !(tmp_file->f_mode & FMODE_READ) || | 75 | !(tmp.file->f_mode & FMODE_READ) || |
76 | (tmp_file->f_flags & O_APPEND)) { | 76 | (tmp.file->f_flags & O_APPEND)) { |
77 | error = XFS_ERROR(EBADF); | 77 | error = XFS_ERROR(EBADF); |
78 | goto out_put_tmp_file; | 78 | goto out_put_tmp_file; |
79 | } | 79 | } |
80 | 80 | ||
81 | if (IS_SWAPFILE(file->f_path.dentry->d_inode) || | 81 | if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) || |
82 | IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) { | 82 | IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) { |
83 | error = XFS_ERROR(EINVAL); | 83 | error = XFS_ERROR(EINVAL); |
84 | goto out_put_tmp_file; | 84 | goto out_put_tmp_file; |
85 | } | 85 | } |
86 | 86 | ||
87 | ip = XFS_I(file->f_path.dentry->d_inode); | 87 | ip = XFS_I(f.file->f_path.dentry->d_inode); |
88 | tip = XFS_I(tmp_file->f_path.dentry->d_inode); | 88 | tip = XFS_I(tmp.file->f_path.dentry->d_inode); |
89 | 89 | ||
90 | if (ip->i_mount != tip->i_mount) { | 90 | if (ip->i_mount != tip->i_mount) { |
91 | error = XFS_ERROR(EINVAL); | 91 | error = XFS_ERROR(EINVAL); |
@@ -105,9 +105,9 @@ xfs_swapext( | |||
105 | error = xfs_swap_extents(ip, tip, sxp); | 105 | error = xfs_swap_extents(ip, tip, sxp); |
106 | 106 | ||
107 | out_put_tmp_file: | 107 | out_put_tmp_file: |
108 | fput(tmp_file); | 108 | fdput(tmp); |
109 | out_put_file: | 109 | out_put_file: |
110 | fput(file); | 110 | fdput(f); |
111 | out: | 111 | out: |
112 | return error; | 112 | return error; |
113 | } | 113 | } |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0e0232c3b6d9..8305f2ac6773 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -70,16 +70,16 @@ xfs_find_handle( | |||
70 | int hsize; | 70 | int hsize; |
71 | xfs_handle_t handle; | 71 | xfs_handle_t handle; |
72 | struct inode *inode; | 72 | struct inode *inode; |
73 | struct file *file = NULL; | 73 | struct fd f; |
74 | struct path path; | 74 | struct path path; |
75 | int error; | 75 | int error; |
76 | struct xfs_inode *ip; | 76 | struct xfs_inode *ip; |
77 | 77 | ||
78 | if (cmd == XFS_IOC_FD_TO_HANDLE) { | 78 | if (cmd == XFS_IOC_FD_TO_HANDLE) { |
79 | file = fget(hreq->fd); | 79 | f = fdget(hreq->fd); |
80 | if (!file) | 80 | if (!f.file) |
81 | return -EBADF; | 81 | return -EBADF; |
82 | inode = file->f_path.dentry->d_inode; | 82 | inode = f.file->f_path.dentry->d_inode; |
83 | } else { | 83 | } else { |
84 | error = user_lpath((const char __user *)hreq->path, &path); | 84 | error = user_lpath((const char __user *)hreq->path, &path); |
85 | if (error) | 85 | if (error) |
@@ -134,7 +134,7 @@ xfs_find_handle( | |||
134 | 134 | ||
135 | out_put: | 135 | out_put: |
136 | if (cmd == XFS_IOC_FD_TO_HANDLE) | 136 | if (cmd == XFS_IOC_FD_TO_HANDLE) |
137 | fput(file); | 137 | fdput(f); |
138 | else | 138 | else |
139 | path_put(&path); | 139 | path_put(&path); |
140 | return error; | 140 | return error; |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 001537f92caf..e0fd2734189e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -1506,6 +1506,11 @@ xfs_init_zones(void) | |||
1506 | STATIC void | 1506 | STATIC void |
1507 | xfs_destroy_zones(void) | 1507 | xfs_destroy_zones(void) |
1508 | { | 1508 | { |
1509 | /* | ||
1510 | * Make sure all delayed rcu free are flushed before we | ||
1511 | * destroy caches. | ||
1512 | */ | ||
1513 | rcu_barrier(); | ||
1509 | kmem_zone_destroy(xfs_ili_zone); | 1514 | kmem_zone_destroy(xfs_ili_zone); |
1510 | kmem_zone_destroy(xfs_inode_zone); | 1515 | kmem_zone_destroy(xfs_inode_zone); |
1511 | kmem_zone_destroy(xfs_efi_zone); | 1516 | kmem_zone_destroy(xfs_efi_zone); |