aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c5
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/super.c5
-rw-r--r--fs/affs/super.c5
-rw-r--r--fs/afs/super.c5
-rw-r--r--fs/autofs4/dev-ioctl.c18
-rw-r--r--fs/autofs4/waitq.c3
-rw-r--r--fs/befs/linuxvfs.c5
-rw-r--r--fs/bfs/inode.c5
-rw-r--r--fs/binfmt_elf.c19
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/inode.c5
-rw-r--r--fs/btrfs/ioctl.c32
-rw-r--r--fs/btrfs/reada.c18
-rw-r--r--fs/ceph/inode.c4
-rw-r--r--fs/ceph/super.c5
-rw-r--r--fs/cifs/cifsfs.c5
-rw-r--r--fs/coda/inode.c37
-rw-r--r--fs/compat.c112
-rw-r--r--fs/compat_ioctl.c27
-rw-r--r--fs/coredump.c686
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/ecryptfs/main.c6
-rw-r--r--fs/efs/super.c5
-rw-r--r--fs/eventpoll.c23
-rw-r--r--fs/exec.c688
-rw-r--r--fs/exofs/super.c5
-rw-r--r--fs/ext2/super.c5
-rw-r--r--fs/ext3/super.c5
-rw-r--r--fs/ext4/ioctl.c15
-rw-r--r--fs/ext4/super.c5
-rw-r--r--fs/fat/inode.c5
-rw-r--r--fs/fcntl.c166
-rw-r--r--fs/fhandle.c17
-rw-r--r--fs/file.c573
-rw-r--r--fs/file_table.c106
-rw-r--r--fs/freevxfs/vxfs_super.c5
-rw-r--r--fs/fuse/dev.c3
-rw-r--r--fs/fuse/inode.c6
-rw-r--r--fs/hfs/super.c6
-rw-r--r--fs/hfsplus/super.c6
-rw-r--r--fs/hpfs/super.c5
-rw-r--r--fs/hugetlbfs/inode.c5
-rw-r--r--fs/ioctl.c25
-rw-r--r--fs/isofs/inode.c5
-rw-r--r--fs/jffs2/super.c6
-rw-r--r--fs/jfs/super.c6
-rw-r--r--fs/locks.c20
-rw-r--r--fs/logfs/inode.c5
-rw-r--r--fs/minix/inode.c5
-rw-r--r--fs/namei.c41
-rw-r--r--fs/ncpfs/inode.c5
-rw-r--r--fs/nfs/inode.c5
-rw-r--r--fs/nfsd/nfs4state.c3
-rw-r--r--fs/nilfs2/super.c6
-rw-r--r--fs/notify/fanotify/fanotify_user.c87
-rw-r--r--fs/notify/inotify/inotify_user.c28
-rw-r--r--fs/ntfs/super.c6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c38
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c5
-rw-r--r--fs/ocfs2/super.c5
-rw-r--r--fs/open.c130
-rw-r--r--fs/openpromfs/inode.c5
-rw-r--r--fs/pipe.c31
-rw-r--r--fs/proc/Makefile2
-rw-r--r--fs/proc/base.c417
-rw-r--r--fs/proc/fd.c367
-rw-r--r--fs/proc/fd.h14
-rw-r--r--fs/proc/internal.h48
-rw-r--r--fs/qnx4/inode.c5
-rw-r--r--fs/qnx6/inode.c5
-rw-r--r--fs/read_write.c180
-rw-r--r--fs/read_write.h2
-rw-r--r--fs/readdir.c36
-rw-r--r--fs/reiserfs/super.c5
-rw-r--r--fs/romfs/super.c5
-rw-r--r--fs/select.c31
-rw-r--r--fs/signalfd.c13
-rw-r--r--fs/splice.c69
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/stat.c10
-rw-r--r--fs/statfs.c9
-rw-r--r--fs/super.c6
-rw-r--r--fs/sync.c33
-rw-r--r--fs/sysv/inode.c5
-rw-r--r--fs/timerfd.c45
-rw-r--r--fs/ubifs/super.c6
-rw-r--r--fs/udf/super.c5
-rw-r--r--fs/ufs/super.c5
-rw-r--r--fs/utimes.c11
-rw-r--r--fs/xattr.c52
-rw-r--r--fs/xfs/xfs_dfrag.c34
-rw-r--r--fs/xfs/xfs_ioctl.c10
-rw-r--r--fs/xfs/xfs_super.c5
94 files changed, 2486 insertions, 2092 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index b85efa77394..392c5dac198 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -560,6 +560,11 @@ static int v9fs_init_inode_cache(void)
560 */ 560 */
561static void v9fs_destroy_inode_cache(void) 561static void v9fs_destroy_inode_cache(void)
562{ 562{
563 /*
564 * Make sure all delayed rcu free inodes are flushed before we
565 * destroy cache.
566 */
567 rcu_barrier();
563 kmem_cache_destroy(v9fs_inode_cache); 568 kmem_cache_destroy(v9fs_inode_cache);
564} 569}
565 570
diff --git a/fs/Makefile b/fs/Makefile
index 2fb97793467..8938f825032 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o \
13 pnode.o drop_caches.o splice.o sync.o utimes.o \ 13 pnode.o drop_caches.o splice.o sync.o utimes.o \
14 stack.o fs_struct.o statfs.o 14 stack.o fs_struct.o statfs.o coredump.o
15 15
16ifeq ($(CONFIG_BLOCK),y) 16ifeq ($(CONFIG_BLOCK),y)
17obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o 17obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 22a0d7ed5fa..d5712293579 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -280,6 +280,11 @@ static int init_inodecache(void)
280 280
281static void destroy_inodecache(void) 281static void destroy_inodecache(void)
282{ 282{
283 /*
284 * Make sure all delayed rcu free inodes are flushed before we
285 * destroy cache.
286 */
287 rcu_barrier();
283 kmem_cache_destroy(adfs_inode_cachep); 288 kmem_cache_destroy(adfs_inode_cachep);
284} 289}
285 290
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 1f030825cd3..b84dc735250 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -147,6 +147,11 @@ static int init_inodecache(void)
147 147
148static void destroy_inodecache(void) 148static void destroy_inodecache(void)
149{ 149{
150 /*
151 * Make sure all delayed rcu free inodes are flushed before we
152 * destroy cache.
153 */
154 rcu_barrier();
150 kmem_cache_destroy(affs_inode_cachep); 155 kmem_cache_destroy(affs_inode_cachep);
151} 156}
152 157
diff --git a/fs/afs/super.c b/fs/afs/super.c
index df8c6047c2a..43165009428 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -123,6 +123,11 @@ void __exit afs_fs_exit(void)
123 BUG(); 123 BUG();
124 } 124 }
125 125
126 /*
127 * Make sure all delayed rcu free inodes are flushed before we
128 * destroy cache.
129 */
130 rcu_barrier();
126 kmem_cache_destroy(afs_inode_cachep); 131 kmem_cache_destroy(afs_inode_cachep);
127 _leave(""); 132 _leave("");
128} 133}
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index abf645c1703..a16214109d3 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -221,20 +221,6 @@ static int test_by_type(struct path *path, void *p)
221 return ino && ino->sbi->type & *(unsigned *)p; 221 return ino && ino->sbi->type & *(unsigned *)p;
222} 222}
223 223
224static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
225{
226 struct files_struct *files = current->files;
227 struct fdtable *fdt;
228
229 spin_lock(&files->file_lock);
230 fdt = files_fdtable(files);
231 BUG_ON(fdt->fd[fd] != NULL);
232 rcu_assign_pointer(fdt->fd[fd], file);
233 __set_close_on_exec(fd, fdt);
234 spin_unlock(&files->file_lock);
235}
236
237
238/* 224/*
239 * Open a file descriptor on the autofs mount point corresponding 225 * Open a file descriptor on the autofs mount point corresponding
240 * to the given path and device number (aka. new_encode_dev(sb->s_dev)). 226 * to the given path and device number (aka. new_encode_dev(sb->s_dev)).
@@ -243,7 +229,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
243{ 229{
244 int err, fd; 230 int err, fd;
245 231
246 fd = get_unused_fd(); 232 fd = get_unused_fd_flags(O_CLOEXEC);
247 if (likely(fd >= 0)) { 233 if (likely(fd >= 0)) {
248 struct file *filp; 234 struct file *filp;
249 struct path path; 235 struct path path;
@@ -264,7 +250,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
264 goto out; 250 goto out;
265 } 251 }
266 252
267 autofs_dev_ioctl_fd_install(fd, filp); 253 fd_install(fd, filp);
268 } 254 }
269 255
270 return fd; 256 return fd;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index da8876d38a7..dce436e595c 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -175,8 +175,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
175 return; 175 return;
176 } 176 }
177 177
178 pipe = sbi->pipe; 178 pipe = get_file(sbi->pipe);
179 get_file(pipe);
180 179
181 mutex_unlock(&sbi->wq_mutex); 180 mutex_unlock(&sbi->wq_mutex);
182 181
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 7f73a692bfd..2b3bda8d5e6 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -457,6 +457,11 @@ befs_init_inodecache(void)
457static void 457static void
458befs_destroy_inodecache(void) 458befs_destroy_inodecache(void)
459{ 459{
460 /*
461 * Make sure all delayed rcu free inodes are flushed before we
462 * destroy cache.
463 */
464 rcu_barrier();
460 kmem_cache_destroy(befs_inode_cachep); 465 kmem_cache_destroy(befs_inode_cachep);
461} 466}
462 467
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index b242beba58e..737aaa3f709 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -280,6 +280,11 @@ static int init_inodecache(void)
280 280
281static void destroy_inodecache(void) 281static void destroy_inodecache(void)
282{ 282{
283 /*
284 * Make sure all delayed rcu free inodes are flushed before we
285 * destroy cache.
286 */
287 rcu_barrier();
283 kmem_cache_destroy(bfs_inode_cachep); 288 kmem_cache_destroy(bfs_inode_cachep);
284} 289}
285 290
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 1b52956afe3..0225fddf49b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1696,30 +1696,19 @@ static int elf_note_info_init(struct elf_note_info *info)
1696 return 0; 1696 return 0;
1697 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); 1697 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1698 if (!info->psinfo) 1698 if (!info->psinfo)
1699 goto notes_free; 1699 return 0;
1700 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); 1700 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1701 if (!info->prstatus) 1701 if (!info->prstatus)
1702 goto psinfo_free; 1702 return 0;
1703 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); 1703 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1704 if (!info->fpu) 1704 if (!info->fpu)
1705 goto prstatus_free; 1705 return 0;
1706#ifdef ELF_CORE_COPY_XFPREGS 1706#ifdef ELF_CORE_COPY_XFPREGS
1707 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); 1707 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1708 if (!info->xfpu) 1708 if (!info->xfpu)
1709 goto fpu_free; 1709 return 0;
1710#endif 1710#endif
1711 return 1; 1711 return 1;
1712#ifdef ELF_CORE_COPY_XFPREGS
1713 fpu_free:
1714 kfree(info->fpu);
1715#endif
1716 prstatus_free:
1717 kfree(info->prstatus);
1718 psinfo_free:
1719 kfree(info->psinfo);
1720 notes_free:
1721 kfree(info->notes);
1722 return 0;
1723} 1712}
1724 1713
1725static int fill_note_info(struct elfhdr *elf, int phdrs, 1714static int fill_note_info(struct elfhdr *elf, int phdrs,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4c878476bb9..b08ea4717e9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -107,6 +107,12 @@ void extent_io_exit(void)
107 list_del(&eb->leak_list); 107 list_del(&eb->leak_list);
108 kmem_cache_free(extent_buffer_cache, eb); 108 kmem_cache_free(extent_buffer_cache, eb);
109 } 109 }
110
111 /*
112 * Make sure all delayed rcu free are flushed before we
113 * destroy caches.
114 */
115 rcu_barrier();
110 if (extent_state_cache) 116 if (extent_state_cache)
111 kmem_cache_destroy(extent_state_cache); 117 kmem_cache_destroy(extent_state_cache);
112 if (extent_buffer_cache) 118 if (extent_buffer_cache)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2a028a58619..a6ed6944e50 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7076,6 +7076,11 @@ static void init_once(void *foo)
7076 7076
7077void btrfs_destroy_cachep(void) 7077void btrfs_destroy_cachep(void)
7078{ 7078{
7079 /*
7080 * Make sure all delayed rcu free inodes are flushed before we
7081 * destroy cache.
7082 */
7083 rcu_barrier();
7079 if (btrfs_inode_cachep) 7084 if (btrfs_inode_cachep)
7080 kmem_cache_destroy(btrfs_inode_cachep); 7085 kmem_cache_destroy(btrfs_inode_cachep);
7081 if (btrfs_trans_handle_cachep) 7086 if (btrfs_trans_handle_cachep)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 27bfce58da3..47127c1bd29 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1397,7 +1397,6 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1397 u64 *transid, bool readonly, 1397 u64 *transid, bool readonly,
1398 struct btrfs_qgroup_inherit **inherit) 1398 struct btrfs_qgroup_inherit **inherit)
1399{ 1399{
1400 struct file *src_file;
1401 int namelen; 1400 int namelen;
1402 int ret = 0; 1401 int ret = 0;
1403 1402
@@ -1421,25 +1420,24 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1421 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1420 ret = btrfs_mksubvol(&file->f_path, name, namelen,
1422 NULL, transid, readonly, inherit); 1421 NULL, transid, readonly, inherit);
1423 } else { 1422 } else {
1423 struct fd src = fdget(fd);
1424 struct inode *src_inode; 1424 struct inode *src_inode;
1425 src_file = fget(fd); 1425 if (!src.file) {
1426 if (!src_file) {
1427 ret = -EINVAL; 1426 ret = -EINVAL;
1428 goto out_drop_write; 1427 goto out_drop_write;
1429 } 1428 }
1430 1429
1431 src_inode = src_file->f_path.dentry->d_inode; 1430 src_inode = src.file->f_path.dentry->d_inode;
1432 if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { 1431 if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) {
1433 printk(KERN_INFO "btrfs: Snapshot src from " 1432 printk(KERN_INFO "btrfs: Snapshot src from "
1434 "another FS\n"); 1433 "another FS\n");
1435 ret = -EINVAL; 1434 ret = -EINVAL;
1436 fput(src_file); 1435 } else {
1437 goto out_drop_write; 1436 ret = btrfs_mksubvol(&file->f_path, name, namelen,
1437 BTRFS_I(src_inode)->root,
1438 transid, readonly, inherit);
1438 } 1439 }
1439 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1440 fdput(src);
1440 BTRFS_I(src_inode)->root,
1441 transid, readonly, inherit);
1442 fput(src_file);
1443 } 1441 }
1444out_drop_write: 1442out_drop_write:
1445 mnt_drop_write_file(file); 1443 mnt_drop_write_file(file);
@@ -2341,7 +2339,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2341{ 2339{
2342 struct inode *inode = fdentry(file)->d_inode; 2340 struct inode *inode = fdentry(file)->d_inode;
2343 struct btrfs_root *root = BTRFS_I(inode)->root; 2341 struct btrfs_root *root = BTRFS_I(inode)->root;
2344 struct file *src_file; 2342 struct fd src_file;
2345 struct inode *src; 2343 struct inode *src;
2346 struct btrfs_trans_handle *trans; 2344 struct btrfs_trans_handle *trans;
2347 struct btrfs_path *path; 2345 struct btrfs_path *path;
@@ -2376,24 +2374,24 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2376 if (ret) 2374 if (ret)
2377 return ret; 2375 return ret;
2378 2376
2379 src_file = fget(srcfd); 2377 src_file = fdget(srcfd);
2380 if (!src_file) { 2378 if (!src_file.file) {
2381 ret = -EBADF; 2379 ret = -EBADF;
2382 goto out_drop_write; 2380 goto out_drop_write;
2383 } 2381 }
2384 2382
2385 ret = -EXDEV; 2383 ret = -EXDEV;
2386 if (src_file->f_path.mnt != file->f_path.mnt) 2384 if (src_file.file->f_path.mnt != file->f_path.mnt)
2387 goto out_fput; 2385 goto out_fput;
2388 2386
2389 src = src_file->f_dentry->d_inode; 2387 src = src_file.file->f_dentry->d_inode;
2390 2388
2391 ret = -EINVAL; 2389 ret = -EINVAL;
2392 if (src == inode) 2390 if (src == inode)
2393 goto out_fput; 2391 goto out_fput;
2394 2392
2395 /* the src must be open for reading */ 2393 /* the src must be open for reading */
2396 if (!(src_file->f_mode & FMODE_READ)) 2394 if (!(src_file.file->f_mode & FMODE_READ))
2397 goto out_fput; 2395 goto out_fput;
2398 2396
2399 /* don't make the dst file partly checksummed */ 2397 /* don't make the dst file partly checksummed */
@@ -2724,7 +2722,7 @@ out_unlock:
2724 vfree(buf); 2722 vfree(buf);
2725 btrfs_free_path(path); 2723 btrfs_free_path(path);
2726out_fput: 2724out_fput:
2727 fput(src_file); 2725 fdput(src_file);
2728out_drop_write: 2726out_drop_write:
2729 mnt_drop_write_file(file); 2727 mnt_drop_write_file(file);
2730 return ret; 2728 return ret;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 48a4882d8ad..a955669519a 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -68,7 +68,7 @@ struct reada_extent {
68 u32 blocksize; 68 u32 blocksize;
69 int err; 69 int err;
70 struct list_head extctl; 70 struct list_head extctl;
71 struct kref refcnt; 71 int refcnt;
72 spinlock_t lock; 72 spinlock_t lock;
73 struct reada_zone *zones[BTRFS_MAX_MIRRORS]; 73 struct reada_zone *zones[BTRFS_MAX_MIRRORS];
74 int nzones; 74 int nzones;
@@ -126,7 +126,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
126 spin_lock(&fs_info->reada_lock); 126 spin_lock(&fs_info->reada_lock);
127 re = radix_tree_lookup(&fs_info->reada_tree, index); 127 re = radix_tree_lookup(&fs_info->reada_tree, index);
128 if (re) 128 if (re)
129 kref_get(&re->refcnt); 129 re->refcnt++;
130 spin_unlock(&fs_info->reada_lock); 130 spin_unlock(&fs_info->reada_lock);
131 131
132 if (!re) 132 if (!re)
@@ -336,7 +336,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
336 spin_lock(&fs_info->reada_lock); 336 spin_lock(&fs_info->reada_lock);
337 re = radix_tree_lookup(&fs_info->reada_tree, index); 337 re = radix_tree_lookup(&fs_info->reada_tree, index);
338 if (re) 338 if (re)
339 kref_get(&re->refcnt); 339 re->refcnt++;
340 spin_unlock(&fs_info->reada_lock); 340 spin_unlock(&fs_info->reada_lock);
341 341
342 if (re) 342 if (re)
@@ -352,7 +352,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
352 re->top = *top; 352 re->top = *top;
353 INIT_LIST_HEAD(&re->extctl); 353 INIT_LIST_HEAD(&re->extctl);
354 spin_lock_init(&re->lock); 354 spin_lock_init(&re->lock);
355 kref_init(&re->refcnt); 355 re->refcnt = 1;
356 356
357 /* 357 /*
358 * map block 358 * map block
@@ -398,7 +398,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
398 if (ret == -EEXIST) { 398 if (ret == -EEXIST) {
399 re_exist = radix_tree_lookup(&fs_info->reada_tree, index); 399 re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
400 BUG_ON(!re_exist); 400 BUG_ON(!re_exist);
401 kref_get(&re_exist->refcnt); 401 re_exist->refcnt++;
402 spin_unlock(&fs_info->reada_lock); 402 spin_unlock(&fs_info->reada_lock);
403 goto error; 403 goto error;
404 } 404 }
@@ -465,10 +465,6 @@ error:
465 return re_exist; 465 return re_exist;
466} 466}
467 467
468static void reada_kref_dummy(struct kref *kr)
469{
470}
471
472static void reada_extent_put(struct btrfs_fs_info *fs_info, 468static void reada_extent_put(struct btrfs_fs_info *fs_info,
473 struct reada_extent *re) 469 struct reada_extent *re)
474{ 470{
@@ -476,7 +472,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
476 unsigned long index = re->logical >> PAGE_CACHE_SHIFT; 472 unsigned long index = re->logical >> PAGE_CACHE_SHIFT;
477 473
478 spin_lock(&fs_info->reada_lock); 474 spin_lock(&fs_info->reada_lock);
479 if (!kref_put(&re->refcnt, reada_kref_dummy)) { 475 if (--re->refcnt) {
480 spin_unlock(&fs_info->reada_lock); 476 spin_unlock(&fs_info->reada_lock);
481 return; 477 return;
482 } 478 }
@@ -671,7 +667,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
671 return 0; 667 return 0;
672 } 668 }
673 dev->reada_next = re->logical + re->blocksize; 669 dev->reada_next = re->logical + re->blocksize;
674 kref_get(&re->refcnt); 670 re->refcnt++;
675 671
676 spin_unlock(&fs_info->reada_lock); 672 spin_unlock(&fs_info->reada_lock);
677 673
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 4b5762ef7c2..ba95eea201b 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1104,7 +1104,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1104 pr_err("fill_trace bad get_inode " 1104 pr_err("fill_trace bad get_inode "
1105 "%llx.%llx\n", vino.ino, vino.snap); 1105 "%llx.%llx\n", vino.ino, vino.snap);
1106 err = PTR_ERR(in); 1106 err = PTR_ERR(in);
1107 d_delete(dn); 1107 d_drop(dn);
1108 goto done; 1108 goto done;
1109 } 1109 }
1110 dn = splice_dentry(dn, in, &have_lease, true); 1110 dn = splice_dentry(dn, in, &have_lease, true);
@@ -1277,7 +1277,7 @@ retry_lookup:
1277 in = ceph_get_inode(parent->d_sb, vino); 1277 in = ceph_get_inode(parent->d_sb, vino);
1278 if (IS_ERR(in)) { 1278 if (IS_ERR(in)) {
1279 dout("new_inode badness\n"); 1279 dout("new_inode badness\n");
1280 d_delete(dn); 1280 d_drop(dn);
1281 dput(dn); 1281 dput(dn);
1282 err = PTR_ERR(in); 1282 err = PTR_ERR(in);
1283 goto out; 1283 goto out;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index b982239f38f..3a42d932637 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -603,6 +603,11 @@ bad_cap:
603 603
604static void destroy_caches(void) 604static void destroy_caches(void)
605{ 605{
606 /*
607 * Make sure all delayed rcu free inodes are flushed before we
608 * destroy cache.
609 */
610 rcu_barrier();
606 kmem_cache_destroy(ceph_inode_cachep); 611 kmem_cache_destroy(ceph_inode_cachep);
607 kmem_cache_destroy(ceph_cap_cachep); 612 kmem_cache_destroy(ceph_cap_cachep);
608 kmem_cache_destroy(ceph_dentry_cachep); 613 kmem_cache_destroy(ceph_dentry_cachep);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a41044a3108..e7931cc55d0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -968,6 +968,11 @@ cifs_init_inodecache(void)
968static void 968static void
969cifs_destroy_inodecache(void) 969cifs_destroy_inodecache(void)
970{ 970{
971 /*
972 * Make sure all delayed rcu free inodes are flushed before we
973 * destroy cache.
974 */
975 rcu_barrier();
971 kmem_cache_destroy(cifs_inode_cachep); 976 kmem_cache_destroy(cifs_inode_cachep);
972} 977}
973 978
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index f1813120d75..be2aa490948 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -85,6 +85,11 @@ int coda_init_inodecache(void)
85 85
86void coda_destroy_inodecache(void) 86void coda_destroy_inodecache(void)
87{ 87{
88 /*
89 * Make sure all delayed rcu free inodes are flushed before we
90 * destroy cache.
91 */
92 rcu_barrier();
88 kmem_cache_destroy(coda_inode_cachep); 93 kmem_cache_destroy(coda_inode_cachep);
89} 94}
90 95
@@ -107,43 +112,41 @@ static const struct super_operations coda_super_operations =
107 112
108static int get_device_index(struct coda_mount_data *data) 113static int get_device_index(struct coda_mount_data *data)
109{ 114{
110 struct file *file; 115 struct fd f;
111 struct inode *inode; 116 struct inode *inode;
112 int idx; 117 int idx;
113 118
114 if(data == NULL) { 119 if (data == NULL) {
115 printk("coda_read_super: Bad mount data\n"); 120 printk("coda_read_super: Bad mount data\n");
116 return -1; 121 return -1;
117 } 122 }
118 123
119 if(data->version != CODA_MOUNT_VERSION) { 124 if (data->version != CODA_MOUNT_VERSION) {
120 printk("coda_read_super: Bad mount version\n"); 125 printk("coda_read_super: Bad mount version\n");
121 return -1; 126 return -1;
122 } 127 }
123 128
124 file = fget(data->fd); 129 f = fdget(data->fd);
125 inode = NULL; 130 if (!f.file)
126 if(file) 131 goto Ebadf;
127 inode = file->f_path.dentry->d_inode; 132 inode = f.file->f_path.dentry->d_inode;
128 133 if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) {
129 if(!inode || !S_ISCHR(inode->i_mode) || 134 fdput(f);
130 imajor(inode) != CODA_PSDEV_MAJOR) { 135 goto Ebadf;
131 if(file)
132 fput(file);
133
134 printk("coda_read_super: Bad file\n");
135 return -1;
136 } 136 }
137 137
138 idx = iminor(inode); 138 idx = iminor(inode);
139 fput(file); 139 fdput(f);
140 140
141 if(idx < 0 || idx >= MAX_CODADEVS) { 141 if (idx < 0 || idx >= MAX_CODADEVS) {
142 printk("coda_read_super: Bad minor number\n"); 142 printk("coda_read_super: Bad minor number\n");
143 return -1; 143 return -1;
144 } 144 }
145 145
146 return idx; 146 return idx;
147Ebadf:
148 printk("coda_read_super: Bad file\n");
149 return -1;
147} 150}
148 151
149static int coda_fill_super(struct super_block *sb, void *data, int silent) 152static int coda_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/compat.c b/fs/compat.c
index 1bdb350ea5d..b7a24d0ca30 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -870,22 +870,20 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd,
870 struct compat_old_linux_dirent __user *dirent, unsigned int count) 870 struct compat_old_linux_dirent __user *dirent, unsigned int count)
871{ 871{
872 int error; 872 int error;
873 struct file *file; 873 struct fd f = fdget(fd);
874 int fput_needed;
875 struct compat_readdir_callback buf; 874 struct compat_readdir_callback buf;
876 875
877 file = fget_light(fd, &fput_needed); 876 if (!f.file)
878 if (!file)
879 return -EBADF; 877 return -EBADF;
880 878
881 buf.result = 0; 879 buf.result = 0;
882 buf.dirent = dirent; 880 buf.dirent = dirent;
883 881
884 error = vfs_readdir(file, compat_fillonedir, &buf); 882 error = vfs_readdir(f.file, compat_fillonedir, &buf);
885 if (buf.result) 883 if (buf.result)
886 error = buf.result; 884 error = buf.result;
887 885
888 fput_light(file, fput_needed); 886 fdput(f);
889 return error; 887 return error;
890} 888}
891 889
@@ -949,17 +947,16 @@ efault:
949asmlinkage long compat_sys_getdents(unsigned int fd, 947asmlinkage long compat_sys_getdents(unsigned int fd,
950 struct compat_linux_dirent __user *dirent, unsigned int count) 948 struct compat_linux_dirent __user *dirent, unsigned int count)
951{ 949{
952 struct file * file; 950 struct fd f;
953 struct compat_linux_dirent __user * lastdirent; 951 struct compat_linux_dirent __user * lastdirent;
954 struct compat_getdents_callback buf; 952 struct compat_getdents_callback buf;
955 int fput_needed;
956 int error; 953 int error;
957 954
958 if (!access_ok(VERIFY_WRITE, dirent, count)) 955 if (!access_ok(VERIFY_WRITE, dirent, count))
959 return -EFAULT; 956 return -EFAULT;
960 957
961 file = fget_light(fd, &fput_needed); 958 f = fdget(fd);
962 if (!file) 959 if (!f.file)
963 return -EBADF; 960 return -EBADF;
964 961
965 buf.current_dir = dirent; 962 buf.current_dir = dirent;
@@ -967,17 +964,17 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
967 buf.count = count; 964 buf.count = count;
968 buf.error = 0; 965 buf.error = 0;
969 966
970 error = vfs_readdir(file, compat_filldir, &buf); 967 error = vfs_readdir(f.file, compat_filldir, &buf);
971 if (error >= 0) 968 if (error >= 0)
972 error = buf.error; 969 error = buf.error;
973 lastdirent = buf.previous; 970 lastdirent = buf.previous;
974 if (lastdirent) { 971 if (lastdirent) {
975 if (put_user(file->f_pos, &lastdirent->d_off)) 972 if (put_user(f.file->f_pos, &lastdirent->d_off))
976 error = -EFAULT; 973 error = -EFAULT;
977 else 974 else
978 error = count - buf.count; 975 error = count - buf.count;
979 } 976 }
980 fput_light(file, fput_needed); 977 fdput(f);
981 return error; 978 return error;
982} 979}
983 980
@@ -1035,17 +1032,16 @@ efault:
1035asmlinkage long compat_sys_getdents64(unsigned int fd, 1032asmlinkage long compat_sys_getdents64(unsigned int fd,
1036 struct linux_dirent64 __user * dirent, unsigned int count) 1033 struct linux_dirent64 __user * dirent, unsigned int count)
1037{ 1034{
1038 struct file * file; 1035 struct fd f;
1039 struct linux_dirent64 __user * lastdirent; 1036 struct linux_dirent64 __user * lastdirent;
1040 struct compat_getdents_callback64 buf; 1037 struct compat_getdents_callback64 buf;
1041 int fput_needed;
1042 int error; 1038 int error;
1043 1039
1044 if (!access_ok(VERIFY_WRITE, dirent, count)) 1040 if (!access_ok(VERIFY_WRITE, dirent, count))
1045 return -EFAULT; 1041 return -EFAULT;
1046 1042
1047 file = fget_light(fd, &fput_needed); 1043 f = fdget(fd);
1048 if (!file) 1044 if (!f.file)
1049 return -EBADF; 1045 return -EBADF;
1050 1046
1051 buf.current_dir = dirent; 1047 buf.current_dir = dirent;
@@ -1053,18 +1049,18 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
1053 buf.count = count; 1049 buf.count = count;
1054 buf.error = 0; 1050 buf.error = 0;
1055 1051
1056 error = vfs_readdir(file, compat_filldir64, &buf); 1052 error = vfs_readdir(f.file, compat_filldir64, &buf);
1057 if (error >= 0) 1053 if (error >= 0)
1058 error = buf.error; 1054 error = buf.error;
1059 lastdirent = buf.previous; 1055 lastdirent = buf.previous;
1060 if (lastdirent) { 1056 if (lastdirent) {
1061 typeof(lastdirent->d_off) d_off = file->f_pos; 1057 typeof(lastdirent->d_off) d_off = f.file->f_pos;
1062 if (__put_user_unaligned(d_off, &lastdirent->d_off)) 1058 if (__put_user_unaligned(d_off, &lastdirent->d_off))
1063 error = -EFAULT; 1059 error = -EFAULT;
1064 else 1060 else
1065 error = count - buf.count; 1061 error = count - buf.count;
1066 } 1062 }
1067 fput_light(file, fput_needed); 1063 fdput(f);
1068 return error; 1064 return error;
1069} 1065}
1070#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ 1066#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */
@@ -1152,18 +1148,16 @@ asmlinkage ssize_t
1152compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, 1148compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
1153 unsigned long vlen) 1149 unsigned long vlen)
1154{ 1150{
1155 struct file *file; 1151 struct fd f = fdget(fd);
1156 int fput_needed;
1157 ssize_t ret; 1152 ssize_t ret;
1158 loff_t pos; 1153 loff_t pos;
1159 1154
1160 file = fget_light(fd, &fput_needed); 1155 if (!f.file)
1161 if (!file)
1162 return -EBADF; 1156 return -EBADF;
1163 pos = file->f_pos; 1157 pos = f.file->f_pos;
1164 ret = compat_readv(file, vec, vlen, &pos); 1158 ret = compat_readv(f.file, vec, vlen, &pos);
1165 file->f_pos = pos; 1159 f.file->f_pos = pos;
1166 fput_light(file, fput_needed); 1160 fdput(f);
1167 return ret; 1161 return ret;
1168} 1162}
1169 1163
@@ -1171,19 +1165,18 @@ asmlinkage ssize_t
1171compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, 1165compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec,
1172 unsigned long vlen, loff_t pos) 1166 unsigned long vlen, loff_t pos)
1173{ 1167{
1174 struct file *file; 1168 struct fd f;
1175 int fput_needed;
1176 ssize_t ret; 1169 ssize_t ret;
1177 1170
1178 if (pos < 0) 1171 if (pos < 0)
1179 return -EINVAL; 1172 return -EINVAL;
1180 file = fget_light(fd, &fput_needed); 1173 f = fdget(fd);
1181 if (!file) 1174 if (!f.file)
1182 return -EBADF; 1175 return -EBADF;
1183 ret = -ESPIPE; 1176 ret = -ESPIPE;
1184 if (file->f_mode & FMODE_PREAD) 1177 if (f.file->f_mode & FMODE_PREAD)
1185 ret = compat_readv(file, vec, vlen, &pos); 1178 ret = compat_readv(f.file, vec, vlen, &pos);
1186 fput_light(file, fput_needed); 1179 fdput(f);
1187 return ret; 1180 return ret;
1188} 1181}
1189 1182
@@ -1221,18 +1214,16 @@ asmlinkage ssize_t
1221compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, 1214compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
1222 unsigned long vlen) 1215 unsigned long vlen)
1223{ 1216{
1224 struct file *file; 1217 struct fd f = fdget(fd);
1225 int fput_needed;
1226 ssize_t ret; 1218 ssize_t ret;
1227 loff_t pos; 1219 loff_t pos;
1228 1220
1229 file = fget_light(fd, &fput_needed); 1221 if (!f.file)
1230 if (!file)
1231 return -EBADF; 1222 return -EBADF;
1232 pos = file->f_pos; 1223 pos = f.file->f_pos;
1233 ret = compat_writev(file, vec, vlen, &pos); 1224 ret = compat_writev(f.file, vec, vlen, &pos);
1234 file->f_pos = pos; 1225 f.file->f_pos = pos;
1235 fput_light(file, fput_needed); 1226 fdput(f);
1236 return ret; 1227 return ret;
1237} 1228}
1238 1229
@@ -1240,19 +1231,18 @@ asmlinkage ssize_t
1240compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, 1231compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec,
1241 unsigned long vlen, loff_t pos) 1232 unsigned long vlen, loff_t pos)
1242{ 1233{
1243 struct file *file; 1234 struct fd f;
1244 int fput_needed;
1245 ssize_t ret; 1235 ssize_t ret;
1246 1236
1247 if (pos < 0) 1237 if (pos < 0)
1248 return -EINVAL; 1238 return -EINVAL;
1249 file = fget_light(fd, &fput_needed); 1239 f = fdget(fd);
1250 if (!file) 1240 if (!f.file)
1251 return -EBADF; 1241 return -EBADF;
1252 ret = -ESPIPE; 1242 ret = -ESPIPE;
1253 if (file->f_mode & FMODE_PWRITE) 1243 if (f.file->f_mode & FMODE_PWRITE)
1254 ret = compat_writev(file, vec, vlen, &pos); 1244 ret = compat_writev(f.file, vec, vlen, &pos);
1255 fput_light(file, fput_needed); 1245 fdput(f);
1256 return ret; 1246 return ret;
1257} 1247}
1258 1248
@@ -1802,3 +1792,25 @@ compat_sys_open_by_handle_at(int mountdirfd,
1802 return do_handle_open(mountdirfd, handle, flags); 1792 return do_handle_open(mountdirfd, handle, flags);
1803} 1793}
1804#endif 1794#endif
1795
1796#ifdef __ARCH_WANT_COMPAT_SYS_SENDFILE
1797asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
1798 compat_off_t __user *offset, compat_size_t count)
1799{
1800 loff_t pos;
1801 off_t off;
1802 ssize_t ret;
1803
1804 if (offset) {
1805 if (unlikely(get_user(off, offset)))
1806 return -EFAULT;
1807 pos = off;
1808 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1809 if (unlikely(put_user(pos, offset)))
1810 return -EFAULT;
1811 return ret;
1812 }
1813
1814 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1815}
1816#endif /* __ARCH_WANT_COMPAT_SYS_SENDFILE */
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 9c03a3ae898..f5054025f9d 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1539,16 +1539,13 @@ static int compat_ioctl_check_table(unsigned int xcmd)
1539asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, 1539asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1540 unsigned long arg) 1540 unsigned long arg)
1541{ 1541{
1542 struct file *filp; 1542 struct fd f = fdget(fd);
1543 int error = -EBADF; 1543 int error = -EBADF;
1544 int fput_needed; 1544 if (!f.file)
1545
1546 filp = fget_light(fd, &fput_needed);
1547 if (!filp)
1548 goto out; 1545 goto out;
1549 1546
1550 /* RED-PEN how should LSM module know it's handling 32bit? */ 1547 /* RED-PEN how should LSM module know it's handling 32bit? */
1551 error = security_file_ioctl(filp, cmd, arg); 1548 error = security_file_ioctl(f.file, cmd, arg);
1552 if (error) 1549 if (error)
1553 goto out_fput; 1550 goto out_fput;
1554 1551
@@ -1568,30 +1565,30 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1568#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) 1565#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
1569 case FS_IOC_RESVSP_32: 1566 case FS_IOC_RESVSP_32:
1570 case FS_IOC_RESVSP64_32: 1567 case FS_IOC_RESVSP64_32:
1571 error = compat_ioctl_preallocate(filp, compat_ptr(arg)); 1568 error = compat_ioctl_preallocate(f.file, compat_ptr(arg));
1572 goto out_fput; 1569 goto out_fput;
1573#else 1570#else
1574 case FS_IOC_RESVSP: 1571 case FS_IOC_RESVSP:
1575 case FS_IOC_RESVSP64: 1572 case FS_IOC_RESVSP64:
1576 error = ioctl_preallocate(filp, compat_ptr(arg)); 1573 error = ioctl_preallocate(f.file, compat_ptr(arg));
1577 goto out_fput; 1574 goto out_fput;
1578#endif 1575#endif
1579 1576
1580 case FIBMAP: 1577 case FIBMAP:
1581 case FIGETBSZ: 1578 case FIGETBSZ:
1582 case FIONREAD: 1579 case FIONREAD:
1583 if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) 1580 if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode))
1584 break; 1581 break;
1585 /*FALL THROUGH*/ 1582 /*FALL THROUGH*/
1586 1583
1587 default: 1584 default:
1588 if (filp->f_op && filp->f_op->compat_ioctl) { 1585 if (f.file->f_op && f.file->f_op->compat_ioctl) {
1589 error = filp->f_op->compat_ioctl(filp, cmd, arg); 1586 error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
1590 if (error != -ENOIOCTLCMD) 1587 if (error != -ENOIOCTLCMD)
1591 goto out_fput; 1588 goto out_fput;
1592 } 1589 }
1593 1590
1594 if (!filp->f_op || !filp->f_op->unlocked_ioctl) 1591 if (!f.file->f_op || !f.file->f_op->unlocked_ioctl)
1595 goto do_ioctl; 1592 goto do_ioctl;
1596 break; 1593 break;
1597 } 1594 }
@@ -1599,7 +1596,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1599 if (compat_ioctl_check_table(XFORM(cmd))) 1596 if (compat_ioctl_check_table(XFORM(cmd)))
1600 goto found_handler; 1597 goto found_handler;
1601 1598
1602 error = do_ioctl_trans(fd, cmd, arg, filp); 1599 error = do_ioctl_trans(fd, cmd, arg, f.file);
1603 if (error == -ENOIOCTLCMD) 1600 if (error == -ENOIOCTLCMD)
1604 error = -ENOTTY; 1601 error = -ENOTTY;
1605 1602
@@ -1608,9 +1605,9 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1608 found_handler: 1605 found_handler:
1609 arg = (unsigned long)compat_ptr(arg); 1606 arg = (unsigned long)compat_ptr(arg);
1610 do_ioctl: 1607 do_ioctl:
1611 error = do_vfs_ioctl(filp, fd, cmd, arg); 1608 error = do_vfs_ioctl(f.file, fd, cmd, arg);
1612 out_fput: 1609 out_fput:
1613 fput_light(filp, fput_needed); 1610 fdput(f);
1614 out: 1611 out:
1615 return error; 1612 return error;
1616} 1613}
diff --git a/fs/coredump.c b/fs/coredump.c
new file mode 100644
index 00000000000..f045bbad682
--- /dev/null
+++ b/fs/coredump.c
@@ -0,0 +1,686 @@
1#include <linux/slab.h>
2#include <linux/file.h>
3#include <linux/fdtable.h>
4#include <linux/mm.h>
5#include <linux/stat.h>
6#include <linux/fcntl.h>
7#include <linux/swap.h>
8#include <linux/string.h>
9#include <linux/init.h>
10#include <linux/pagemap.h>
11#include <linux/perf_event.h>
12#include <linux/highmem.h>
13#include <linux/spinlock.h>
14#include <linux/key.h>
15#include <linux/personality.h>
16#include <linux/binfmts.h>
17#include <linux/utsname.h>
18#include <linux/pid_namespace.h>
19#include <linux/module.h>
20#include <linux/namei.h>
21#include <linux/mount.h>
22#include <linux/security.h>
23#include <linux/syscalls.h>
24#include <linux/tsacct_kern.h>
25#include <linux/cn_proc.h>
26#include <linux/audit.h>
27#include <linux/tracehook.h>
28#include <linux/kmod.h>
29#include <linux/fsnotify.h>
30#include <linux/fs_struct.h>
31#include <linux/pipe_fs_i.h>
32#include <linux/oom.h>
33#include <linux/compat.h>
34
35#include <asm/uaccess.h>
36#include <asm/mmu_context.h>
37#include <asm/tlb.h>
38#include <asm/exec.h>
39
40#include <trace/events/task.h>
41#include "internal.h"
42
43#include <trace/events/sched.h>
44
45int core_uses_pid;
46char core_pattern[CORENAME_MAX_SIZE] = "core";
47unsigned int core_pipe_limit;
48
49struct core_name {
50 char *corename;
51 int used, size;
52};
53static atomic_t call_count = ATOMIC_INIT(1);
54
55/* The maximal length of core_pattern is also specified in sysctl.c */
56
57static int expand_corename(struct core_name *cn)
58{
59 char *old_corename = cn->corename;
60
61 cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
62 cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
63
64 if (!cn->corename) {
65 kfree(old_corename);
66 return -ENOMEM;
67 }
68
69 return 0;
70}
71
72static int cn_printf(struct core_name *cn, const char *fmt, ...)
73{
74 char *cur;
75 int need;
76 int ret;
77 va_list arg;
78
79 va_start(arg, fmt);
80 need = vsnprintf(NULL, 0, fmt, arg);
81 va_end(arg);
82
83 if (likely(need < cn->size - cn->used - 1))
84 goto out_printf;
85
86 ret = expand_corename(cn);
87 if (ret)
88 goto expand_fail;
89
90out_printf:
91 cur = cn->corename + cn->used;
92 va_start(arg, fmt);
93 vsnprintf(cur, need + 1, fmt, arg);
94 va_end(arg);
95 cn->used += need;
96 return 0;
97
98expand_fail:
99 return ret;
100}
101
/* Replace every '/' in the NUL-terminated string @str with '!', in place. */
static void cn_escape(char *str)
{
	char *p = str;

	while (*p != '\0') {
		if (*p == '/')
			*p = '!';
		p++;
	}
}
108
109static int cn_print_exe_file(struct core_name *cn)
110{
111 struct file *exe_file;
112 char *pathbuf, *path;
113 int ret;
114
115 exe_file = get_mm_exe_file(current->mm);
116 if (!exe_file) {
117 char *commstart = cn->corename + cn->used;
118 ret = cn_printf(cn, "%s (path unknown)", current->comm);
119 cn_escape(commstart);
120 return ret;
121 }
122
123 pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
124 if (!pathbuf) {
125 ret = -ENOMEM;
126 goto put_exe_file;
127 }
128
129 path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
130 if (IS_ERR(path)) {
131 ret = PTR_ERR(path);
132 goto free_buf;
133 }
134
135 cn_escape(path);
136
137 ret = cn_printf(cn, "%s", path);
138
139free_buf:
140 kfree(pathbuf);
141put_exe_file:
142 fput(exe_file);
143 return ret;
144}
145
146/* format_corename will inspect the pattern parameter, and output a
147 * name into corename, which must have space for at least
148 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
149 */
150static int format_corename(struct core_name *cn, long signr)
151{
152 const struct cred *cred = current_cred();
153 const char *pat_ptr = core_pattern;
154 int ispipe = (*pat_ptr == '|');
155 int pid_in_pattern = 0;
156 int err = 0;
157
158 cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
159 cn->corename = kmalloc(cn->size, GFP_KERNEL);
160 cn->used = 0;
161
162 if (!cn->corename)
163 return -ENOMEM;
164
165 /* Repeat as long as we have more pattern to process and more output
166 space */
167 while (*pat_ptr) {
168 if (*pat_ptr != '%') {
169 if (*pat_ptr == 0)
170 goto out;
171 err = cn_printf(cn, "%c", *pat_ptr++);
172 } else {
173 switch (*++pat_ptr) {
174 /* single % at the end, drop that */
175 case 0:
176 goto out;
177 /* Double percent, output one percent */
178 case '%':
179 err = cn_printf(cn, "%c", '%');
180 break;
181 /* pid */
182 case 'p':
183 pid_in_pattern = 1;
184 err = cn_printf(cn, "%d",
185 task_tgid_vnr(current));
186 break;
187 /* uid */
188 case 'u':
189 err = cn_printf(cn, "%d", cred->uid);
190 break;
191 /* gid */
192 case 'g':
193 err = cn_printf(cn, "%d", cred->gid);
194 break;
195 /* signal that caused the coredump */
196 case 's':
197 err = cn_printf(cn, "%ld", signr);
198 break;
199 /* UNIX time of coredump */
200 case 't': {
201 struct timeval tv;
202 do_gettimeofday(&tv);
203 err = cn_printf(cn, "%lu", tv.tv_sec);
204 break;
205 }
206 /* hostname */
207 case 'h': {
208 char *namestart = cn->corename + cn->used;
209 down_read(&uts_sem);
210 err = cn_printf(cn, "%s",
211 utsname()->nodename);
212 up_read(&uts_sem);
213 cn_escape(namestart);
214 break;
215 }
216 /* executable */
217 case 'e': {
218 char *commstart = cn->corename + cn->used;
219 err = cn_printf(cn, "%s", current->comm);
220 cn_escape(commstart);
221 break;
222 }
223 case 'E':
224 err = cn_print_exe_file(cn);
225 break;
226 /* core limit size */
227 case 'c':
228 err = cn_printf(cn, "%lu",
229 rlimit(RLIMIT_CORE));
230 break;
231 default:
232 break;
233 }
234 ++pat_ptr;
235 }
236
237 if (err)
238 return err;
239 }
240
241 /* Backward compatibility with core_uses_pid:
242 *
243 * If core_pattern does not include a %p (as is the default)
244 * and core_uses_pid is set, then .%pid will be appended to
245 * the filename. Do not do this for piped commands. */
246 if (!ispipe && !pid_in_pattern && core_uses_pid) {
247 err = cn_printf(cn, ".%d", task_tgid_vnr(current));
248 if (err)
249 return err;
250 }
251out:
252 return ispipe;
253}
254
255static int zap_process(struct task_struct *start, int exit_code)
256{
257 struct task_struct *t;
258 int nr = 0;
259
260 start->signal->flags = SIGNAL_GROUP_EXIT;
261 start->signal->group_exit_code = exit_code;
262 start->signal->group_stop_count = 0;
263
264 t = start;
265 do {
266 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
267 if (t != current && t->mm) {
268 sigaddset(&t->pending.signal, SIGKILL);
269 signal_wake_up(t, 1);
270 nr++;
271 }
272 } while_each_thread(start, t);
273
274 return nr;
275}
276
277static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
278 struct core_state *core_state, int exit_code)
279{
280 struct task_struct *g, *p;
281 unsigned long flags;
282 int nr = -EAGAIN;
283
284 spin_lock_irq(&tsk->sighand->siglock);
285 if (!signal_group_exit(tsk->signal)) {
286 mm->core_state = core_state;
287 nr = zap_process(tsk, exit_code);
288 }
289 spin_unlock_irq(&tsk->sighand->siglock);
290 if (unlikely(nr < 0))
291 return nr;
292
293 if (atomic_read(&mm->mm_users) == nr + 1)
294 goto done;
295 /*
296 * We should find and kill all tasks which use this mm, and we should
297 * count them correctly into ->nr_threads. We don't take tasklist
298 * lock, but this is safe wrt:
299 *
300 * fork:
301 * None of sub-threads can fork after zap_process(leader). All
302 * processes which were created before this point should be
303 * visible to zap_threads() because copy_process() adds the new
304 * process to the tail of init_task.tasks list, and lock/unlock
305 * of ->siglock provides a memory barrier.
306 *
307 * do_exit:
308 * The caller holds mm->mmap_sem. This means that the task which
309 * uses this mm can't pass exit_mm(), so it can't exit or clear
310 * its ->mm.
311 *
312 * de_thread:
313 * It does list_replace_rcu(&leader->tasks, &current->tasks),
314 * we must see either old or new leader, this does not matter.
315 * However, it can change p->sighand, so lock_task_sighand(p)
316 * must be used. Since p->mm != NULL and we hold ->mmap_sem
317 * it can't fail.
318 *
319 * Note also that "g" can be the old leader with ->mm == NULL
320 * and already unhashed and thus removed from ->thread_group.
321 * This is OK, __unhash_process()->list_del_rcu() does not
322 * clear the ->next pointer, we will find the new leader via
323 * next_thread().
324 */
325 rcu_read_lock();
326 for_each_process(g) {
327 if (g == tsk->group_leader)
328 continue;
329 if (g->flags & PF_KTHREAD)
330 continue;
331 p = g;
332 do {
333 if (p->mm) {
334 if (unlikely(p->mm == mm)) {
335 lock_task_sighand(p, &flags);
336 nr += zap_process(p, exit_code);
337 unlock_task_sighand(p, &flags);
338 }
339 break;
340 }
341 } while_each_thread(g, p);
342 }
343 rcu_read_unlock();
344done:
345 atomic_set(&core_state->nr_threads, nr);
346 return nr;
347}
348
349static int coredump_wait(int exit_code, struct core_state *core_state)
350{
351 struct task_struct *tsk = current;
352 struct mm_struct *mm = tsk->mm;
353 int core_waiters = -EBUSY;
354
355 init_completion(&core_state->startup);
356 core_state->dumper.task = tsk;
357 core_state->dumper.next = NULL;
358
359 down_write(&mm->mmap_sem);
360 if (!mm->core_state)
361 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
362 up_write(&mm->mmap_sem);
363
364 if (core_waiters > 0) {
365 struct core_thread *ptr;
366
367 wait_for_completion(&core_state->startup);
368 /*
369 * Wait for all the threads to become inactive, so that
370 * all the thread context (extended register state, like
371 * fpu etc) gets copied to the memory.
372 */
373 ptr = core_state->dumper.next;
374 while (ptr != NULL) {
375 wait_task_inactive(ptr->task, 0);
376 ptr = ptr->next;
377 }
378 }
379
380 return core_waiters;
381}
382
383static void coredump_finish(struct mm_struct *mm)
384{
385 struct core_thread *curr, *next;
386 struct task_struct *task;
387
388 next = mm->core_state->dumper.next;
389 while ((curr = next) != NULL) {
390 next = curr->next;
391 task = curr->task;
392 /*
393 * see exit_mm(), curr->task must not see
394 * ->task == NULL before we read ->next.
395 */
396 smp_mb();
397 curr->task = NULL;
398 wake_up_process(task);
399 }
400
401 mm->core_state = NULL;
402}
403
404static void wait_for_dump_helpers(struct file *file)
405{
406 struct pipe_inode_info *pipe;
407
408 pipe = file->f_path.dentry->d_inode->i_pipe;
409
410 pipe_lock(pipe);
411 pipe->readers++;
412 pipe->writers--;
413
414 while ((pipe->readers > 1) && (!signal_pending(current))) {
415 wake_up_interruptible_sync(&pipe->wait);
416 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
417 pipe_wait(pipe);
418 }
419
420 pipe->readers--;
421 pipe->writers++;
422 pipe_unlock(pipe);
423
424}
425
426/*
427 * umh_pipe_setup
428 * helper function to customize the process used
429 * to collect the core in userspace. Specifically
430 * it sets up a pipe and installs it as fd 0 (stdin)
431 * for the process. Returns 0 on success, or
432 * PTR_ERR on failure.
433 * Note that it also sets the core limit to 1. This
434 * is a special value that we use to trap recursive
435 * core dumps
436 */
437static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
438{
439 struct file *files[2];
440 struct coredump_params *cp = (struct coredump_params *)info->data;
441 int err = create_pipe_files(files, 0);
442 if (err)
443 return err;
444
445 cp->file = files[1];
446
447 replace_fd(0, files[0], 0);
448 /* and disallow core files too */
449 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
450
451 return 0;
452}
453
454void do_coredump(long signr, int exit_code, struct pt_regs *regs)
455{
456 struct core_state core_state;
457 struct core_name cn;
458 struct mm_struct *mm = current->mm;
459 struct linux_binfmt * binfmt;
460 const struct cred *old_cred;
461 struct cred *cred;
462 int retval = 0;
463 int flag = 0;
464 int ispipe;
465 struct files_struct *displaced;
466 bool need_nonrelative = false;
467 static atomic_t core_dump_count = ATOMIC_INIT(0);
468 struct coredump_params cprm = {
469 .signr = signr,
470 .regs = regs,
471 .limit = rlimit(RLIMIT_CORE),
472 /*
473 * We must use the same mm->flags while dumping core to avoid
474 * inconsistency of bit flags, since this flag is not protected
475 * by any locks.
476 */
477 .mm_flags = mm->flags,
478 };
479
480 audit_core_dumps(signr);
481
482 binfmt = mm->binfmt;
483 if (!binfmt || !binfmt->core_dump)
484 goto fail;
485 if (!__get_dumpable(cprm.mm_flags))
486 goto fail;
487
488 cred = prepare_creds();
489 if (!cred)
490 goto fail;
491 /*
492 * We cannot trust fsuid as being the "true" uid of the process
493 * nor do we know its entire history. We only know it was tainted
494 * so we dump it as root in mode 2, and only into a controlled
495 * environment (pipe handler or fully qualified path).
496 */
497 if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
498 /* Setuid core dump mode */
499 flag = O_EXCL; /* Stop rewrite attacks */
500 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
501 need_nonrelative = true;
502 }
503
504 retval = coredump_wait(exit_code, &core_state);
505 if (retval < 0)
506 goto fail_creds;
507
508 old_cred = override_creds(cred);
509
510 /*
511 * Clear any false indication of pending signals that might
512 * be seen by the filesystem code called to write the core file.
513 */
514 clear_thread_flag(TIF_SIGPENDING);
515
516 ispipe = format_corename(&cn, signr);
517
518 if (ispipe) {
519 int dump_count;
520 char **helper_argv;
521
522 if (ispipe < 0) {
523 printk(KERN_WARNING "format_corename failed\n");
524 printk(KERN_WARNING "Aborting core\n");
525 goto fail_corename;
526 }
527
528 if (cprm.limit == 1) {
529 /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
530 *
531 * Normally core limits are irrelevant to pipes, since
532 * we're not writing to the file system, but we use
533 * cprm.limit of 1 here as a speacial value, this is a
534 * consistent way to catch recursive crashes.
535 * We can still crash if the core_pattern binary sets
536 * RLIM_CORE = !1, but it runs as root, and can do
537 * lots of stupid things.
538 *
539 * Note that we use task_tgid_vnr here to grab the pid
540 * of the process group leader. That way we get the
541 * right pid if a thread in a multi-threaded
542 * core_pattern process dies.
543 */
544 printk(KERN_WARNING
545 "Process %d(%s) has RLIMIT_CORE set to 1\n",
546 task_tgid_vnr(current), current->comm);
547 printk(KERN_WARNING "Aborting core\n");
548 goto fail_unlock;
549 }
550 cprm.limit = RLIM_INFINITY;
551
552 dump_count = atomic_inc_return(&core_dump_count);
553 if (core_pipe_limit && (core_pipe_limit < dump_count)) {
554 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
555 task_tgid_vnr(current), current->comm);
556 printk(KERN_WARNING "Skipping core dump\n");
557 goto fail_dropcount;
558 }
559
560 helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
561 if (!helper_argv) {
562 printk(KERN_WARNING "%s failed to allocate memory\n",
563 __func__);
564 goto fail_dropcount;
565 }
566
567 retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
568 NULL, UMH_WAIT_EXEC, umh_pipe_setup,
569 NULL, &cprm);
570 argv_free(helper_argv);
571 if (retval) {
572 printk(KERN_INFO "Core dump to %s pipe failed\n",
573 cn.corename);
574 goto close_fail;
575 }
576 } else {
577 struct inode *inode;
578
579 if (cprm.limit < binfmt->min_coredump)
580 goto fail_unlock;
581
582 if (need_nonrelative && cn.corename[0] != '/') {
583 printk(KERN_WARNING "Pid %d(%s) can only dump core "\
584 "to fully qualified path!\n",
585 task_tgid_vnr(current), current->comm);
586 printk(KERN_WARNING "Skipping core dump\n");
587 goto fail_unlock;
588 }
589
590 cprm.file = filp_open(cn.corename,
591 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
592 0600);
593 if (IS_ERR(cprm.file))
594 goto fail_unlock;
595
596 inode = cprm.file->f_path.dentry->d_inode;
597 if (inode->i_nlink > 1)
598 goto close_fail;
599 if (d_unhashed(cprm.file->f_path.dentry))
600 goto close_fail;
601 /*
602 * AK: actually i see no reason to not allow this for named
603 * pipes etc, but keep the previous behaviour for now.
604 */
605 if (!S_ISREG(inode->i_mode))
606 goto close_fail;
607 /*
608 * Dont allow local users get cute and trick others to coredump
609 * into their pre-created files.
610 */
611 if (!uid_eq(inode->i_uid, current_fsuid()))
612 goto close_fail;
613 if (!cprm.file->f_op || !cprm.file->f_op->write)
614 goto close_fail;
615 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
616 goto close_fail;
617 }
618
619 /* get us an unshared descriptor table; almost always a no-op */
620 retval = unshare_files(&displaced);
621 if (retval)
622 goto close_fail;
623 if (displaced)
624 put_files_struct(displaced);
625 retval = binfmt->core_dump(&cprm);
626 if (retval)
627 current->signal->group_exit_code |= 0x80;
628
629 if (ispipe && core_pipe_limit)
630 wait_for_dump_helpers(cprm.file);
631close_fail:
632 if (cprm.file)
633 filp_close(cprm.file, NULL);
634fail_dropcount:
635 if (ispipe)
636 atomic_dec(&core_dump_count);
637fail_unlock:
638 kfree(cn.corename);
639fail_corename:
640 coredump_finish(mm);
641 revert_creds(old_cred);
642fail_creds:
643 put_cred(cred);
644fail:
645 return;
646}
647
648/*
649 * Core dumping helper functions. These are the only things you should
650 * do on a core-file: use only these functions to write out all the
651 * necessary info.
652 */
653int dump_write(struct file *file, const void *addr, int nr)
654{
655 return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
656}
657EXPORT_SYMBOL(dump_write);
658
659int dump_seek(struct file *file, loff_t off)
660{
661 int ret = 1;
662
663 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
664 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
665 return 0;
666 } else {
667 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
668
669 if (!buf)
670 return 0;
671 while (off > 0) {
672 unsigned long n = off;
673
674 if (n > PAGE_SIZE)
675 n = PAGE_SIZE;
676 if (!dump_write(file, buf, n)) {
677 ret = 0;
678 break;
679 }
680 off -= n;
681 }
682 free_page((unsigned long)buf);
683 }
684 return ret;
685}
686EXPORT_SYMBOL(dump_seek);
diff --git a/fs/dcache.c b/fs/dcache.c
index 693f95bf1ca..3a463d0c4fe 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2113,7 +2113,7 @@ again:
2113 inode = dentry->d_inode; 2113 inode = dentry->d_inode;
2114 isdir = S_ISDIR(inode->i_mode); 2114 isdir = S_ISDIR(inode->i_mode);
2115 if (dentry->d_count == 1) { 2115 if (dentry->d_count == 1) {
2116 if (inode && !spin_trylock(&inode->i_lock)) { 2116 if (!spin_trylock(&inode->i_lock)) {
2117 spin_unlock(&dentry->d_lock); 2117 spin_unlock(&dentry->d_lock);
2118 cpu_relax(); 2118 cpu_relax();
2119 goto again; 2119 goto again;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 24bb043e50d..4e0886c9e5c 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -711,6 +711,12 @@ static void ecryptfs_free_kmem_caches(void)
711{ 711{
712 int i; 712 int i;
713 713
714 /*
715 * Make sure all delayed rcu free inodes are flushed before we
716 * destroy cache.
717 */
718 rcu_barrier();
719
714 for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { 720 for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) {
715 struct ecryptfs_cache_info *info; 721 struct ecryptfs_cache_info *info;
716 722
diff --git a/fs/efs/super.c b/fs/efs/super.c
index e755ec746c6..2002431ef9a 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -96,6 +96,11 @@ static int init_inodecache(void)
96 96
97static void destroy_inodecache(void) 97static void destroy_inodecache(void)
98{ 98{
99 /*
100 * Make sure all delayed rcu free inodes are flushed before we
101 * destroy cache.
102 */
103 rcu_barrier();
99 kmem_cache_destroy(efs_inode_cachep); 104 kmem_cache_destroy(efs_inode_cachep);
100} 105}
101 106
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index eedec84c180..cd96649bfe6 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1810,7 +1810,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
1810 int, maxevents, int, timeout) 1810 int, maxevents, int, timeout)
1811{ 1811{
1812 int error; 1812 int error;
1813 struct file *file; 1813 struct fd f;
1814 struct eventpoll *ep; 1814 struct eventpoll *ep;
1815 1815
1816 /* The maximum number of event must be greater than zero */ 1816 /* The maximum number of event must be greater than zero */
@@ -1818,38 +1818,33 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
1818 return -EINVAL; 1818 return -EINVAL;
1819 1819
1820 /* Verify that the area passed by the user is writeable */ 1820 /* Verify that the area passed by the user is writeable */
1821 if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) { 1821 if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))
1822 error = -EFAULT; 1822 return -EFAULT;
1823 goto error_return;
1824 }
1825 1823
1826 /* Get the "struct file *" for the eventpoll file */ 1824 /* Get the "struct file *" for the eventpoll file */
1827 error = -EBADF; 1825 f = fdget(epfd);
1828 file = fget(epfd); 1826 if (!f.file)
1829 if (!file) 1827 return -EBADF;
1830 goto error_return;
1831 1828
1832 /* 1829 /*
1833 * We have to check that the file structure underneath the fd 1830 * We have to check that the file structure underneath the fd
1834 * the user passed to us _is_ an eventpoll file. 1831 * the user passed to us _is_ an eventpoll file.
1835 */ 1832 */
1836 error = -EINVAL; 1833 error = -EINVAL;
1837 if (!is_file_epoll(file)) 1834 if (!is_file_epoll(f.file))
1838 goto error_fput; 1835 goto error_fput;
1839 1836
1840 /* 1837 /*
1841 * At this point it is safe to assume that the "private_data" contains 1838 * At this point it is safe to assume that the "private_data" contains
1842 * our own data structure. 1839 * our own data structure.
1843 */ 1840 */
1844 ep = file->private_data; 1841 ep = f.file->private_data;
1845 1842
1846 /* Time to fish for events ... */ 1843 /* Time to fish for events ... */
1847 error = ep_poll(ep, events, maxevents, timeout); 1844 error = ep_poll(ep, events, maxevents, timeout);
1848 1845
1849error_fput: 1846error_fput:
1850 fput(file); 1847 fdput(f);
1851error_return:
1852
1853 return error; 1848 return error;
1854} 1849}
1855 1850
diff --git a/fs/exec.c b/fs/exec.c
index 574cf4de4ec..48fb26ef8a1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -66,19 +66,8 @@
66 66
67#include <trace/events/sched.h> 67#include <trace/events/sched.h>
68 68
69int core_uses_pid;
70char core_pattern[CORENAME_MAX_SIZE] = "core";
71unsigned int core_pipe_limit;
72int suid_dumpable = 0; 69int suid_dumpable = 0;
73 70
74struct core_name {
75 char *corename;
76 int used, size;
77};
78static atomic_t call_count = ATOMIC_INIT(1);
79
80/* The maximal length of core_pattern is also specified in sysctl.c */
81
82static LIST_HEAD(formats); 71static LIST_HEAD(formats);
83static DEFINE_RWLOCK(binfmt_lock); 72static DEFINE_RWLOCK(binfmt_lock);
84 73
@@ -1006,40 +995,6 @@ no_thread_group:
1006 return 0; 995 return 0;
1007} 996}
1008 997
1009/*
1010 * These functions flushes out all traces of the currently running executable
1011 * so that a new one can be started
1012 */
1013static void flush_old_files(struct files_struct * files)
1014{
1015 long j = -1;
1016 struct fdtable *fdt;
1017
1018 spin_lock(&files->file_lock);
1019 for (;;) {
1020 unsigned long set, i;
1021
1022 j++;
1023 i = j * BITS_PER_LONG;
1024 fdt = files_fdtable(files);
1025 if (i >= fdt->max_fds)
1026 break;
1027 set = fdt->close_on_exec[j];
1028 if (!set)
1029 continue;
1030 fdt->close_on_exec[j] = 0;
1031 spin_unlock(&files->file_lock);
1032 for ( ; set ; i++,set >>= 1) {
1033 if (set & 1) {
1034 sys_close(i);
1035 }
1036 }
1037 spin_lock(&files->file_lock);
1038
1039 }
1040 spin_unlock(&files->file_lock);
1041}
1042
1043char *get_task_comm(char *buf, struct task_struct *tsk) 998char *get_task_comm(char *buf, struct task_struct *tsk)
1044{ 999{
1045 /* buf must be at least sizeof(tsk->comm) in size */ 1000 /* buf must be at least sizeof(tsk->comm) in size */
@@ -1050,6 +1005,11 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
1050} 1005}
1051EXPORT_SYMBOL_GPL(get_task_comm); 1006EXPORT_SYMBOL_GPL(get_task_comm);
1052 1007
1008/*
1009 * These functions flushes out all traces of the currently running executable
1010 * so that a new one can be started
1011 */
1012
1053void set_task_comm(struct task_struct *tsk, char *buf) 1013void set_task_comm(struct task_struct *tsk, char *buf)
1054{ 1014{
1055 task_lock(tsk); 1015 task_lock(tsk);
@@ -1171,7 +1131,7 @@ void setup_new_exec(struct linux_binprm * bprm)
1171 current->self_exec_id++; 1131 current->self_exec_id++;
1172 1132
1173 flush_signal_handlers(current, 0); 1133 flush_signal_handlers(current, 0);
1174 flush_old_files(current->files); 1134 do_close_on_exec(current->files);
1175} 1135}
1176EXPORT_SYMBOL(setup_new_exec); 1136EXPORT_SYMBOL(setup_new_exec);
1177 1137
@@ -1632,353 +1592,6 @@ void set_binfmt(struct linux_binfmt *new)
1632 1592
1633EXPORT_SYMBOL(set_binfmt); 1593EXPORT_SYMBOL(set_binfmt);
1634 1594
1635static int expand_corename(struct core_name *cn)
1636{
1637 char *old_corename = cn->corename;
1638
1639 cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
1640 cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
1641
1642 if (!cn->corename) {
1643 kfree(old_corename);
1644 return -ENOMEM;
1645 }
1646
1647 return 0;
1648}
1649
1650static int cn_printf(struct core_name *cn, const char *fmt, ...)
1651{
1652 char *cur;
1653 int need;
1654 int ret;
1655 va_list arg;
1656
1657 va_start(arg, fmt);
1658 need = vsnprintf(NULL, 0, fmt, arg);
1659 va_end(arg);
1660
1661 if (likely(need < cn->size - cn->used - 1))
1662 goto out_printf;
1663
1664 ret = expand_corename(cn);
1665 if (ret)
1666 goto expand_fail;
1667
1668out_printf:
1669 cur = cn->corename + cn->used;
1670 va_start(arg, fmt);
1671 vsnprintf(cur, need + 1, fmt, arg);
1672 va_end(arg);
1673 cn->used += need;
1674 return 0;
1675
1676expand_fail:
1677 return ret;
1678}
1679
1680static void cn_escape(char *str)
1681{
1682 for (; *str; str++)
1683 if (*str == '/')
1684 *str = '!';
1685}
1686
1687static int cn_print_exe_file(struct core_name *cn)
1688{
1689 struct file *exe_file;
1690 char *pathbuf, *path;
1691 int ret;
1692
1693 exe_file = get_mm_exe_file(current->mm);
1694 if (!exe_file) {
1695 char *commstart = cn->corename + cn->used;
1696 ret = cn_printf(cn, "%s (path unknown)", current->comm);
1697 cn_escape(commstart);
1698 return ret;
1699 }
1700
1701 pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
1702 if (!pathbuf) {
1703 ret = -ENOMEM;
1704 goto put_exe_file;
1705 }
1706
1707 path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
1708 if (IS_ERR(path)) {
1709 ret = PTR_ERR(path);
1710 goto free_buf;
1711 }
1712
1713 cn_escape(path);
1714
1715 ret = cn_printf(cn, "%s", path);
1716
1717free_buf:
1718 kfree(pathbuf);
1719put_exe_file:
1720 fput(exe_file);
1721 return ret;
1722}
1723
1724/* format_corename will inspect the pattern parameter, and output a
1725 * name into corename, which must have space for at least
1726 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1727 */
1728static int format_corename(struct core_name *cn, long signr)
1729{
1730 const struct cred *cred = current_cred();
1731 const char *pat_ptr = core_pattern;
1732 int ispipe = (*pat_ptr == '|');
1733 int pid_in_pattern = 0;
1734 int err = 0;
1735
1736 cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
1737 cn->corename = kmalloc(cn->size, GFP_KERNEL);
1738 cn->used = 0;
1739
1740 if (!cn->corename)
1741 return -ENOMEM;
1742
1743 /* Repeat as long as we have more pattern to process and more output
1744 space */
1745 while (*pat_ptr) {
1746 if (*pat_ptr != '%') {
1747 if (*pat_ptr == 0)
1748 goto out;
1749 err = cn_printf(cn, "%c", *pat_ptr++);
1750 } else {
1751 switch (*++pat_ptr) {
1752 /* single % at the end, drop that */
1753 case 0:
1754 goto out;
1755 /* Double percent, output one percent */
1756 case '%':
1757 err = cn_printf(cn, "%c", '%');
1758 break;
1759 /* pid */
1760 case 'p':
1761 pid_in_pattern = 1;
1762 err = cn_printf(cn, "%d",
1763 task_tgid_vnr(current));
1764 break;
1765 /* uid */
1766 case 'u':
1767 err = cn_printf(cn, "%d", cred->uid);
1768 break;
1769 /* gid */
1770 case 'g':
1771 err = cn_printf(cn, "%d", cred->gid);
1772 break;
1773 /* signal that caused the coredump */
1774 case 's':
1775 err = cn_printf(cn, "%ld", signr);
1776 break;
1777 /* UNIX time of coredump */
1778 case 't': {
1779 struct timeval tv;
1780 do_gettimeofday(&tv);
1781 err = cn_printf(cn, "%lu", tv.tv_sec);
1782 break;
1783 }
1784 /* hostname */
1785 case 'h': {
1786 char *namestart = cn->corename + cn->used;
1787 down_read(&uts_sem);
1788 err = cn_printf(cn, "%s",
1789 utsname()->nodename);
1790 up_read(&uts_sem);
1791 cn_escape(namestart);
1792 break;
1793 }
1794 /* executable */
1795 case 'e': {
1796 char *commstart = cn->corename + cn->used;
1797 err = cn_printf(cn, "%s", current->comm);
1798 cn_escape(commstart);
1799 break;
1800 }
1801 case 'E':
1802 err = cn_print_exe_file(cn);
1803 break;
1804 /* core limit size */
1805 case 'c':
1806 err = cn_printf(cn, "%lu",
1807 rlimit(RLIMIT_CORE));
1808 break;
1809 default:
1810 break;
1811 }
1812 ++pat_ptr;
1813 }
1814
1815 if (err)
1816 return err;
1817 }
1818
1819 /* Backward compatibility with core_uses_pid:
1820 *
1821 * If core_pattern does not include a %p (as is the default)
1822 * and core_uses_pid is set, then .%pid will be appended to
1823 * the filename. Do not do this for piped commands. */
1824 if (!ispipe && !pid_in_pattern && core_uses_pid) {
1825 err = cn_printf(cn, ".%d", task_tgid_vnr(current));
1826 if (err)
1827 return err;
1828 }
1829out:
1830 return ispipe;
1831}
1832
1833static int zap_process(struct task_struct *start, int exit_code)
1834{
1835 struct task_struct *t;
1836 int nr = 0;
1837
1838 start->signal->flags = SIGNAL_GROUP_EXIT;
1839 start->signal->group_exit_code = exit_code;
1840 start->signal->group_stop_count = 0;
1841
1842 t = start;
1843 do {
1844 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
1845 if (t != current && t->mm) {
1846 sigaddset(&t->pending.signal, SIGKILL);
1847 signal_wake_up(t, 1);
1848 nr++;
1849 }
1850 } while_each_thread(start, t);
1851
1852 return nr;
1853}
1854
1855static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
1856 struct core_state *core_state, int exit_code)
1857{
1858 struct task_struct *g, *p;
1859 unsigned long flags;
1860 int nr = -EAGAIN;
1861
1862 spin_lock_irq(&tsk->sighand->siglock);
1863 if (!signal_group_exit(tsk->signal)) {
1864 mm->core_state = core_state;
1865 nr = zap_process(tsk, exit_code);
1866 }
1867 spin_unlock_irq(&tsk->sighand->siglock);
1868 if (unlikely(nr < 0))
1869 return nr;
1870
1871 if (atomic_read(&mm->mm_users) == nr + 1)
1872 goto done;
1873 /*
1874 * We should find and kill all tasks which use this mm, and we should
1875 * count them correctly into ->nr_threads. We don't take tasklist
1876 * lock, but this is safe wrt:
1877 *
1878 * fork:
1879 * None of sub-threads can fork after zap_process(leader). All
1880 * processes which were created before this point should be
1881 * visible to zap_threads() because copy_process() adds the new
1882 * process to the tail of init_task.tasks list, and lock/unlock
1883 * of ->siglock provides a memory barrier.
1884 *
1885 * do_exit:
1886 * The caller holds mm->mmap_sem. This means that the task which
1887 * uses this mm can't pass exit_mm(), so it can't exit or clear
1888 * its ->mm.
1889 *
1890 * de_thread:
1891 * It does list_replace_rcu(&leader->tasks, &current->tasks),
1892 * we must see either old or new leader, this does not matter.
1893 * However, it can change p->sighand, so lock_task_sighand(p)
1894 * must be used. Since p->mm != NULL and we hold ->mmap_sem
1895 * it can't fail.
1896 *
1897 * Note also that "g" can be the old leader with ->mm == NULL
1898 * and already unhashed and thus removed from ->thread_group.
1899 * This is OK, __unhash_process()->list_del_rcu() does not
1900 * clear the ->next pointer, we will find the new leader via
1901 * next_thread().
1902 */
1903 rcu_read_lock();
1904 for_each_process(g) {
1905 if (g == tsk->group_leader)
1906 continue;
1907 if (g->flags & PF_KTHREAD)
1908 continue;
1909 p = g;
1910 do {
1911 if (p->mm) {
1912 if (unlikely(p->mm == mm)) {
1913 lock_task_sighand(p, &flags);
1914 nr += zap_process(p, exit_code);
1915 unlock_task_sighand(p, &flags);
1916 }
1917 break;
1918 }
1919 } while_each_thread(g, p);
1920 }
1921 rcu_read_unlock();
1922done:
1923 atomic_set(&core_state->nr_threads, nr);
1924 return nr;
1925}
1926
1927static int coredump_wait(int exit_code, struct core_state *core_state)
1928{
1929 struct task_struct *tsk = current;
1930 struct mm_struct *mm = tsk->mm;
1931 int core_waiters = -EBUSY;
1932
1933 init_completion(&core_state->startup);
1934 core_state->dumper.task = tsk;
1935 core_state->dumper.next = NULL;
1936
1937 down_write(&mm->mmap_sem);
1938 if (!mm->core_state)
1939 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
1940 up_write(&mm->mmap_sem);
1941
1942 if (core_waiters > 0) {
1943 struct core_thread *ptr;
1944
1945 wait_for_completion(&core_state->startup);
1946 /*
1947 * Wait for all the threads to become inactive, so that
1948 * all the thread context (extended register state, like
1949 * fpu etc) gets copied to the memory.
1950 */
1951 ptr = core_state->dumper.next;
1952 while (ptr != NULL) {
1953 wait_task_inactive(ptr->task, 0);
1954 ptr = ptr->next;
1955 }
1956 }
1957
1958 return core_waiters;
1959}
1960
1961static void coredump_finish(struct mm_struct *mm)
1962{
1963 struct core_thread *curr, *next;
1964 struct task_struct *task;
1965
1966 next = mm->core_state->dumper.next;
1967 while ((curr = next) != NULL) {
1968 next = curr->next;
1969 task = curr->task;
1970 /*
1971 * see exit_mm(), curr->task must not see
1972 * ->task == NULL before we read ->next.
1973 */
1974 smp_mb();
1975 curr->task = NULL;
1976 wake_up_process(task);
1977 }
1978
1979 mm->core_state = NULL;
1980}
1981
1982/* 1595/*
1983 * set_dumpable converts traditional three-value dumpable to two flags and 1596 * set_dumpable converts traditional three-value dumpable to two flags and
1984 * stores them into mm->flags. It modifies lower two bits of mm->flags, but 1597 * stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -2020,7 +1633,7 @@ void set_dumpable(struct mm_struct *mm, int value)
2020 } 1633 }
2021} 1634}
2022 1635
2023static int __get_dumpable(unsigned long mm_flags) 1636int __get_dumpable(unsigned long mm_flags)
2024{ 1637{
2025 int ret; 1638 int ret;
2026 1639
@@ -2032,290 +1645,3 @@ int get_dumpable(struct mm_struct *mm)
2032{ 1645{
2033 return __get_dumpable(mm->flags); 1646 return __get_dumpable(mm->flags);
2034} 1647}
2035
2036static void wait_for_dump_helpers(struct file *file)
2037{
2038 struct pipe_inode_info *pipe;
2039
2040 pipe = file->f_path.dentry->d_inode->i_pipe;
2041
2042 pipe_lock(pipe);
2043 pipe->readers++;
2044 pipe->writers--;
2045
2046 while ((pipe->readers > 1) && (!signal_pending(current))) {
2047 wake_up_interruptible_sync(&pipe->wait);
2048 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
2049 pipe_wait(pipe);
2050 }
2051
2052 pipe->readers--;
2053 pipe->writers++;
2054 pipe_unlock(pipe);
2055
2056}
2057
2058
2059/*
2060 * umh_pipe_setup
2061 * helper function to customize the process used
2062 * to collect the core in userspace. Specifically
2063 * it sets up a pipe and installs it as fd 0 (stdin)
2064 * for the process. Returns 0 on success, or
2065 * PTR_ERR on failure.
2066 * Note that it also sets the core limit to 1. This
2067 * is a special value that we use to trap recursive
2068 * core dumps
2069 */
2070static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
2071{
2072 struct file *files[2];
2073 struct fdtable *fdt;
2074 struct coredump_params *cp = (struct coredump_params *)info->data;
2075 struct files_struct *cf = current->files;
2076 int err = create_pipe_files(files, 0);
2077 if (err)
2078 return err;
2079
2080 cp->file = files[1];
2081
2082 sys_close(0);
2083 fd_install(0, files[0]);
2084 spin_lock(&cf->file_lock);
2085 fdt = files_fdtable(cf);
2086 __set_open_fd(0, fdt);
2087 __clear_close_on_exec(0, fdt);
2088 spin_unlock(&cf->file_lock);
2089
2090 /* and disallow core files too */
2091 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
2092
2093 return 0;
2094}
2095
2096void do_coredump(long signr, int exit_code, struct pt_regs *regs)
2097{
2098 struct core_state core_state;
2099 struct core_name cn;
2100 struct mm_struct *mm = current->mm;
2101 struct linux_binfmt * binfmt;
2102 const struct cred *old_cred;
2103 struct cred *cred;
2104 int retval = 0;
2105 int flag = 0;
2106 int ispipe;
2107 bool need_nonrelative = false;
2108 static atomic_t core_dump_count = ATOMIC_INIT(0);
2109 struct coredump_params cprm = {
2110 .signr = signr,
2111 .regs = regs,
2112 .limit = rlimit(RLIMIT_CORE),
2113 /*
2114 * We must use the same mm->flags while dumping core to avoid
2115 * inconsistency of bit flags, since this flag is not protected
2116 * by any locks.
2117 */
2118 .mm_flags = mm->flags,
2119 };
2120
2121 audit_core_dumps(signr);
2122
2123 binfmt = mm->binfmt;
2124 if (!binfmt || !binfmt->core_dump)
2125 goto fail;
2126 if (!__get_dumpable(cprm.mm_flags))
2127 goto fail;
2128
2129 cred = prepare_creds();
2130 if (!cred)
2131 goto fail;
2132 /*
2133 * We cannot trust fsuid as being the "true" uid of the process
2134 * nor do we know its entire history. We only know it was tainted
2135 * so we dump it as root in mode 2, and only into a controlled
2136 * environment (pipe handler or fully qualified path).
2137 */
2138 if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
2139 /* Setuid core dump mode */
2140 flag = O_EXCL; /* Stop rewrite attacks */
2141 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
2142 need_nonrelative = true;
2143 }
2144
2145 retval = coredump_wait(exit_code, &core_state);
2146 if (retval < 0)
2147 goto fail_creds;
2148
2149 old_cred = override_creds(cred);
2150
2151 /*
2152 * Clear any false indication of pending signals that might
2153 * be seen by the filesystem code called to write the core file.
2154 */
2155 clear_thread_flag(TIF_SIGPENDING);
2156
2157 ispipe = format_corename(&cn, signr);
2158
2159 if (ispipe) {
2160 int dump_count;
2161 char **helper_argv;
2162
2163 if (ispipe < 0) {
2164 printk(KERN_WARNING "format_corename failed\n");
2165 printk(KERN_WARNING "Aborting core\n");
2166 goto fail_corename;
2167 }
2168
2169 if (cprm.limit == 1) {
2170 /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
2171 *
2172 * Normally core limits are irrelevant to pipes, since
2173 * we're not writing to the file system, but we use
2174 * cprm.limit of 1 here as a speacial value, this is a
2175 * consistent way to catch recursive crashes.
2176 * We can still crash if the core_pattern binary sets
2177 * RLIM_CORE = !1, but it runs as root, and can do
2178 * lots of stupid things.
2179 *
2180 * Note that we use task_tgid_vnr here to grab the pid
2181 * of the process group leader. That way we get the
2182 * right pid if a thread in a multi-threaded
2183 * core_pattern process dies.
2184 */
2185 printk(KERN_WARNING
2186 "Process %d(%s) has RLIMIT_CORE set to 1\n",
2187 task_tgid_vnr(current), current->comm);
2188 printk(KERN_WARNING "Aborting core\n");
2189 goto fail_unlock;
2190 }
2191 cprm.limit = RLIM_INFINITY;
2192
2193 dump_count = atomic_inc_return(&core_dump_count);
2194 if (core_pipe_limit && (core_pipe_limit < dump_count)) {
2195 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
2196 task_tgid_vnr(current), current->comm);
2197 printk(KERN_WARNING "Skipping core dump\n");
2198 goto fail_dropcount;
2199 }
2200
2201 helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
2202 if (!helper_argv) {
2203 printk(KERN_WARNING "%s failed to allocate memory\n",
2204 __func__);
2205 goto fail_dropcount;
2206 }
2207
2208 retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
2209 NULL, UMH_WAIT_EXEC, umh_pipe_setup,
2210 NULL, &cprm);
2211 argv_free(helper_argv);
2212 if (retval) {
2213 printk(KERN_INFO "Core dump to %s pipe failed\n",
2214 cn.corename);
2215 goto close_fail;
2216 }
2217 } else {
2218 struct inode *inode;
2219
2220 if (cprm.limit < binfmt->min_coredump)
2221 goto fail_unlock;
2222
2223 if (need_nonrelative && cn.corename[0] != '/') {
2224 printk(KERN_WARNING "Pid %d(%s) can only dump core "\
2225 "to fully qualified path!\n",
2226 task_tgid_vnr(current), current->comm);
2227 printk(KERN_WARNING "Skipping core dump\n");
2228 goto fail_unlock;
2229 }
2230
2231 cprm.file = filp_open(cn.corename,
2232 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
2233 0600);
2234 if (IS_ERR(cprm.file))
2235 goto fail_unlock;
2236
2237 inode = cprm.file->f_path.dentry->d_inode;
2238 if (inode->i_nlink > 1)
2239 goto close_fail;
2240 if (d_unhashed(cprm.file->f_path.dentry))
2241 goto close_fail;
2242 /*
2243 * AK: actually i see no reason to not allow this for named
2244 * pipes etc, but keep the previous behaviour for now.
2245 */
2246 if (!S_ISREG(inode->i_mode))
2247 goto close_fail;
2248 /*
2249 * Dont allow local users get cute and trick others to coredump
2250 * into their pre-created files.
2251 */
2252 if (!uid_eq(inode->i_uid, current_fsuid()))
2253 goto close_fail;
2254 if (!cprm.file->f_op || !cprm.file->f_op->write)
2255 goto close_fail;
2256 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
2257 goto close_fail;
2258 }
2259
2260 retval = binfmt->core_dump(&cprm);
2261 if (retval)
2262 current->signal->group_exit_code |= 0x80;
2263
2264 if (ispipe && core_pipe_limit)
2265 wait_for_dump_helpers(cprm.file);
2266close_fail:
2267 if (cprm.file)
2268 filp_close(cprm.file, NULL);
2269fail_dropcount:
2270 if (ispipe)
2271 atomic_dec(&core_dump_count);
2272fail_unlock:
2273 kfree(cn.corename);
2274fail_corename:
2275 coredump_finish(mm);
2276 revert_creds(old_cred);
2277fail_creds:
2278 put_cred(cred);
2279fail:
2280 return;
2281}
2282
2283/*
2284 * Core dumping helper functions. These are the only things you should
2285 * do on a core-file: use only these functions to write out all the
2286 * necessary info.
2287 */
2288int dump_write(struct file *file, const void *addr, int nr)
2289{
2290 return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
2291}
2292EXPORT_SYMBOL(dump_write);
2293
2294int dump_seek(struct file *file, loff_t off)
2295{
2296 int ret = 1;
2297
2298 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
2299 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
2300 return 0;
2301 } else {
2302 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
2303
2304 if (!buf)
2305 return 0;
2306 while (off > 0) {
2307 unsigned long n = off;
2308
2309 if (n > PAGE_SIZE)
2310 n = PAGE_SIZE;
2311 if (!dump_write(file, buf, n)) {
2312 ret = 0;
2313 break;
2314 }
2315 off -= n;
2316 }
2317 free_page((unsigned long)buf);
2318 }
2319 return ret;
2320}
2321EXPORT_SYMBOL(dump_seek);
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index dde41a75c7c..59e3bbfac0b 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -206,6 +206,11 @@ static int init_inodecache(void)
206 */ 206 */
207static void destroy_inodecache(void) 207static void destroy_inodecache(void)
208{ 208{
209 /*
210 * Make sure all delayed rcu free inodes are flushed before we
211 * destroy cache.
212 */
213 rcu_barrier();
209 kmem_cache_destroy(exofs_inode_cachep); 214 kmem_cache_destroy(exofs_inode_cachep);
210} 215}
211 216
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index af74d9e27b7..6c205d0c565 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -206,6 +206,11 @@ static int init_inodecache(void)
206 206
207static void destroy_inodecache(void) 207static void destroy_inodecache(void)
208{ 208{
209 /*
210 * Make sure all delayed rcu free inodes are flushed before we
211 * destroy cache.
212 */
213 rcu_barrier();
209 kmem_cache_destroy(ext2_inode_cachep); 214 kmem_cache_destroy(ext2_inode_cachep);
210} 215}
211 216
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 09b8455bd7e..bd29894c8fb 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -532,6 +532,11 @@ static int init_inodecache(void)
532 532
533static void destroy_inodecache(void) 533static void destroy_inodecache(void)
534{ 534{
535 /*
536 * Make sure all delayed rcu free inodes are flushed before we
537 * destroy cache.
538 */
539 rcu_barrier();
535 kmem_cache_destroy(ext3_inode_cachep); 540 kmem_cache_destroy(ext3_inode_cachep);
536} 541}
537 542
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 7f7dad78760..5439d6a56e9 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -233,7 +233,7 @@ group_extend_out:
233 233
234 case EXT4_IOC_MOVE_EXT: { 234 case EXT4_IOC_MOVE_EXT: {
235 struct move_extent me; 235 struct move_extent me;
236 struct file *donor_filp; 236 struct fd donor;
237 int err; 237 int err;
238 238
239 if (!(filp->f_mode & FMODE_READ) || 239 if (!(filp->f_mode & FMODE_READ) ||
@@ -245,11 +245,11 @@ group_extend_out:
245 return -EFAULT; 245 return -EFAULT;
246 me.moved_len = 0; 246 me.moved_len = 0;
247 247
248 donor_filp = fget(me.donor_fd); 248 donor = fdget(me.donor_fd);
249 if (!donor_filp) 249 if (!donor.file)
250 return -EBADF; 250 return -EBADF;
251 251
252 if (!(donor_filp->f_mode & FMODE_WRITE)) { 252 if (!(donor.file->f_mode & FMODE_WRITE)) {
253 err = -EBADF; 253 err = -EBADF;
254 goto mext_out; 254 goto mext_out;
255 } 255 }
@@ -258,14 +258,15 @@ group_extend_out:
258 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 258 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
259 ext4_msg(sb, KERN_ERR, 259 ext4_msg(sb, KERN_ERR,
260 "Online defrag not supported with bigalloc"); 260 "Online defrag not supported with bigalloc");
261 return -EOPNOTSUPP; 261 err = -EOPNOTSUPP;
262 goto mext_out;
262 } 263 }
263 264
264 err = mnt_want_write_file(filp); 265 err = mnt_want_write_file(filp);
265 if (err) 266 if (err)
266 goto mext_out; 267 goto mext_out;
267 268
268 err = ext4_move_extents(filp, donor_filp, me.orig_start, 269 err = ext4_move_extents(filp, donor.file, me.orig_start,
269 me.donor_start, me.len, &me.moved_len); 270 me.donor_start, me.len, &me.moved_len);
270 mnt_drop_write_file(filp); 271 mnt_drop_write_file(filp);
271 272
@@ -273,7 +274,7 @@ group_extend_out:
273 &me, sizeof(me))) 274 &me, sizeof(me)))
274 err = -EFAULT; 275 err = -EFAULT;
275mext_out: 276mext_out:
276 fput(donor_filp); 277 fdput(donor);
277 return err; 278 return err;
278 } 279 }
279 280
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1f15cc836fb..69c55d4e462 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1019,6 +1019,11 @@ static int init_inodecache(void)
1019 1019
1020static void destroy_inodecache(void) 1020static void destroy_inodecache(void)
1021{ 1021{
1022 /*
1023 * Make sure all delayed rcu free inodes are flushed before we
1024 * destroy cache.
1025 */
1026 rcu_barrier();
1022 kmem_cache_destroy(ext4_inode_cachep); 1027 kmem_cache_destroy(ext4_inode_cachep);
1023} 1028}
1024 1029
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 47d9eb0be88..4e5a6ac54eb 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -521,6 +521,11 @@ static int __init fat_init_inodecache(void)
521 521
522static void __exit fat_destroy_inodecache(void) 522static void __exit fat_destroy_inodecache(void)
523{ 523{
524 /*
525 * Make sure all delayed rcu free inodes are flushed before we
526 * destroy cache.
527 */
528 rcu_barrier();
524 kmem_cache_destroy(fat_inode_cachep); 529 kmem_cache_destroy(fat_inode_cachep);
525} 530}
526 531
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 887b5ba8c9b..8f704291d4e 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -26,124 +26,6 @@
26#include <asm/siginfo.h> 26#include <asm/siginfo.h>
27#include <asm/uaccess.h> 27#include <asm/uaccess.h>
28 28
29void set_close_on_exec(unsigned int fd, int flag)
30{
31 struct files_struct *files = current->files;
32 struct fdtable *fdt;
33 spin_lock(&files->file_lock);
34 fdt = files_fdtable(files);
35 if (flag)
36 __set_close_on_exec(fd, fdt);
37 else
38 __clear_close_on_exec(fd, fdt);
39 spin_unlock(&files->file_lock);
40}
41
42static bool get_close_on_exec(unsigned int fd)
43{
44 struct files_struct *files = current->files;
45 struct fdtable *fdt;
46 bool res;
47 rcu_read_lock();
48 fdt = files_fdtable(files);
49 res = close_on_exec(fd, fdt);
50 rcu_read_unlock();
51 return res;
52}
53
54SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
55{
56 int err = -EBADF;
57 struct file * file, *tofree;
58 struct files_struct * files = current->files;
59 struct fdtable *fdt;
60
61 if ((flags & ~O_CLOEXEC) != 0)
62 return -EINVAL;
63
64 if (unlikely(oldfd == newfd))
65 return -EINVAL;
66
67 spin_lock(&files->file_lock);
68 err = expand_files(files, newfd);
69 file = fcheck(oldfd);
70 if (unlikely(!file))
71 goto Ebadf;
72 if (unlikely(err < 0)) {
73 if (err == -EMFILE)
74 goto Ebadf;
75 goto out_unlock;
76 }
77 /*
78 * We need to detect attempts to do dup2() over allocated but still
79 * not finished descriptor. NB: OpenBSD avoids that at the price of
80 * extra work in their equivalent of fget() - they insert struct
81 * file immediately after grabbing descriptor, mark it larval if
82 * more work (e.g. actual opening) is needed and make sure that
83 * fget() treats larval files as absent. Potentially interesting,
84 * but while extra work in fget() is trivial, locking implications
85 * and amount of surgery on open()-related paths in VFS are not.
86 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
87 * deadlocks in rather amusing ways, AFAICS. All of that is out of
88 * scope of POSIX or SUS, since neither considers shared descriptor
89 * tables and this condition does not arise without those.
90 */
91 err = -EBUSY;
92 fdt = files_fdtable(files);
93 tofree = fdt->fd[newfd];
94 if (!tofree && fd_is_open(newfd, fdt))
95 goto out_unlock;
96 get_file(file);
97 rcu_assign_pointer(fdt->fd[newfd], file);
98 __set_open_fd(newfd, fdt);
99 if (flags & O_CLOEXEC)
100 __set_close_on_exec(newfd, fdt);
101 else
102 __clear_close_on_exec(newfd, fdt);
103 spin_unlock(&files->file_lock);
104
105 if (tofree)
106 filp_close(tofree, files);
107
108 return newfd;
109
110Ebadf:
111 err = -EBADF;
112out_unlock:
113 spin_unlock(&files->file_lock);
114 return err;
115}
116
117SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
118{
119 if (unlikely(newfd == oldfd)) { /* corner case */
120 struct files_struct *files = current->files;
121 int retval = oldfd;
122
123 rcu_read_lock();
124 if (!fcheck_files(files, oldfd))
125 retval = -EBADF;
126 rcu_read_unlock();
127 return retval;
128 }
129 return sys_dup3(oldfd, newfd, 0);
130}
131
132SYSCALL_DEFINE1(dup, unsigned int, fildes)
133{
134 int ret = -EBADF;
135 struct file *file = fget_raw(fildes);
136
137 if (file) {
138 ret = get_unused_fd();
139 if (ret >= 0)
140 fd_install(ret, file);
141 else
142 fput(file);
143 }
144 return ret;
145}
146
147#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) 29#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
148 30
149static int setfl(int fd, struct file * filp, unsigned long arg) 31static int setfl(int fd, struct file * filp, unsigned long arg)
@@ -267,7 +149,7 @@ pid_t f_getown(struct file *filp)
267 149
268static int f_setown_ex(struct file *filp, unsigned long arg) 150static int f_setown_ex(struct file *filp, unsigned long arg)
269{ 151{
270 struct f_owner_ex * __user owner_p = (void * __user)arg; 152 struct f_owner_ex __user *owner_p = (void __user *)arg;
271 struct f_owner_ex owner; 153 struct f_owner_ex owner;
272 struct pid *pid; 154 struct pid *pid;
273 int type; 155 int type;
@@ -307,7 +189,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
307 189
308static int f_getown_ex(struct file *filp, unsigned long arg) 190static int f_getown_ex(struct file *filp, unsigned long arg)
309{ 191{
310 struct f_owner_ex * __user owner_p = (void * __user)arg; 192 struct f_owner_ex __user *owner_p = (void __user *)arg;
311 struct f_owner_ex owner; 193 struct f_owner_ex owner;
312 int ret = 0; 194 int ret = 0;
313 195
@@ -345,7 +227,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
345static int f_getowner_uids(struct file *filp, unsigned long arg) 227static int f_getowner_uids(struct file *filp, unsigned long arg)
346{ 228{
347 struct user_namespace *user_ns = current_user_ns(); 229 struct user_namespace *user_ns = current_user_ns();
348 uid_t * __user dst = (void * __user)arg; 230 uid_t __user *dst = (void __user *)arg;
349 uid_t src[2]; 231 uid_t src[2];
350 int err; 232 int err;
351 233
@@ -373,14 +255,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
373 255
374 switch (cmd) { 256 switch (cmd) {
375 case F_DUPFD: 257 case F_DUPFD:
258 err = f_dupfd(arg, filp, 0);
259 break;
376 case F_DUPFD_CLOEXEC: 260 case F_DUPFD_CLOEXEC:
377 if (arg >= rlimit(RLIMIT_NOFILE)) 261 err = f_dupfd(arg, filp, FD_CLOEXEC);
378 break;
379 err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
380 if (err >= 0) {
381 get_file(filp);
382 fd_install(err, filp);
383 }
384 break; 262 break;
385 case F_GETFD: 263 case F_GETFD:
386 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; 264 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
@@ -470,25 +348,23 @@ static int check_fcntl_cmd(unsigned cmd)
470 348
471SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 349SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
472{ 350{
473 struct file *filp; 351 struct fd f = fdget_raw(fd);
474 int fput_needed;
475 long err = -EBADF; 352 long err = -EBADF;
476 353
477 filp = fget_raw_light(fd, &fput_needed); 354 if (!f.file)
478 if (!filp)
479 goto out; 355 goto out;
480 356
481 if (unlikely(filp->f_mode & FMODE_PATH)) { 357 if (unlikely(f.file->f_mode & FMODE_PATH)) {
482 if (!check_fcntl_cmd(cmd)) 358 if (!check_fcntl_cmd(cmd))
483 goto out1; 359 goto out1;
484 } 360 }
485 361
486 err = security_file_fcntl(filp, cmd, arg); 362 err = security_file_fcntl(f.file, cmd, arg);
487 if (!err) 363 if (!err)
488 err = do_fcntl(fd, cmd, arg, filp); 364 err = do_fcntl(fd, cmd, arg, f.file);
489 365
490out1: 366out1:
491 fput_light(filp, fput_needed); 367 fdput(f);
492out: 368out:
493 return err; 369 return err;
494} 370}
@@ -497,38 +373,36 @@ out:
497SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, 373SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
498 unsigned long, arg) 374 unsigned long, arg)
499{ 375{
500 struct file * filp; 376 struct fd f = fdget_raw(fd);
501 long err = -EBADF; 377 long err = -EBADF;
502 int fput_needed;
503 378
504 filp = fget_raw_light(fd, &fput_needed); 379 if (!f.file)
505 if (!filp)
506 goto out; 380 goto out;
507 381
508 if (unlikely(filp->f_mode & FMODE_PATH)) { 382 if (unlikely(f.file->f_mode & FMODE_PATH)) {
509 if (!check_fcntl_cmd(cmd)) 383 if (!check_fcntl_cmd(cmd))
510 goto out1; 384 goto out1;
511 } 385 }
512 386
513 err = security_file_fcntl(filp, cmd, arg); 387 err = security_file_fcntl(f.file, cmd, arg);
514 if (err) 388 if (err)
515 goto out1; 389 goto out1;
516 390
517 switch (cmd) { 391 switch (cmd) {
518 case F_GETLK64: 392 case F_GETLK64:
519 err = fcntl_getlk64(filp, (struct flock64 __user *) arg); 393 err = fcntl_getlk64(f.file, (struct flock64 __user *) arg);
520 break; 394 break;
521 case F_SETLK64: 395 case F_SETLK64:
522 case F_SETLKW64: 396 case F_SETLKW64:
523 err = fcntl_setlk64(fd, filp, cmd, 397 err = fcntl_setlk64(fd, f.file, cmd,
524 (struct flock64 __user *) arg); 398 (struct flock64 __user *) arg);
525 break; 399 break;
526 default: 400 default:
527 err = do_fcntl(fd, cmd, arg, filp); 401 err = do_fcntl(fd, cmd, arg, f.file);
528 break; 402 break;
529 } 403 }
530out1: 404out1:
531 fput_light(filp, fput_needed); 405 fdput(f);
532out: 406out:
533 return err; 407 return err;
534} 408}
diff --git a/fs/fhandle.c b/fs/fhandle.c
index a48e4a139be..f775bfdd6e4 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -113,24 +113,21 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
113 113
114static struct vfsmount *get_vfsmount_from_fd(int fd) 114static struct vfsmount *get_vfsmount_from_fd(int fd)
115{ 115{
116 struct path path; 116 struct vfsmount *mnt;
117 117
118 if (fd == AT_FDCWD) { 118 if (fd == AT_FDCWD) {
119 struct fs_struct *fs = current->fs; 119 struct fs_struct *fs = current->fs;
120 spin_lock(&fs->lock); 120 spin_lock(&fs->lock);
121 path = fs->pwd; 121 mnt = mntget(fs->pwd.mnt);
122 mntget(path.mnt);
123 spin_unlock(&fs->lock); 122 spin_unlock(&fs->lock);
124 } else { 123 } else {
125 int fput_needed; 124 struct fd f = fdget(fd);
126 struct file *file = fget_light(fd, &fput_needed); 125 if (!f.file)
127 if (!file)
128 return ERR_PTR(-EBADF); 126 return ERR_PTR(-EBADF);
129 path = file->f_path; 127 mnt = mntget(f.file->f_path.mnt);
130 mntget(path.mnt); 128 fdput(f);
131 fput_light(file, fput_needed);
132 } 129 }
133 return path.mnt; 130 return mnt;
134} 131}
135 132
136static int vfs_dentry_acceptable(void *context, struct dentry *dentry) 133static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
diff --git a/fs/file.c b/fs/file.c
index ba3f6053025..0f1bda4bebf 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
6 * Manage the dynamic fd arrays in the process files_struct. 6 * Manage the dynamic fd arrays in the process files_struct.
7 */ 7 */
8 8
9#include <linux/syscalls.h>
9#include <linux/export.h> 10#include <linux/export.h>
10#include <linux/fs.h> 11#include <linux/fs.h>
11#include <linux/mm.h> 12#include <linux/mm.h>
@@ -84,22 +85,14 @@ static void free_fdtable_work(struct work_struct *work)
84 } 85 }
85} 86}
86 87
87void free_fdtable_rcu(struct rcu_head *rcu) 88static void free_fdtable_rcu(struct rcu_head *rcu)
88{ 89{
89 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); 90 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
90 struct fdtable_defer *fddef; 91 struct fdtable_defer *fddef;
91 92
92 BUG_ON(!fdt); 93 BUG_ON(!fdt);
94 BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT);
93 95
94 if (fdt->max_fds <= NR_OPEN_DEFAULT) {
95 /*
96 * This fdtable is embedded in the files structure and that
97 * structure itself is getting destroyed.
98 */
99 kmem_cache_free(files_cachep,
100 container_of(fdt, struct files_struct, fdtab));
101 return;
102 }
103 if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { 96 if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) {
104 kfree(fdt->fd); 97 kfree(fdt->fd);
105 kfree(fdt->open_fds); 98 kfree(fdt->open_fds);
@@ -229,7 +222,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
229 copy_fdtable(new_fdt, cur_fdt); 222 copy_fdtable(new_fdt, cur_fdt);
230 rcu_assign_pointer(files->fdt, new_fdt); 223 rcu_assign_pointer(files->fdt, new_fdt);
231 if (cur_fdt->max_fds > NR_OPEN_DEFAULT) 224 if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
232 free_fdtable(cur_fdt); 225 call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
233 } else { 226 } else {
234 /* Somebody else expanded, so undo our attempt */ 227 /* Somebody else expanded, so undo our attempt */
235 __free_fdtable(new_fdt); 228 __free_fdtable(new_fdt);
@@ -245,19 +238,12 @@ static int expand_fdtable(struct files_struct *files, int nr)
245 * expanded and execution may have blocked. 238 * expanded and execution may have blocked.
246 * The files->file_lock should be held on entry, and will be held on exit. 239 * The files->file_lock should be held on entry, and will be held on exit.
247 */ 240 */
248int expand_files(struct files_struct *files, int nr) 241static int expand_files(struct files_struct *files, int nr)
249{ 242{
250 struct fdtable *fdt; 243 struct fdtable *fdt;
251 244
252 fdt = files_fdtable(files); 245 fdt = files_fdtable(files);
253 246
254 /*
255 * N.B. For clone tasks sharing a files structure, this test
256 * will limit the total number of files that can be opened.
257 */
258 if (nr >= rlimit(RLIMIT_NOFILE))
259 return -EMFILE;
260
261 /* Do we need to expand? */ 247 /* Do we need to expand? */
262 if (nr < fdt->max_fds) 248 if (nr < fdt->max_fds)
263 return 0; 249 return 0;
@@ -270,6 +256,26 @@ int expand_files(struct files_struct *files, int nr)
270 return expand_fdtable(files, nr); 256 return expand_fdtable(files, nr);
271} 257}
272 258
259static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
260{
261 __set_bit(fd, fdt->close_on_exec);
262}
263
264static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
265{
266 __clear_bit(fd, fdt->close_on_exec);
267}
268
269static inline void __set_open_fd(int fd, struct fdtable *fdt)
270{
271 __set_bit(fd, fdt->open_fds);
272}
273
274static inline void __clear_open_fd(int fd, struct fdtable *fdt)
275{
276 __clear_bit(fd, fdt->open_fds);
277}
278
273static int count_open_files(struct fdtable *fdt) 279static int count_open_files(struct fdtable *fdt)
274{ 280{
275 int size = fdt->max_fds; 281 int size = fdt->max_fds;
@@ -395,6 +401,95 @@ out:
395 return NULL; 401 return NULL;
396} 402}
397 403
404static void close_files(struct files_struct * files)
405{
406 int i, j;
407 struct fdtable *fdt;
408
409 j = 0;
410
411 /*
412 * It is safe to dereference the fd table without RCU or
413 * ->file_lock because this is the last reference to the
414 * files structure. But use RCU to shut RCU-lockdep up.
415 */
416 rcu_read_lock();
417 fdt = files_fdtable(files);
418 rcu_read_unlock();
419 for (;;) {
420 unsigned long set;
421 i = j * BITS_PER_LONG;
422 if (i >= fdt->max_fds)
423 break;
424 set = fdt->open_fds[j++];
425 while (set) {
426 if (set & 1) {
427 struct file * file = xchg(&fdt->fd[i], NULL);
428 if (file) {
429 filp_close(file, files);
430 cond_resched();
431 }
432 }
433 i++;
434 set >>= 1;
435 }
436 }
437}
438
439struct files_struct *get_files_struct(struct task_struct *task)
440{
441 struct files_struct *files;
442
443 task_lock(task);
444 files = task->files;
445 if (files)
446 atomic_inc(&files->count);
447 task_unlock(task);
448
449 return files;
450}
451
452void put_files_struct(struct files_struct *files)
453{
454 struct fdtable *fdt;
455
456 if (atomic_dec_and_test(&files->count)) {
457 close_files(files);
458 /* not really needed, since nobody can see us */
459 rcu_read_lock();
460 fdt = files_fdtable(files);
461 rcu_read_unlock();
462 /* free the arrays if they are not embedded */
463 if (fdt != &files->fdtab)
464 __free_fdtable(fdt);
465 kmem_cache_free(files_cachep, files);
466 }
467}
468
469void reset_files_struct(struct files_struct *files)
470{
471 struct task_struct *tsk = current;
472 struct files_struct *old;
473
474 old = tsk->files;
475 task_lock(tsk);
476 tsk->files = files;
477 task_unlock(tsk);
478 put_files_struct(old);
479}
480
481void exit_files(struct task_struct *tsk)
482{
483 struct files_struct * files = tsk->files;
484
485 if (files) {
486 task_lock(tsk);
487 tsk->files = NULL;
488 task_unlock(tsk);
489 put_files_struct(files);
490 }
491}
492
398static void __devinit fdtable_defer_list_init(int cpu) 493static void __devinit fdtable_defer_list_init(int cpu)
399{ 494{
400 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); 495 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
@@ -424,12 +519,18 @@ struct files_struct init_files = {
424 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 519 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
425}; 520};
426 521
522void daemonize_descriptors(void)
523{
524 atomic_inc(&init_files.count);
525 reset_files_struct(&init_files);
526}
527
427/* 528/*
428 * allocate a file descriptor, mark it busy. 529 * allocate a file descriptor, mark it busy.
429 */ 530 */
430int alloc_fd(unsigned start, unsigned flags) 531int __alloc_fd(struct files_struct *files,
532 unsigned start, unsigned end, unsigned flags)
431{ 533{
432 struct files_struct *files = current->files;
433 unsigned int fd; 534 unsigned int fd;
434 int error; 535 int error;
435 struct fdtable *fdt; 536 struct fdtable *fdt;
@@ -444,6 +545,14 @@ repeat:
444 if (fd < fdt->max_fds) 545 if (fd < fdt->max_fds)
445 fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); 546 fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
446 547
548 /*
549 * N.B. For clone tasks sharing a files structure, this test
550 * will limit the total number of files that can be opened.
551 */
552 error = -EMFILE;
553 if (fd >= end)
554 goto out;
555
447 error = expand_files(files, fd); 556 error = expand_files(files, fd);
448 if (error < 0) 557 if (error < 0)
449 goto out; 558 goto out;
@@ -477,8 +586,424 @@ out:
477 return error; 586 return error;
478} 587}
479 588
480int get_unused_fd(void) 589static int alloc_fd(unsigned start, unsigned flags)
590{
591 return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
592}
593
594int get_unused_fd_flags(unsigned flags)
595{
596 return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
597}
598EXPORT_SYMBOL(get_unused_fd_flags);
599
600static void __put_unused_fd(struct files_struct *files, unsigned int fd)
601{
602 struct fdtable *fdt = files_fdtable(files);
603 __clear_open_fd(fd, fdt);
604 if (fd < files->next_fd)
605 files->next_fd = fd;
606}
607
608void put_unused_fd(unsigned int fd)
609{
610 struct files_struct *files = current->files;
611 spin_lock(&files->file_lock);
612 __put_unused_fd(files, fd);
613 spin_unlock(&files->file_lock);
614}
615
616EXPORT_SYMBOL(put_unused_fd);
617
618/*
619 * Install a file pointer in the fd array.
620 *
621 * The VFS is full of places where we drop the files lock between
622 * setting the open_fds bitmap and installing the file in the file
623 * array. At any such point, we are vulnerable to a dup2() race
624 * installing a file in the array before us. We need to detect this and
625 * fput() the struct file we are about to overwrite in this case.
626 *
627 * It should never happen - if we allow dup2() do it, _really_ bad things
628 * will follow.
629 *
630 * NOTE: __fd_install() variant is really, really low-level; don't
631 * use it unless you are forced to by truly lousy API shoved down
632 * your throat. 'files' *MUST* be either current->files or obtained
633 * by get_files_struct(current) done by whoever had given it to you,
634 * or really bad things will happen. Normally you want to use
635 * fd_install() instead.
636 */
637
638void __fd_install(struct files_struct *files, unsigned int fd,
639 struct file *file)
640{
641 struct fdtable *fdt;
642 spin_lock(&files->file_lock);
643 fdt = files_fdtable(files);
644 BUG_ON(fdt->fd[fd] != NULL);
645 rcu_assign_pointer(fdt->fd[fd], file);
646 spin_unlock(&files->file_lock);
647}
648
649void fd_install(unsigned int fd, struct file *file)
481{ 650{
482 return alloc_fd(0, 0); 651 __fd_install(current->files, fd, file);
652}
653
654EXPORT_SYMBOL(fd_install);
655
656/*
657 * The same warnings as for __alloc_fd()/__fd_install() apply here...
658 */
659int __close_fd(struct files_struct *files, unsigned fd)
660{
661 struct file *file;
662 struct fdtable *fdt;
663
664 spin_lock(&files->file_lock);
665 fdt = files_fdtable(files);
666 if (fd >= fdt->max_fds)
667 goto out_unlock;
668 file = fdt->fd[fd];
669 if (!file)
670 goto out_unlock;
671 rcu_assign_pointer(fdt->fd[fd], NULL);
672 __clear_close_on_exec(fd, fdt);
673 __put_unused_fd(files, fd);
674 spin_unlock(&files->file_lock);
675 return filp_close(file, files);
676
677out_unlock:
678 spin_unlock(&files->file_lock);
679 return -EBADF;
680}
681
682void do_close_on_exec(struct files_struct *files)
683{
684 unsigned i;
685 struct fdtable *fdt;
686
687 /* exec unshares first */
688 BUG_ON(atomic_read(&files->count) != 1);
689 spin_lock(&files->file_lock);
690 for (i = 0; ; i++) {
691 unsigned long set;
692 unsigned fd = i * BITS_PER_LONG;
693 fdt = files_fdtable(files);
694 if (fd >= fdt->max_fds)
695 break;
696 set = fdt->close_on_exec[i];
697 if (!set)
698 continue;
699 fdt->close_on_exec[i] = 0;
700 for ( ; set ; fd++, set >>= 1) {
701 struct file *file;
702 if (!(set & 1))
703 continue;
704 file = fdt->fd[fd];
705 if (!file)
706 continue;
707 rcu_assign_pointer(fdt->fd[fd], NULL);
708 __put_unused_fd(files, fd);
709 spin_unlock(&files->file_lock);
710 filp_close(file, files);
711 cond_resched();
712 spin_lock(&files->file_lock);
713 }
714
715 }
716 spin_unlock(&files->file_lock);
717}
718
719struct file *fget(unsigned int fd)
720{
721 struct file *file;
722 struct files_struct *files = current->files;
723
724 rcu_read_lock();
725 file = fcheck_files(files, fd);
726 if (file) {
727 /* File object ref couldn't be taken */
728 if (file->f_mode & FMODE_PATH ||
729 !atomic_long_inc_not_zero(&file->f_count))
730 file = NULL;
731 }
732 rcu_read_unlock();
733
734 return file;
735}
736
737EXPORT_SYMBOL(fget);
738
739struct file *fget_raw(unsigned int fd)
740{
741 struct file *file;
742 struct files_struct *files = current->files;
743
744 rcu_read_lock();
745 file = fcheck_files(files, fd);
746 if (file) {
747 /* File object ref couldn't be taken */
748 if (!atomic_long_inc_not_zero(&file->f_count))
749 file = NULL;
750 }
751 rcu_read_unlock();
752
753 return file;
754}
755
756EXPORT_SYMBOL(fget_raw);
757
758/*
759 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
760 *
761 * You can use this instead of fget if you satisfy all of the following
762 * conditions:
763 * 1) You must call fput_light before exiting the syscall and returning control
764 * to userspace (i.e. you cannot remember the returned struct file * after
765 * returning to userspace).
766 * 2) You must not call filp_close on the returned struct file * in between
767 * calls to fget_light and fput_light.
768 * 3) You must not clone the current task in between the calls to fget_light
769 * and fput_light.
770 *
771 * The fput_needed flag returned by fget_light should be passed to the
772 * corresponding fput_light.
773 */
774struct file *fget_light(unsigned int fd, int *fput_needed)
775{
776 struct file *file;
777 struct files_struct *files = current->files;
778
779 *fput_needed = 0;
780 if (atomic_read(&files->count) == 1) {
781 file = fcheck_files(files, fd);
782 if (file && (file->f_mode & FMODE_PATH))
783 file = NULL;
784 } else {
785 rcu_read_lock();
786 file = fcheck_files(files, fd);
787 if (file) {
788 if (!(file->f_mode & FMODE_PATH) &&
789 atomic_long_inc_not_zero(&file->f_count))
790 *fput_needed = 1;
791 else
792 /* Didn't get the reference, someone's freed */
793 file = NULL;
794 }
795 rcu_read_unlock();
796 }
797
798 return file;
799}
800EXPORT_SYMBOL(fget_light);
801
802struct file *fget_raw_light(unsigned int fd, int *fput_needed)
803{
804 struct file *file;
805 struct files_struct *files = current->files;
806
807 *fput_needed = 0;
808 if (atomic_read(&files->count) == 1) {
809 file = fcheck_files(files, fd);
810 } else {
811 rcu_read_lock();
812 file = fcheck_files(files, fd);
813 if (file) {
814 if (atomic_long_inc_not_zero(&file->f_count))
815 *fput_needed = 1;
816 else
817 /* Didn't get the reference, someone's freed */
818 file = NULL;
819 }
820 rcu_read_unlock();
821 }
822
823 return file;
824}
825
826void set_close_on_exec(unsigned int fd, int flag)
827{
828 struct files_struct *files = current->files;
829 struct fdtable *fdt;
830 spin_lock(&files->file_lock);
831 fdt = files_fdtable(files);
832 if (flag)
833 __set_close_on_exec(fd, fdt);
834 else
835 __clear_close_on_exec(fd, fdt);
836 spin_unlock(&files->file_lock);
837}
838
839bool get_close_on_exec(unsigned int fd)
840{
841 struct files_struct *files = current->files;
842 struct fdtable *fdt;
843 bool res;
844 rcu_read_lock();
845 fdt = files_fdtable(files);
846 res = close_on_exec(fd, fdt);
847 rcu_read_unlock();
848 return res;
849}
850
851static int do_dup2(struct files_struct *files,
852 struct file *file, unsigned fd, unsigned flags)
853{
854 struct file *tofree;
855 struct fdtable *fdt;
856
857 /*
858 * We need to detect attempts to do dup2() over allocated but still
859 * not finished descriptor. NB: OpenBSD avoids that at the price of
860 * extra work in their equivalent of fget() - they insert struct
861 * file immediately after grabbing descriptor, mark it larval if
862 * more work (e.g. actual opening) is needed and make sure that
863 * fget() treats larval files as absent. Potentially interesting,
864 * but while extra work in fget() is trivial, locking implications
865 * and amount of surgery on open()-related paths in VFS are not.
866 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
867 * deadlocks in rather amusing ways, AFAICS. All of that is out of
868 * scope of POSIX or SUS, since neither considers shared descriptor
869 * tables and this condition does not arise without those.
870 */
871 fdt = files_fdtable(files);
872 tofree = fdt->fd[fd];
873 if (!tofree && fd_is_open(fd, fdt))
874 goto Ebusy;
875 get_file(file);
876 rcu_assign_pointer(fdt->fd[fd], file);
877 __set_open_fd(fd, fdt);
878 if (flags & O_CLOEXEC)
879 __set_close_on_exec(fd, fdt);
880 else
881 __clear_close_on_exec(fd, fdt);
882 spin_unlock(&files->file_lock);
883
884 if (tofree)
885 filp_close(tofree, files);
886
887 return fd;
888
889Ebusy:
890 spin_unlock(&files->file_lock);
891 return -EBUSY;
892}
893
894int replace_fd(unsigned fd, struct file *file, unsigned flags)
895{
896 int err;
897 struct files_struct *files = current->files;
898
899 if (!file)
900 return __close_fd(files, fd);
901
902 if (fd >= rlimit(RLIMIT_NOFILE))
903 return -EMFILE;
904
905 spin_lock(&files->file_lock);
906 err = expand_files(files, fd);
907 if (unlikely(err < 0))
908 goto out_unlock;
909 return do_dup2(files, file, fd, flags);
910
911out_unlock:
912 spin_unlock(&files->file_lock);
913 return err;
914}
915
916SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
917{
918 int err = -EBADF;
919 struct file *file;
920 struct files_struct *files = current->files;
921
922 if ((flags & ~O_CLOEXEC) != 0)
923 return -EINVAL;
924
925 if (newfd >= rlimit(RLIMIT_NOFILE))
926 return -EMFILE;
927
928 spin_lock(&files->file_lock);
929 err = expand_files(files, newfd);
930 file = fcheck(oldfd);
931 if (unlikely(!file))
932 goto Ebadf;
933 if (unlikely(err < 0)) {
934 if (err == -EMFILE)
935 goto Ebadf;
936 goto out_unlock;
937 }
938 return do_dup2(files, file, newfd, flags);
939
940Ebadf:
941 err = -EBADF;
942out_unlock:
943 spin_unlock(&files->file_lock);
944 return err;
945}
946
947SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
948{
949 if (unlikely(newfd == oldfd)) { /* corner case */
950 struct files_struct *files = current->files;
951 int retval = oldfd;
952
953 rcu_read_lock();
954 if (!fcheck_files(files, oldfd))
955 retval = -EBADF;
956 rcu_read_unlock();
957 return retval;
958 }
959 return sys_dup3(oldfd, newfd, 0);
960}
961
962SYSCALL_DEFINE1(dup, unsigned int, fildes)
963{
964 int ret = -EBADF;
965 struct file *file = fget_raw(fildes);
966
967 if (file) {
968 ret = get_unused_fd();
969 if (ret >= 0)
970 fd_install(ret, file);
971 else
972 fput(file);
973 }
974 return ret;
975}
976
977int f_dupfd(unsigned int from, struct file *file, unsigned flags)
978{
979 int err;
980 if (from >= rlimit(RLIMIT_NOFILE))
981 return -EINVAL;
982 err = alloc_fd(from, flags);
983 if (err >= 0) {
984 get_file(file);
985 fd_install(err, file);
986 }
987 return err;
988}
989
990int iterate_fd(struct files_struct *files, unsigned n,
991 int (*f)(const void *, struct file *, unsigned),
992 const void *p)
993{
994 struct fdtable *fdt;
995 struct file *file;
996 int res = 0;
997 if (!files)
998 return 0;
999 spin_lock(&files->file_lock);
1000 fdt = files_fdtable(files);
1001 while (!res && n < fdt->max_fds) {
1002 file = rcu_dereference_check_fdtable(files, fdt->fd[n++]);
1003 if (file)
1004 res = f(p, file, n);
1005 }
1006 spin_unlock(&files->file_lock);
1007 return res;
483} 1008}
484EXPORT_SYMBOL(get_unused_fd); 1009EXPORT_SYMBOL(iterate_fd);
diff --git a/fs/file_table.c b/fs/file_table.c
index 701985e4ccd..c6780163bf3 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -339,112 +339,6 @@ void __fput_sync(struct file *file)
339 339
340EXPORT_SYMBOL(fput); 340EXPORT_SYMBOL(fput);
341 341
342struct file *fget(unsigned int fd)
343{
344 struct file *file;
345 struct files_struct *files = current->files;
346
347 rcu_read_lock();
348 file = fcheck_files(files, fd);
349 if (file) {
350 /* File object ref couldn't be taken */
351 if (file->f_mode & FMODE_PATH ||
352 !atomic_long_inc_not_zero(&file->f_count))
353 file = NULL;
354 }
355 rcu_read_unlock();
356
357 return file;
358}
359
360EXPORT_SYMBOL(fget);
361
362struct file *fget_raw(unsigned int fd)
363{
364 struct file *file;
365 struct files_struct *files = current->files;
366
367 rcu_read_lock();
368 file = fcheck_files(files, fd);
369 if (file) {
370 /* File object ref couldn't be taken */
371 if (!atomic_long_inc_not_zero(&file->f_count))
372 file = NULL;
373 }
374 rcu_read_unlock();
375
376 return file;
377}
378
379EXPORT_SYMBOL(fget_raw);
380
381/*
382 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
383 *
384 * You can use this instead of fget if you satisfy all of the following
385 * conditions:
386 * 1) You must call fput_light before exiting the syscall and returning control
387 * to userspace (i.e. you cannot remember the returned struct file * after
388 * returning to userspace).
389 * 2) You must not call filp_close on the returned struct file * in between
390 * calls to fget_light and fput_light.
391 * 3) You must not clone the current task in between the calls to fget_light
392 * and fput_light.
393 *
394 * The fput_needed flag returned by fget_light should be passed to the
395 * corresponding fput_light.
396 */
397struct file *fget_light(unsigned int fd, int *fput_needed)
398{
399 struct file *file;
400 struct files_struct *files = current->files;
401
402 *fput_needed = 0;
403 if (atomic_read(&files->count) == 1) {
404 file = fcheck_files(files, fd);
405 if (file && (file->f_mode & FMODE_PATH))
406 file = NULL;
407 } else {
408 rcu_read_lock();
409 file = fcheck_files(files, fd);
410 if (file) {
411 if (!(file->f_mode & FMODE_PATH) &&
412 atomic_long_inc_not_zero(&file->f_count))
413 *fput_needed = 1;
414 else
415 /* Didn't get the reference, someone's freed */
416 file = NULL;
417 }
418 rcu_read_unlock();
419 }
420
421 return file;
422}
423
424struct file *fget_raw_light(unsigned int fd, int *fput_needed)
425{
426 struct file *file;
427 struct files_struct *files = current->files;
428
429 *fput_needed = 0;
430 if (atomic_read(&files->count) == 1) {
431 file = fcheck_files(files, fd);
432 } else {
433 rcu_read_lock();
434 file = fcheck_files(files, fd);
435 if (file) {
436 if (atomic_long_inc_not_zero(&file->f_count))
437 *fput_needed = 1;
438 else
439 /* Didn't get the reference, someone's freed */
440 file = NULL;
441 }
442 rcu_read_unlock();
443 }
444
445 return file;
446}
447
448void put_filp(struct file *file) 342void put_filp(struct file *file)
449{ 343{
450 if (atomic_long_dec_and_test(&file->f_count)) { 344 if (atomic_long_dec_and_test(&file->f_count)) {
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index d4fabd26084..fed2c8afb3a 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -279,6 +279,11 @@ static void __exit
279vxfs_cleanup(void) 279vxfs_cleanup(void)
280{ 280{
281 unregister_filesystem(&vxfs_fs_type); 281 unregister_filesystem(&vxfs_fs_type);
282 /*
283 * Make sure all delayed rcu free inodes are flushed before we
284 * destroy cache.
285 */
286 rcu_barrier();
282 kmem_cache_destroy(vxfs_inode_cachep); 287 kmem_cache_destroy(vxfs_inode_cachep);
283} 288}
284 289
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index f4246cfc8d8..8c23fa7a91e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -148,8 +148,7 @@ static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
148 if (ff->reserved_req) { 148 if (ff->reserved_req) {
149 req = ff->reserved_req; 149 req = ff->reserved_req;
150 ff->reserved_req = NULL; 150 ff->reserved_req = NULL;
151 get_file(file); 151 req->stolen_file = get_file(file);
152 req->stolen_file = file;
153 } 152 }
154 spin_unlock(&fc->lock); 153 spin_unlock(&fc->lock);
155 } while (!req); 154 } while (!req);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fca222dabe3..f0eda124cff 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1197,6 +1197,12 @@ static void fuse_fs_cleanup(void)
1197{ 1197{
1198 unregister_filesystem(&fuse_fs_type); 1198 unregister_filesystem(&fuse_fs_type);
1199 unregister_fuseblk(); 1199 unregister_fuseblk();
1200
1201 /*
1202 * Make sure all delayed rcu free inodes are flushed before we
1203 * destroy cache.
1204 */
1205 rcu_barrier();
1200 kmem_cache_destroy(fuse_inode_cachep); 1206 kmem_cache_destroy(fuse_inode_cachep);
1201} 1207}
1202 1208
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 0b63d135a09..e93ddaadfd1 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -492,6 +492,12 @@ static int __init init_hfs_fs(void)
492static void __exit exit_hfs_fs(void) 492static void __exit exit_hfs_fs(void)
493{ 493{
494 unregister_filesystem(&hfs_fs_type); 494 unregister_filesystem(&hfs_fs_type);
495
496 /*
497 * Make sure all delayed rcu free inodes are flushed before we
498 * destroy cache.
499 */
500 rcu_barrier();
495 kmem_cache_destroy(hfs_inode_cachep); 501 kmem_cache_destroy(hfs_inode_cachep);
496} 502}
497 503
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index fdafb2d7165..811a84d2d96 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -635,6 +635,12 @@ static int __init init_hfsplus_fs(void)
635static void __exit exit_hfsplus_fs(void) 635static void __exit exit_hfsplus_fs(void)
636{ 636{
637 unregister_filesystem(&hfsplus_fs_type); 637 unregister_filesystem(&hfsplus_fs_type);
638
639 /*
640 * Make sure all delayed rcu free inodes are flushed before we
641 * destroy cache.
642 */
643 rcu_barrier();
638 kmem_cache_destroy(hfsplus_inode_cachep); 644 kmem_cache_destroy(hfsplus_inode_cachep);
639} 645}
640 646
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index a152783602d..bc28bf077a6 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -210,6 +210,11 @@ static int init_inodecache(void)
210 210
211static void destroy_inodecache(void) 211static void destroy_inodecache(void)
212{ 212{
213 /*
214 * Make sure all delayed rcu free inodes are flushed before we
215 * destroy cache.
216 */
217 rcu_barrier();
213 kmem_cache_destroy(hpfs_inode_cachep); 218 kmem_cache_destroy(hpfs_inode_cachep);
214} 219}
215 220
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6e572c4fbf6..9460120a517 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1048,6 +1048,11 @@ static int __init init_hugetlbfs_fs(void)
1048 1048
1049static void __exit exit_hugetlbfs_fs(void) 1049static void __exit exit_hugetlbfs_fs(void)
1050{ 1050{
1051 /*
1052 * Make sure all delayed rcu free inodes are flushed before we
1053 * destroy cache.
1054 */
1055 rcu_barrier();
1051 kmem_cache_destroy(hugetlbfs_inode_cachep); 1056 kmem_cache_destroy(hugetlbfs_inode_cachep);
1052 kern_unmount(hugetlbfs_vfsmount); 1057 kern_unmount(hugetlbfs_vfsmount);
1053 unregister_filesystem(&hugetlbfs_fs_type); 1058 unregister_filesystem(&hugetlbfs_fs_type);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 29167bebe87..3bdad6d1f26 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -603,21 +603,14 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
603 603
604SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 604SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
605{ 605{
606 struct file *filp; 606 int error;
607 int error = -EBADF; 607 struct fd f = fdget(fd);
608 int fput_needed; 608
609 609 if (!f.file)
610 filp = fget_light(fd, &fput_needed); 610 return -EBADF;
611 if (!filp) 611 error = security_file_ioctl(f.file, cmd, arg);
612 goto out; 612 if (!error)
613 613 error = do_vfs_ioctl(f.file, fd, cmd, arg);
614 error = security_file_ioctl(filp, cmd, arg); 614 fdput(f);
615 if (error)
616 goto out_fput;
617
618 error = do_vfs_ioctl(filp, fd, cmd, arg);
619 out_fput:
620 fput_light(filp, fput_needed);
621 out:
622 return error; 615 return error;
623} 616}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index a7d8e6cc5e0..67ce52507d7 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -115,6 +115,11 @@ static int init_inodecache(void)
115 115
116static void destroy_inodecache(void) 116static void destroy_inodecache(void)
117{ 117{
118 /*
119 * Make sure all delayed rcu free inodes are flushed before we
120 * destroy cache.
121 */
122 rcu_barrier();
118 kmem_cache_destroy(isofs_inode_cachep); 123 kmem_cache_destroy(isofs_inode_cachep);
119} 124}
120 125
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 61ea41389f9..ff487954cd9 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -418,6 +418,12 @@ static void __exit exit_jffs2_fs(void)
418 unregister_filesystem(&jffs2_fs_type); 418 unregister_filesystem(&jffs2_fs_type);
419 jffs2_destroy_slab_caches(); 419 jffs2_destroy_slab_caches();
420 jffs2_compressors_exit(); 420 jffs2_compressors_exit();
421
422 /*
423 * Make sure all delayed rcu free inodes are flushed before we
424 * destroy cache.
425 */
426 rcu_barrier();
421 kmem_cache_destroy(jffs2_inode_cachep); 427 kmem_cache_destroy(jffs2_inode_cachep);
422} 428}
423 429
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 706692f2403..efdf8835dfc 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -911,6 +911,12 @@ static void __exit exit_jfs_fs(void)
911 jfs_proc_clean(); 911 jfs_proc_clean();
912#endif 912#endif
913 unregister_filesystem(&jfs_fs_type); 913 unregister_filesystem(&jfs_fs_type);
914
915 /*
916 * Make sure all delayed rcu free inodes are flushed before we
917 * destroy cache.
918 */
919 rcu_barrier();
914 kmem_cache_destroy(jfs_inode_cachep); 920 kmem_cache_destroy(jfs_inode_cachep);
915} 921}
916 922
diff --git a/fs/locks.c b/fs/locks.c
index 7e81bfc7516..abc7dc6c490 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1625,15 +1625,13 @@ EXPORT_SYMBOL(flock_lock_file_wait);
1625 */ 1625 */
1626SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) 1626SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
1627{ 1627{
1628 struct file *filp; 1628 struct fd f = fdget(fd);
1629 int fput_needed;
1630 struct file_lock *lock; 1629 struct file_lock *lock;
1631 int can_sleep, unlock; 1630 int can_sleep, unlock;
1632 int error; 1631 int error;
1633 1632
1634 error = -EBADF; 1633 error = -EBADF;
1635 filp = fget_light(fd, &fput_needed); 1634 if (!f.file)
1636 if (!filp)
1637 goto out; 1635 goto out;
1638 1636
1639 can_sleep = !(cmd & LOCK_NB); 1637 can_sleep = !(cmd & LOCK_NB);
@@ -1641,31 +1639,31 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
1641 unlock = (cmd == LOCK_UN); 1639 unlock = (cmd == LOCK_UN);
1642 1640
1643 if (!unlock && !(cmd & LOCK_MAND) && 1641 if (!unlock && !(cmd & LOCK_MAND) &&
1644 !(filp->f_mode & (FMODE_READ|FMODE_WRITE))) 1642 !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
1645 goto out_putf; 1643 goto out_putf;
1646 1644
1647 error = flock_make_lock(filp, &lock, cmd); 1645 error = flock_make_lock(f.file, &lock, cmd);
1648 if (error) 1646 if (error)
1649 goto out_putf; 1647 goto out_putf;
1650 if (can_sleep) 1648 if (can_sleep)
1651 lock->fl_flags |= FL_SLEEP; 1649 lock->fl_flags |= FL_SLEEP;
1652 1650
1653 error = security_file_lock(filp, lock->fl_type); 1651 error = security_file_lock(f.file, lock->fl_type);
1654 if (error) 1652 if (error)
1655 goto out_free; 1653 goto out_free;
1656 1654
1657 if (filp->f_op && filp->f_op->flock) 1655 if (f.file->f_op && f.file->f_op->flock)
1658 error = filp->f_op->flock(filp, 1656 error = f.file->f_op->flock(f.file,
1659 (can_sleep) ? F_SETLKW : F_SETLK, 1657 (can_sleep) ? F_SETLKW : F_SETLK,
1660 lock); 1658 lock);
1661 else 1659 else
1662 error = flock_lock_file_wait(filp, lock); 1660 error = flock_lock_file_wait(f.file, lock);
1663 1661
1664 out_free: 1662 out_free:
1665 locks_free_lock(lock); 1663 locks_free_lock(lock);
1666 1664
1667 out_putf: 1665 out_putf:
1668 fput_light(filp, fput_needed); 1666 fdput(f);
1669 out: 1667 out:
1670 return error; 1668 return error;
1671} 1669}
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index bda39085309..adb90116d36 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -417,5 +417,10 @@ int logfs_init_inode_cache(void)
417 417
418void logfs_destroy_inode_cache(void) 418void logfs_destroy_inode_cache(void)
419{ 419{
420 /*
421 * Make sure all delayed rcu free inodes are flushed before we
422 * destroy cache.
423 */
424 rcu_barrier();
420 kmem_cache_destroy(logfs_inode_cache); 425 kmem_cache_destroy(logfs_inode_cache);
421} 426}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index d0e42c67892..4fc5f8ab1c4 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -100,6 +100,11 @@ static int init_inodecache(void)
100 100
101static void destroy_inodecache(void) 101static void destroy_inodecache(void)
102{ 102{
103 /*
104 * Make sure all delayed rcu free inodes are flushed before we
105 * destroy cache.
106 */
107 rcu_barrier();
103 kmem_cache_destroy(minix_inode_cachep); 108 kmem_cache_destroy(minix_inode_cachep);
104} 109}
105 110
diff --git a/fs/namei.c b/fs/namei.c
index a856e7f7b6e..aa30d19e9ed 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1797,8 +1797,6 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1797 struct nameidata *nd, struct file **fp) 1797 struct nameidata *nd, struct file **fp)
1798{ 1798{
1799 int retval = 0; 1799 int retval = 0;
1800 int fput_needed;
1801 struct file *file;
1802 1800
1803 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1801 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1804 nd->flags = flags | LOOKUP_JUMPED; 1802 nd->flags = flags | LOOKUP_JUMPED;
@@ -1850,44 +1848,41 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1850 get_fs_pwd(current->fs, &nd->path); 1848 get_fs_pwd(current->fs, &nd->path);
1851 } 1849 }
1852 } else { 1850 } else {
1851 struct fd f = fdget_raw(dfd);
1853 struct dentry *dentry; 1852 struct dentry *dentry;
1854 1853
1855 file = fget_raw_light(dfd, &fput_needed); 1854 if (!f.file)
1856 retval = -EBADF; 1855 return -EBADF;
1857 if (!file)
1858 goto out_fail;
1859 1856
1860 dentry = file->f_path.dentry; 1857 dentry = f.file->f_path.dentry;
1861 1858
1862 if (*name) { 1859 if (*name) {
1863 retval = -ENOTDIR; 1860 if (!S_ISDIR(dentry->d_inode->i_mode)) {
1864 if (!S_ISDIR(dentry->d_inode->i_mode)) 1861 fdput(f);
1865 goto fput_fail; 1862 return -ENOTDIR;
1863 }
1866 1864
1867 retval = inode_permission(dentry->d_inode, MAY_EXEC); 1865 retval = inode_permission(dentry->d_inode, MAY_EXEC);
1868 if (retval) 1866 if (retval) {
1869 goto fput_fail; 1867 fdput(f);
1868 return retval;
1869 }
1870 } 1870 }
1871 1871
1872 nd->path = file->f_path; 1872 nd->path = f.file->f_path;
1873 if (flags & LOOKUP_RCU) { 1873 if (flags & LOOKUP_RCU) {
1874 if (fput_needed) 1874 if (f.need_put)
1875 *fp = file; 1875 *fp = f.file;
1876 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1876 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1877 lock_rcu_walk(); 1877 lock_rcu_walk();
1878 } else { 1878 } else {
1879 path_get(&file->f_path); 1879 path_get(&nd->path);
1880 fput_light(file, fput_needed); 1880 fdput(f);
1881 } 1881 }
1882 } 1882 }
1883 1883
1884 nd->inode = nd->path.dentry->d_inode; 1884 nd->inode = nd->path.dentry->d_inode;
1885 return 0; 1885 return 0;
1886
1887fput_fail:
1888 fput_light(file, fput_needed);
1889out_fail:
1890 return retval;
1891} 1886}
1892 1887
1893static inline int lookup_last(struct nameidata *nd, struct path *path) 1888static inline int lookup_last(struct nameidata *nd, struct path *path)
@@ -3971,7 +3966,7 @@ EXPORT_SYMBOL(user_path_at);
3971EXPORT_SYMBOL(follow_down_one); 3966EXPORT_SYMBOL(follow_down_one);
3972EXPORT_SYMBOL(follow_down); 3967EXPORT_SYMBOL(follow_down);
3973EXPORT_SYMBOL(follow_up); 3968EXPORT_SYMBOL(follow_up);
3974EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 3969EXPORT_SYMBOL(get_write_access); /* nfsd */
3975EXPORT_SYMBOL(getname); 3970EXPORT_SYMBOL(getname);
3976EXPORT_SYMBOL(lock_rename); 3971EXPORT_SYMBOL(lock_rename);
3977EXPORT_SYMBOL(lookup_one_len); 3972EXPORT_SYMBOL(lookup_one_len);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index eaa74323663..d7e9fe77188 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -89,6 +89,11 @@ static int init_inodecache(void)
89 89
90static void destroy_inodecache(void) 90static void destroy_inodecache(void)
91{ 91{
92 /*
93 * Make sure all delayed rcu free inodes are flushed before we
94 * destroy cache.
95 */
96 rcu_barrier();
92 kmem_cache_destroy(ncp_inode_cachep); 97 kmem_cache_destroy(ncp_inode_cachep);
93} 98}
94 99
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9b47610338f..e4c716d374a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1571,6 +1571,11 @@ static int __init nfs_init_inodecache(void)
1571 1571
1572static void nfs_destroy_inodecache(void) 1572static void nfs_destroy_inodecache(void)
1573{ 1573{
1574 /*
1575 * Make sure all delayed rcu free inodes are flushed before we
1576 * destroy cache.
1577 */
1578 rcu_barrier();
1574 kmem_cache_destroy(nfs_inode_cachep); 1579 kmem_cache_destroy(nfs_inode_cachep);
1575} 1580}
1576 1581
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index cc894eda385..48a1bad3733 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2837,8 +2837,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
2837 return -ENOMEM; 2837 return -ENOMEM;
2838 } 2838 }
2839 fp->fi_lease = fl; 2839 fp->fi_lease = fl;
2840 fp->fi_deleg_file = fl->fl_file; 2840 fp->fi_deleg_file = get_file(fl->fl_file);
2841 get_file(fp->fi_deleg_file);
2842 atomic_set(&fp->fi_delegees, 1); 2841 atomic_set(&fp->fi_delegees, 1);
2843 list_add(&dp->dl_perfile, &fp->fi_delegations); 2842 list_add(&dp->dl_perfile, &fp->fi_delegations);
2844 return 0; 2843 return 0;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6a10812711c..3c991dc84f2 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1382,6 +1382,12 @@ static void nilfs_segbuf_init_once(void *obj)
1382 1382
1383static void nilfs_destroy_cachep(void) 1383static void nilfs_destroy_cachep(void)
1384{ 1384{
1385 /*
1386 * Make sure all delayed rcu free inodes are flushed before we
1387 * destroy cache.
1388 */
1389 rcu_barrier();
1390
1385 if (nilfs_inode_cachep) 1391 if (nilfs_inode_cachep)
1386 kmem_cache_destroy(nilfs_inode_cachep); 1392 kmem_cache_destroy(nilfs_inode_cachep);
1387 if (nilfs_transaction_cachep) 1393 if (nilfs_transaction_cachep)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index d4380366973..721d692fa8d 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -58,7 +58,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
58 return fsnotify_remove_notify_event(group); 58 return fsnotify_remove_notify_event(group);
59} 59}
60 60
61static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) 61static int create_fd(struct fsnotify_group *group,
62 struct fsnotify_event *event,
63 struct file **file)
62{ 64{
63 int client_fd; 65 int client_fd;
64 struct file *new_file; 66 struct file *new_file;
@@ -98,7 +100,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
98 put_unused_fd(client_fd); 100 put_unused_fd(client_fd);
99 client_fd = PTR_ERR(new_file); 101 client_fd = PTR_ERR(new_file);
100 } else { 102 } else {
101 fd_install(client_fd, new_file); 103 *file = new_file;
102 } 104 }
103 105
104 return client_fd; 106 return client_fd;
@@ -106,13 +108,15 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
106 108
107static int fill_event_metadata(struct fsnotify_group *group, 109static int fill_event_metadata(struct fsnotify_group *group,
108 struct fanotify_event_metadata *metadata, 110 struct fanotify_event_metadata *metadata,
109 struct fsnotify_event *event) 111 struct fsnotify_event *event,
112 struct file **file)
110{ 113{
111 int ret = 0; 114 int ret = 0;
112 115
113 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, 116 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
114 group, metadata, event); 117 group, metadata, event);
115 118
119 *file = NULL;
116 metadata->event_len = FAN_EVENT_METADATA_LEN; 120 metadata->event_len = FAN_EVENT_METADATA_LEN;
117 metadata->metadata_len = FAN_EVENT_METADATA_LEN; 121 metadata->metadata_len = FAN_EVENT_METADATA_LEN;
118 metadata->vers = FANOTIFY_METADATA_VERSION; 122 metadata->vers = FANOTIFY_METADATA_VERSION;
@@ -121,7 +125,7 @@ static int fill_event_metadata(struct fsnotify_group *group,
121 if (unlikely(event->mask & FAN_Q_OVERFLOW)) 125 if (unlikely(event->mask & FAN_Q_OVERFLOW))
122 metadata->fd = FAN_NOFD; 126 metadata->fd = FAN_NOFD;
123 else { 127 else {
124 metadata->fd = create_fd(group, event); 128 metadata->fd = create_fd(group, event, file);
125 if (metadata->fd < 0) 129 if (metadata->fd < 0)
126 ret = metadata->fd; 130 ret = metadata->fd;
127 } 131 }
@@ -220,25 +224,6 @@ static int prepare_for_access_response(struct fsnotify_group *group,
220 return 0; 224 return 0;
221} 225}
222 226
223static void remove_access_response(struct fsnotify_group *group,
224 struct fsnotify_event *event,
225 __s32 fd)
226{
227 struct fanotify_response_event *re;
228
229 if (!(event->mask & FAN_ALL_PERM_EVENTS))
230 return;
231
232 re = dequeue_re(group, fd);
233 if (!re)
234 return;
235
236 BUG_ON(re->event != event);
237
238 kmem_cache_free(fanotify_response_event_cache, re);
239
240 return;
241}
242#else 227#else
243static int prepare_for_access_response(struct fsnotify_group *group, 228static int prepare_for_access_response(struct fsnotify_group *group,
244 struct fsnotify_event *event, 229 struct fsnotify_event *event,
@@ -247,12 +232,6 @@ static int prepare_for_access_response(struct fsnotify_group *group,
247 return 0; 232 return 0;
248} 233}
249 234
250static void remove_access_response(struct fsnotify_group *group,
251 struct fsnotify_event *event,
252 __s32 fd)
253{
254 return;
255}
256#endif 235#endif
257 236
258static ssize_t copy_event_to_user(struct fsnotify_group *group, 237static ssize_t copy_event_to_user(struct fsnotify_group *group,
@@ -260,31 +239,33 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
260 char __user *buf) 239 char __user *buf)
261{ 240{
262 struct fanotify_event_metadata fanotify_event_metadata; 241 struct fanotify_event_metadata fanotify_event_metadata;
242 struct file *f;
263 int fd, ret; 243 int fd, ret;
264 244
265 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 245 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
266 246
267 ret = fill_event_metadata(group, &fanotify_event_metadata, event); 247 ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f);
268 if (ret < 0) 248 if (ret < 0)
269 goto out; 249 goto out;
270 250
271 fd = fanotify_event_metadata.fd; 251 fd = fanotify_event_metadata.fd;
272 ret = prepare_for_access_response(group, event, fd);
273 if (ret)
274 goto out_close_fd;
275
276 ret = -EFAULT; 252 ret = -EFAULT;
277 if (copy_to_user(buf, &fanotify_event_metadata, 253 if (copy_to_user(buf, &fanotify_event_metadata,
278 fanotify_event_metadata.event_len)) 254 fanotify_event_metadata.event_len))
279 goto out_kill_access_response; 255 goto out_close_fd;
280 256
257 ret = prepare_for_access_response(group, event, fd);
258 if (ret)
259 goto out_close_fd;
260
261 fd_install(fd, f);
281 return fanotify_event_metadata.event_len; 262 return fanotify_event_metadata.event_len;
282 263
283out_kill_access_response:
284 remove_access_response(group, event, fd);
285out_close_fd: 264out_close_fd:
286 if (fd != FAN_NOFD) 265 if (fd != FAN_NOFD) {
287 sys_close(fd); 266 put_unused_fd(fd);
267 fput(f);
268 }
288out: 269out:
289#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 270#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
290 if (event->mask & FAN_ALL_PERM_EVENTS) { 271 if (event->mask & FAN_ALL_PERM_EVENTS) {
@@ -470,24 +451,22 @@ static int fanotify_find_path(int dfd, const char __user *filename,
470 dfd, filename, flags); 451 dfd, filename, flags);
471 452
472 if (filename == NULL) { 453 if (filename == NULL) {
473 struct file *file; 454 struct fd f = fdget(dfd);
474 int fput_needed;
475 455
476 ret = -EBADF; 456 ret = -EBADF;
477 file = fget_light(dfd, &fput_needed); 457 if (!f.file)
478 if (!file)
479 goto out; 458 goto out;
480 459
481 ret = -ENOTDIR; 460 ret = -ENOTDIR;
482 if ((flags & FAN_MARK_ONLYDIR) && 461 if ((flags & FAN_MARK_ONLYDIR) &&
483 !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) { 462 !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) {
484 fput_light(file, fput_needed); 463 fdput(f);
485 goto out; 464 goto out;
486 } 465 }
487 466
488 *path = file->f_path; 467 *path = f.file->f_path;
489 path_get(path); 468 path_get(path);
490 fput_light(file, fput_needed); 469 fdput(f);
491 } else { 470 } else {
492 unsigned int lookup_flags = 0; 471 unsigned int lookup_flags = 0;
493 472
@@ -767,9 +746,9 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
767 struct inode *inode = NULL; 746 struct inode *inode = NULL;
768 struct vfsmount *mnt = NULL; 747 struct vfsmount *mnt = NULL;
769 struct fsnotify_group *group; 748 struct fsnotify_group *group;
770 struct file *filp; 749 struct fd f;
771 struct path path; 750 struct path path;
772 int ret, fput_needed; 751 int ret;
773 752
774 pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", 753 pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
775 __func__, fanotify_fd, flags, dfd, pathname, mask); 754 __func__, fanotify_fd, flags, dfd, pathname, mask);
@@ -803,15 +782,15 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
803#endif 782#endif
804 return -EINVAL; 783 return -EINVAL;
805 784
806 filp = fget_light(fanotify_fd, &fput_needed); 785 f = fdget(fanotify_fd);
807 if (unlikely(!filp)) 786 if (unlikely(!f.file))
808 return -EBADF; 787 return -EBADF;
809 788
810 /* verify that this is indeed an fanotify instance */ 789 /* verify that this is indeed an fanotify instance */
811 ret = -EINVAL; 790 ret = -EINVAL;
812 if (unlikely(filp->f_op != &fanotify_fops)) 791 if (unlikely(f.file->f_op != &fanotify_fops))
813 goto fput_and_out; 792 goto fput_and_out;
814 group = filp->private_data; 793 group = f.file->private_data;
815 794
816 /* 795 /*
817 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not 796 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
@@ -858,7 +837,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
858 837
859 path_put(&path); 838 path_put(&path);
860fput_and_out: 839fput_and_out:
861 fput_light(filp, fput_needed); 840 fdput(f);
862 return ret; 841 return ret;
863} 842}
864 843
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 8445fbc8985..c311dda054a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -757,16 +757,16 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
757 struct fsnotify_group *group; 757 struct fsnotify_group *group;
758 struct inode *inode; 758 struct inode *inode;
759 struct path path; 759 struct path path;
760 struct file *filp; 760 struct fd f;
761 int ret, fput_needed; 761 int ret;
762 unsigned flags = 0; 762 unsigned flags = 0;
763 763
764 filp = fget_light(fd, &fput_needed); 764 f = fdget(fd);
765 if (unlikely(!filp)) 765 if (unlikely(!f.file))
766 return -EBADF; 766 return -EBADF;
767 767
768 /* verify that this is indeed an inotify instance */ 768 /* verify that this is indeed an inotify instance */
769 if (unlikely(filp->f_op != &inotify_fops)) { 769 if (unlikely(f.file->f_op != &inotify_fops)) {
770 ret = -EINVAL; 770 ret = -EINVAL;
771 goto fput_and_out; 771 goto fput_and_out;
772 } 772 }
@@ -782,13 +782,13 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
782 782
783 /* inode held in place by reference to path; group by fget on fd */ 783 /* inode held in place by reference to path; group by fget on fd */
784 inode = path.dentry->d_inode; 784 inode = path.dentry->d_inode;
785 group = filp->private_data; 785 group = f.file->private_data;
786 786
787 /* create/update an inode mark */ 787 /* create/update an inode mark */
788 ret = inotify_update_watch(group, inode, mask); 788 ret = inotify_update_watch(group, inode, mask);
789 path_put(&path); 789 path_put(&path);
790fput_and_out: 790fput_and_out:
791 fput_light(filp, fput_needed); 791 fdput(f);
792 return ret; 792 return ret;
793} 793}
794 794
@@ -796,19 +796,19 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
796{ 796{
797 struct fsnotify_group *group; 797 struct fsnotify_group *group;
798 struct inotify_inode_mark *i_mark; 798 struct inotify_inode_mark *i_mark;
799 struct file *filp; 799 struct fd f;
800 int ret = 0, fput_needed; 800 int ret = 0;
801 801
802 filp = fget_light(fd, &fput_needed); 802 f = fdget(fd);
803 if (unlikely(!filp)) 803 if (unlikely(!f.file))
804 return -EBADF; 804 return -EBADF;
805 805
806 /* verify that this is indeed an inotify instance */ 806 /* verify that this is indeed an inotify instance */
807 ret = -EINVAL; 807 ret = -EINVAL;
808 if (unlikely(filp->f_op != &inotify_fops)) 808 if (unlikely(f.file->f_op != &inotify_fops))
809 goto out; 809 goto out;
810 810
811 group = filp->private_data; 811 group = f.file->private_data;
812 812
813 ret = -EINVAL; 813 ret = -EINVAL;
814 i_mark = inotify_idr_find(group, wd); 814 i_mark = inotify_idr_find(group, wd);
@@ -823,7 +823,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
823 fsnotify_put_mark(&i_mark->fsn_mark); 823 fsnotify_put_mark(&i_mark->fsn_mark);
824 824
825out: 825out:
826 fput_light(filp, fput_needed); 826 fdput(f);
827 return ret; 827 return ret;
828} 828}
829 829
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index da01c165067..4a8289f8b16 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3193,6 +3193,12 @@ static void __exit exit_ntfs_fs(void)
3193 ntfs_debug("Unregistering NTFS driver."); 3193 ntfs_debug("Unregistering NTFS driver.");
3194 3194
3195 unregister_filesystem(&ntfs_fs_type); 3195 unregister_filesystem(&ntfs_fs_type);
3196
3197 /*
3198 * Make sure all delayed rcu free inodes are flushed before we
3199 * destroy cache.
3200 */
3201 rcu_barrier();
3196 kmem_cache_destroy(ntfs_big_inode_cache); 3202 kmem_cache_destroy(ntfs_big_inode_cache);
3197 kmem_cache_destroy(ntfs_inode_cache); 3203 kmem_cache_destroy(ntfs_inode_cache);
3198 kmem_cache_destroy(ntfs_name_cache); 3204 kmem_cache_destroy(ntfs_name_cache);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a4e855e3690..f7c648d7d6b 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1746,8 +1746,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1746 long fd; 1746 long fd;
1747 int sectsize; 1747 int sectsize;
1748 char *p = (char *)page; 1748 char *p = (char *)page;
1749 struct file *filp = NULL; 1749 struct fd f;
1750 struct inode *inode = NULL; 1750 struct inode *inode;
1751 ssize_t ret = -EINVAL; 1751 ssize_t ret = -EINVAL;
1752 int live_threshold; 1752 int live_threshold;
1753 1753
@@ -1766,26 +1766,26 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1766 if (fd < 0 || fd >= INT_MAX) 1766 if (fd < 0 || fd >= INT_MAX)
1767 goto out; 1767 goto out;
1768 1768
1769 filp = fget(fd); 1769 f = fdget(fd);
1770 if (filp == NULL) 1770 if (f.file == NULL)
1771 goto out; 1771 goto out;
1772 1772
1773 if (reg->hr_blocks == 0 || reg->hr_start_block == 0 || 1773 if (reg->hr_blocks == 0 || reg->hr_start_block == 0 ||
1774 reg->hr_block_bytes == 0) 1774 reg->hr_block_bytes == 0)
1775 goto out; 1775 goto out2;
1776 1776
1777 inode = igrab(filp->f_mapping->host); 1777 inode = igrab(f.file->f_mapping->host);
1778 if (inode == NULL) 1778 if (inode == NULL)
1779 goto out; 1779 goto out2;
1780 1780
1781 if (!S_ISBLK(inode->i_mode)) 1781 if (!S_ISBLK(inode->i_mode))
1782 goto out; 1782 goto out3;
1783 1783
1784 reg->hr_bdev = I_BDEV(filp->f_mapping->host); 1784 reg->hr_bdev = I_BDEV(f.file->f_mapping->host);
1785 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); 1785 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL);
1786 if (ret) { 1786 if (ret) {
1787 reg->hr_bdev = NULL; 1787 reg->hr_bdev = NULL;
1788 goto out; 1788 goto out3;
1789 } 1789 }
1790 inode = NULL; 1790 inode = NULL;
1791 1791
@@ -1797,7 +1797,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1797 "blocksize %u incorrect for device, expected %d", 1797 "blocksize %u incorrect for device, expected %d",
1798 reg->hr_block_bytes, sectsize); 1798 reg->hr_block_bytes, sectsize);
1799 ret = -EINVAL; 1799 ret = -EINVAL;
1800 goto out; 1800 goto out3;
1801 } 1801 }
1802 1802
1803 o2hb_init_region_params(reg); 1803 o2hb_init_region_params(reg);
@@ -1811,13 +1811,13 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1811 ret = o2hb_map_slot_data(reg); 1811 ret = o2hb_map_slot_data(reg);
1812 if (ret) { 1812 if (ret) {
1813 mlog_errno(ret); 1813 mlog_errno(ret);
1814 goto out; 1814 goto out3;
1815 } 1815 }
1816 1816
1817 ret = o2hb_populate_slot_data(reg); 1817 ret = o2hb_populate_slot_data(reg);
1818 if (ret) { 1818 if (ret) {
1819 mlog_errno(ret); 1819 mlog_errno(ret);
1820 goto out; 1820 goto out3;
1821 } 1821 }
1822 1822
1823 INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout); 1823 INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout);
@@ -1847,7 +1847,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1847 if (IS_ERR(hb_task)) { 1847 if (IS_ERR(hb_task)) {
1848 ret = PTR_ERR(hb_task); 1848 ret = PTR_ERR(hb_task);
1849 mlog_errno(ret); 1849 mlog_errno(ret);
1850 goto out; 1850 goto out3;
1851 } 1851 }
1852 1852
1853 spin_lock(&o2hb_live_lock); 1853 spin_lock(&o2hb_live_lock);
@@ -1863,7 +1863,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1863 1863
1864 if (reg->hr_aborted_start) { 1864 if (reg->hr_aborted_start) {
1865 ret = -EIO; 1865 ret = -EIO;
1866 goto out; 1866 goto out3;
1867 } 1867 }
1868 1868
1869 /* Ok, we were woken. Make sure it wasn't by drop_item() */ 1869 /* Ok, we were woken. Make sure it wasn't by drop_item() */
@@ -1882,11 +1882,11 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1882 printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", 1882 printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n",
1883 config_item_name(&reg->hr_item), reg->hr_dev_name); 1883 config_item_name(&reg->hr_item), reg->hr_dev_name);
1884 1884
1885out3:
1886 iput(inode);
1887out2:
1888 fdput(f);
1885out: 1889out:
1886 if (filp)
1887 fput(filp);
1888 if (inode)
1889 iput(inode);
1890 if (ret < 0) { 1890 if (ret < 0) {
1891 if (reg->hr_bdev) { 1891 if (reg->hr_bdev) {
1892 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); 1892 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 83b6f98e066..16b712d260d 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -691,6 +691,11 @@ static void __exit exit_dlmfs_fs(void)
691 flush_workqueue(user_dlm_worker); 691 flush_workqueue(user_dlm_worker);
692 destroy_workqueue(user_dlm_worker); 692 destroy_workqueue(user_dlm_worker);
693 693
694 /*
695 * Make sure all delayed rcu free inodes are flushed before we
696 * destroy cache.
697 */
698 rcu_barrier();
694 kmem_cache_destroy(dlmfs_inode_cache); 699 kmem_cache_destroy(dlmfs_inode_cache);
695 700
696 bdi_destroy(&dlmfs_backing_dev_info); 701 bdi_destroy(&dlmfs_backing_dev_info);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 68f4541c2db..0e91ec22a94 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1818,6 +1818,11 @@ static int ocfs2_initialize_mem_caches(void)
1818 1818
1819static void ocfs2_free_mem_caches(void) 1819static void ocfs2_free_mem_caches(void)
1820{ 1820{
1821 /*
1822 * Make sure all delayed rcu free inodes are flushed before we
1823 * destroy cache.
1824 */
1825 rcu_barrier();
1821 if (ocfs2_inode_cachep) 1826 if (ocfs2_inode_cachep)
1822 kmem_cache_destroy(ocfs2_inode_cachep); 1827 kmem_cache_destroy(ocfs2_inode_cachep);
1823 ocfs2_inode_cachep = NULL; 1828 ocfs2_inode_cachep = NULL;
diff --git a/fs/open.c b/fs/open.c
index b0bae3a4182..44da0feeca2 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -132,27 +132,27 @@ SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
132 132
133static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 133static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
134{ 134{
135 struct inode * inode; 135 struct inode *inode;
136 struct dentry *dentry; 136 struct dentry *dentry;
137 struct file * file; 137 struct fd f;
138 int error; 138 int error;
139 139
140 error = -EINVAL; 140 error = -EINVAL;
141 if (length < 0) 141 if (length < 0)
142 goto out; 142 goto out;
143 error = -EBADF; 143 error = -EBADF;
144 file = fget(fd); 144 f = fdget(fd);
145 if (!file) 145 if (!f.file)
146 goto out; 146 goto out;
147 147
148 /* explicitly opened as large or we are on 64-bit box */ 148 /* explicitly opened as large or we are on 64-bit box */
149 if (file->f_flags & O_LARGEFILE) 149 if (f.file->f_flags & O_LARGEFILE)
150 small = 0; 150 small = 0;
151 151
152 dentry = file->f_path.dentry; 152 dentry = f.file->f_path.dentry;
153 inode = dentry->d_inode; 153 inode = dentry->d_inode;
154 error = -EINVAL; 154 error = -EINVAL;
155 if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) 155 if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
156 goto out_putf; 156 goto out_putf;
157 157
158 error = -EINVAL; 158 error = -EINVAL;
@@ -165,14 +165,14 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
165 goto out_putf; 165 goto out_putf;
166 166
167 sb_start_write(inode->i_sb); 167 sb_start_write(inode->i_sb);
168 error = locks_verify_truncate(inode, file, length); 168 error = locks_verify_truncate(inode, f.file, length);
169 if (!error) 169 if (!error)
170 error = security_path_truncate(&file->f_path); 170 error = security_path_truncate(&f.file->f_path);
171 if (!error) 171 if (!error)
172 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); 172 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
173 sb_end_write(inode->i_sb); 173 sb_end_write(inode->i_sb);
174out_putf: 174out_putf:
175 fput(file); 175 fdput(f);
176out: 176out:
177 return error; 177 return error;
178} 178}
@@ -276,15 +276,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
276 276
277SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) 277SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
278{ 278{
279 struct file *file; 279 struct fd f = fdget(fd);
280 int error = -EBADF; 280 int error = -EBADF;
281 281
282 file = fget(fd); 282 if (f.file) {
283 if (file) { 283 error = do_fallocate(f.file, mode, offset, len);
284 error = do_fallocate(file, mode, offset, len); 284 fdput(f);
285 fput(file);
286 } 285 }
287
288 return error; 286 return error;
289} 287}
290 288
@@ -400,16 +398,15 @@ out:
400 398
401SYSCALL_DEFINE1(fchdir, unsigned int, fd) 399SYSCALL_DEFINE1(fchdir, unsigned int, fd)
402{ 400{
403 struct file *file; 401 struct fd f = fdget_raw(fd);
404 struct inode *inode; 402 struct inode *inode;
405 int error, fput_needed; 403 int error = -EBADF;
406 404
407 error = -EBADF; 405 error = -EBADF;
408 file = fget_raw_light(fd, &fput_needed); 406 if (!f.file)
409 if (!file)
410 goto out; 407 goto out;
411 408
412 inode = file->f_path.dentry->d_inode; 409 inode = f.file->f_path.dentry->d_inode;
413 410
414 error = -ENOTDIR; 411 error = -ENOTDIR;
415 if (!S_ISDIR(inode->i_mode)) 412 if (!S_ISDIR(inode->i_mode))
@@ -417,9 +414,9 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
417 414
418 error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); 415 error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
419 if (!error) 416 if (!error)
420 set_fs_pwd(current->fs, &file->f_path); 417 set_fs_pwd(current->fs, &f.file->f_path);
421out_putf: 418out_putf:
422 fput_light(file, fput_needed); 419 fdput(f);
423out: 420out:
424 return error; 421 return error;
425} 422}
@@ -582,23 +579,20 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group
582 579
583SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) 580SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
584{ 581{
585 struct file * file; 582 struct fd f = fdget(fd);
586 int error = -EBADF; 583 int error = -EBADF;
587 struct dentry * dentry;
588 584
589 file = fget(fd); 585 if (!f.file)
590 if (!file)
591 goto out; 586 goto out;
592 587
593 error = mnt_want_write_file(file); 588 error = mnt_want_write_file(f.file);
594 if (error) 589 if (error)
595 goto out_fput; 590 goto out_fput;
596 dentry = file->f_path.dentry; 591 audit_inode(NULL, f.file->f_path.dentry);
597 audit_inode(NULL, dentry); 592 error = chown_common(&f.file->f_path, user, group);
598 error = chown_common(&file->f_path, user, group); 593 mnt_drop_write_file(f.file);
599 mnt_drop_write_file(file);
600out_fput: 594out_fput:
601 fput(file); 595 fdput(f);
602out: 596out:
603 return error; 597 return error;
604} 598}
@@ -803,50 +797,6 @@ struct file *dentry_open(const struct path *path, int flags,
803} 797}
804EXPORT_SYMBOL(dentry_open); 798EXPORT_SYMBOL(dentry_open);
805 799
806static void __put_unused_fd(struct files_struct *files, unsigned int fd)
807{
808 struct fdtable *fdt = files_fdtable(files);
809 __clear_open_fd(fd, fdt);
810 if (fd < files->next_fd)
811 files->next_fd = fd;
812}
813
814void put_unused_fd(unsigned int fd)
815{
816 struct files_struct *files = current->files;
817 spin_lock(&files->file_lock);
818 __put_unused_fd(files, fd);
819 spin_unlock(&files->file_lock);
820}
821
822EXPORT_SYMBOL(put_unused_fd);
823
824/*
825 * Install a file pointer in the fd array.
826 *
827 * The VFS is full of places where we drop the files lock between
828 * setting the open_fds bitmap and installing the file in the file
829 * array. At any such point, we are vulnerable to a dup2() race
830 * installing a file in the array before us. We need to detect this and
831 * fput() the struct file we are about to overwrite in this case.
832 *
833 * It should never happen - if we allow dup2() do it, _really_ bad things
834 * will follow.
835 */
836
837void fd_install(unsigned int fd, struct file *file)
838{
839 struct files_struct *files = current->files;
840 struct fdtable *fdt;
841 spin_lock(&files->file_lock);
842 fdt = files_fdtable(files);
843 BUG_ON(fdt->fd[fd] != NULL);
844 rcu_assign_pointer(fdt->fd[fd], file);
845 spin_unlock(&files->file_lock);
846}
847
848EXPORT_SYMBOL(fd_install);
849
850static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) 800static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
851{ 801{
852 int lookup_flags = 0; 802 int lookup_flags = 0;
@@ -858,7 +808,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
858 op->mode = 0; 808 op->mode = 0;
859 809
860 /* Must never be set by userspace */ 810 /* Must never be set by userspace */
861 flags &= ~FMODE_NONOTIFY; 811 flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC;
862 812
863 /* 813 /*
864 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 814 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
@@ -1038,23 +988,7 @@ EXPORT_SYMBOL(filp_close);
1038 */ 988 */
1039SYSCALL_DEFINE1(close, unsigned int, fd) 989SYSCALL_DEFINE1(close, unsigned int, fd)
1040{ 990{
1041 struct file * filp; 991 int retval = __close_fd(current->files, fd);
1042 struct files_struct *files = current->files;
1043 struct fdtable *fdt;
1044 int retval;
1045
1046 spin_lock(&files->file_lock);
1047 fdt = files_fdtable(files);
1048 if (fd >= fdt->max_fds)
1049 goto out_unlock;
1050 filp = fdt->fd[fd];
1051 if (!filp)
1052 goto out_unlock;
1053 rcu_assign_pointer(fdt->fd[fd], NULL);
1054 __clear_close_on_exec(fd, fdt);
1055 __put_unused_fd(files, fd);
1056 spin_unlock(&files->file_lock);
1057 retval = filp_close(filp, files);
1058 992
1059 /* can't restart close syscall because file table entry was cleared */ 993 /* can't restart close syscall because file table entry was cleared */
1060 if (unlikely(retval == -ERESTARTSYS || 994 if (unlikely(retval == -ERESTARTSYS ||
@@ -1064,10 +998,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
1064 retval = -EINTR; 998 retval = -EINTR;
1065 999
1066 return retval; 1000 return retval;
1067
1068out_unlock:
1069 spin_unlock(&files->file_lock);
1070 return -EBADF;
1071} 1001}
1072EXPORT_SYMBOL(sys_close); 1002EXPORT_SYMBOL(sys_close);
1073 1003
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 4a3477949bc..2ad080faca3 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -463,6 +463,11 @@ static int __init init_openprom_fs(void)
463static void __exit exit_openprom_fs(void) 463static void __exit exit_openprom_fs(void)
464{ 464{
465 unregister_filesystem(&openprom_fs_type); 465 unregister_filesystem(&openprom_fs_type);
466 /*
467 * Make sure all delayed rcu free inodes are flushed before we
468 * destroy cache.
469 */
470 rcu_barrier();
466 kmem_cache_destroy(op_inode_cachep); 471 kmem_cache_destroy(op_inode_cachep);
467} 472}
468 473
diff --git a/fs/pipe.c b/fs/pipe.c
index 8d85d7068c1..bd3479db4b6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1064,9 +1064,8 @@ err_inode:
1064 return err; 1064 return err;
1065} 1065}
1066 1066
1067int do_pipe_flags(int *fd, int flags) 1067static int __do_pipe_flags(int *fd, struct file **files, int flags)
1068{ 1068{
1069 struct file *files[2];
1070 int error; 1069 int error;
1071 int fdw, fdr; 1070 int fdw, fdr;
1072 1071
@@ -1088,11 +1087,8 @@ int do_pipe_flags(int *fd, int flags)
1088 fdw = error; 1087 fdw = error;
1089 1088
1090 audit_fd_pair(fdr, fdw); 1089 audit_fd_pair(fdr, fdw);
1091 fd_install(fdr, files[0]);
1092 fd_install(fdw, files[1]);
1093 fd[0] = fdr; 1090 fd[0] = fdr;
1094 fd[1] = fdw; 1091 fd[1] = fdw;
1095
1096 return 0; 1092 return 0;
1097 1093
1098 err_fdr: 1094 err_fdr:
@@ -1103,21 +1099,38 @@ int do_pipe_flags(int *fd, int flags)
1103 return error; 1099 return error;
1104} 1100}
1105 1101
1102int do_pipe_flags(int *fd, int flags)
1103{
1104 struct file *files[2];
1105 int error = __do_pipe_flags(fd, files, flags);
1106 if (!error) {
1107 fd_install(fd[0], files[0]);
1108 fd_install(fd[1], files[1]);
1109 }
1110 return error;
1111}
1112
1106/* 1113/*
1107 * sys_pipe() is the normal C calling standard for creating 1114 * sys_pipe() is the normal C calling standard for creating
1108 * a pipe. It's not the way Unix traditionally does this, though. 1115 * a pipe. It's not the way Unix traditionally does this, though.
1109 */ 1116 */
1110SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) 1117SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
1111{ 1118{
1119 struct file *files[2];
1112 int fd[2]; 1120 int fd[2];
1113 int error; 1121 int error;
1114 1122
1115 error = do_pipe_flags(fd, flags); 1123 error = __do_pipe_flags(fd, files, flags);
1116 if (!error) { 1124 if (!error) {
1117 if (copy_to_user(fildes, fd, sizeof(fd))) { 1125 if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
1118 sys_close(fd[0]); 1126 fput(files[0]);
1119 sys_close(fd[1]); 1127 fput(files[1]);
1128 put_unused_fd(fd[0]);
1129 put_unused_fd(fd[1]);
1120 error = -EFAULT; 1130 error = -EFAULT;
1131 } else {
1132 fd_install(fd[0], files[0]);
1133 fd_install(fd[1], files[1]);
1121 } 1134 }
1122 } 1135 }
1123 return error; 1136 return error;
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index c1c72933592..99349efbbc2 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o
8proc-$(CONFIG_MMU) := mmu.o task_mmu.o 8proc-$(CONFIG_MMU) := mmu.o task_mmu.o
9 9
10proc-y += inode.o root.o base.o generic.o array.o \ 10proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o 11 proc_tty.o fd.o
12proc-y += cmdline.o 12proc-y += cmdline.o
13proc-y += consoles.o 13proc-y += consoles.o
14proc-y += cpuinfo.o 14proc-y += cpuinfo.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index acd1960c28a..d295af99367 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -90,6 +90,7 @@
90#endif 90#endif
91#include <trace/events/oom.h> 91#include <trace/events/oom.h>
92#include "internal.h" 92#include "internal.h"
93#include "fd.h"
93 94
94/* NOTE: 95/* NOTE:
95 * Implementing inode permission operations in /proc is almost 96 * Implementing inode permission operations in /proc is almost
@@ -136,8 +137,6 @@ struct pid_entry {
136 NULL, &proc_single_file_operations, \ 137 NULL, &proc_single_file_operations, \
137 { .proc_show = show } ) 138 { .proc_show = show } )
138 139
139static int proc_fd_permission(struct inode *inode, int mask);
140
141/* 140/*
142 * Count the number of hardlinks for the pid_entry table, excluding the . 141 * Count the number of hardlinks for the pid_entry table, excluding the .
143 * and .. links. 142 * and .. links.
@@ -1500,7 +1499,7 @@ out:
1500 return error; 1499 return error;
1501} 1500}
1502 1501
1503static const struct inode_operations proc_pid_link_inode_operations = { 1502const struct inode_operations proc_pid_link_inode_operations = {
1504 .readlink = proc_pid_readlink, 1503 .readlink = proc_pid_readlink,
1505 .follow_link = proc_pid_follow_link, 1504 .follow_link = proc_pid_follow_link,
1506 .setattr = proc_setattr, 1505 .setattr = proc_setattr,
@@ -1509,21 +1508,6 @@ static const struct inode_operations proc_pid_link_inode_operations = {
1509 1508
1510/* building an inode */ 1509/* building an inode */
1511 1510
1512static int task_dumpable(struct task_struct *task)
1513{
1514 int dumpable = 0;
1515 struct mm_struct *mm;
1516
1517 task_lock(task);
1518 mm = task->mm;
1519 if (mm)
1520 dumpable = get_dumpable(mm);
1521 task_unlock(task);
1522 if(dumpable == 1)
1523 return 1;
1524 return 0;
1525}
1526
1527struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) 1511struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1528{ 1512{
1529 struct inode * inode; 1513 struct inode * inode;
@@ -1649,15 +1633,6 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
1649 return 0; 1633 return 0;
1650} 1634}
1651 1635
1652static int pid_delete_dentry(const struct dentry * dentry)
1653{
1654 /* Is the task we represent dead?
1655 * If so, then don't put the dentry on the lru list,
1656 * kill it immediately.
1657 */
1658 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1659}
1660
1661const struct dentry_operations pid_dentry_operations = 1636const struct dentry_operations pid_dentry_operations =
1662{ 1637{
1663 .d_revalidate = pid_revalidate, 1638 .d_revalidate = pid_revalidate,
@@ -1720,289 +1695,6 @@ end_instantiate:
1720 return filldir(dirent, name, len, filp->f_pos, ino, type); 1695 return filldir(dirent, name, len, filp->f_pos, ino, type);
1721} 1696}
1722 1697
1723static unsigned name_to_int(struct dentry *dentry)
1724{
1725 const char *name = dentry->d_name.name;
1726 int len = dentry->d_name.len;
1727 unsigned n = 0;
1728
1729 if (len > 1 && *name == '0')
1730 goto out;
1731 while (len-- > 0) {
1732 unsigned c = *name++ - '0';
1733 if (c > 9)
1734 goto out;
1735 if (n >= (~0U-9)/10)
1736 goto out;
1737 n *= 10;
1738 n += c;
1739 }
1740 return n;
1741out:
1742 return ~0U;
1743}
1744
1745#define PROC_FDINFO_MAX 64
1746
1747static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1748{
1749 struct task_struct *task = get_proc_task(inode);
1750 struct files_struct *files = NULL;
1751 struct file *file;
1752 int fd = proc_fd(inode);
1753
1754 if (task) {
1755 files = get_files_struct(task);
1756 put_task_struct(task);
1757 }
1758 if (files) {
1759 /*
1760 * We are not taking a ref to the file structure, so we must
1761 * hold ->file_lock.
1762 */
1763 spin_lock(&files->file_lock);
1764 file = fcheck_files(files, fd);
1765 if (file) {
1766 unsigned int f_flags;
1767 struct fdtable *fdt;
1768
1769 fdt = files_fdtable(files);
1770 f_flags = file->f_flags & ~O_CLOEXEC;
1771 if (close_on_exec(fd, fdt))
1772 f_flags |= O_CLOEXEC;
1773
1774 if (path) {
1775 *path = file->f_path;
1776 path_get(&file->f_path);
1777 }
1778 if (info)
1779 snprintf(info, PROC_FDINFO_MAX,
1780 "pos:\t%lli\n"
1781 "flags:\t0%o\n",
1782 (long long) file->f_pos,
1783 f_flags);
1784 spin_unlock(&files->file_lock);
1785 put_files_struct(files);
1786 return 0;
1787 }
1788 spin_unlock(&files->file_lock);
1789 put_files_struct(files);
1790 }
1791 return -ENOENT;
1792}
1793
1794static int proc_fd_link(struct dentry *dentry, struct path *path)
1795{
1796 return proc_fd_info(dentry->d_inode, path, NULL);
1797}
1798
1799static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
1800{
1801 struct inode *inode;
1802 struct task_struct *task;
1803 int fd;
1804 struct files_struct *files;
1805 const struct cred *cred;
1806
1807 if (flags & LOOKUP_RCU)
1808 return -ECHILD;
1809
1810 inode = dentry->d_inode;
1811 task = get_proc_task(inode);
1812 fd = proc_fd(inode);
1813
1814 if (task) {
1815 files = get_files_struct(task);
1816 if (files) {
1817 struct file *file;
1818 rcu_read_lock();
1819 file = fcheck_files(files, fd);
1820 if (file) {
1821 unsigned f_mode = file->f_mode;
1822
1823 rcu_read_unlock();
1824 put_files_struct(files);
1825
1826 if (task_dumpable(task)) {
1827 rcu_read_lock();
1828 cred = __task_cred(task);
1829 inode->i_uid = cred->euid;
1830 inode->i_gid = cred->egid;
1831 rcu_read_unlock();
1832 } else {
1833 inode->i_uid = GLOBAL_ROOT_UID;
1834 inode->i_gid = GLOBAL_ROOT_GID;
1835 }
1836
1837 if (S_ISLNK(inode->i_mode)) {
1838 unsigned i_mode = S_IFLNK;
1839 if (f_mode & FMODE_READ)
1840 i_mode |= S_IRUSR | S_IXUSR;
1841 if (f_mode & FMODE_WRITE)
1842 i_mode |= S_IWUSR | S_IXUSR;
1843 inode->i_mode = i_mode;
1844 }
1845
1846 security_task_to_inode(task, inode);
1847 put_task_struct(task);
1848 return 1;
1849 }
1850 rcu_read_unlock();
1851 put_files_struct(files);
1852 }
1853 put_task_struct(task);
1854 }
1855 d_drop(dentry);
1856 return 0;
1857}
1858
1859static const struct dentry_operations tid_fd_dentry_operations =
1860{
1861 .d_revalidate = tid_fd_revalidate,
1862 .d_delete = pid_delete_dentry,
1863};
1864
1865static struct dentry *proc_fd_instantiate(struct inode *dir,
1866 struct dentry *dentry, struct task_struct *task, const void *ptr)
1867{
1868 unsigned fd = (unsigned long)ptr;
1869 struct inode *inode;
1870 struct proc_inode *ei;
1871 struct dentry *error = ERR_PTR(-ENOENT);
1872
1873 inode = proc_pid_make_inode(dir->i_sb, task);
1874 if (!inode)
1875 goto out;
1876 ei = PROC_I(inode);
1877 ei->fd = fd;
1878
1879 inode->i_mode = S_IFLNK;
1880 inode->i_op = &proc_pid_link_inode_operations;
1881 inode->i_size = 64;
1882 ei->op.proc_get_link = proc_fd_link;
1883 d_set_d_op(dentry, &tid_fd_dentry_operations);
1884 d_add(dentry, inode);
1885 /* Close the race of the process dying before we return the dentry */
1886 if (tid_fd_revalidate(dentry, 0))
1887 error = NULL;
1888
1889 out:
1890 return error;
1891}
1892
1893static struct dentry *proc_lookupfd_common(struct inode *dir,
1894 struct dentry *dentry,
1895 instantiate_t instantiate)
1896{
1897 struct task_struct *task = get_proc_task(dir);
1898 unsigned fd = name_to_int(dentry);
1899 struct dentry *result = ERR_PTR(-ENOENT);
1900
1901 if (!task)
1902 goto out_no_task;
1903 if (fd == ~0U)
1904 goto out;
1905
1906 result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
1907out:
1908 put_task_struct(task);
1909out_no_task:
1910 return result;
1911}
1912
1913static int proc_readfd_common(struct file * filp, void * dirent,
1914 filldir_t filldir, instantiate_t instantiate)
1915{
1916 struct dentry *dentry = filp->f_path.dentry;
1917 struct inode *inode = dentry->d_inode;
1918 struct task_struct *p = get_proc_task(inode);
1919 unsigned int fd, ino;
1920 int retval;
1921 struct files_struct * files;
1922
1923 retval = -ENOENT;
1924 if (!p)
1925 goto out_no_task;
1926 retval = 0;
1927
1928 fd = filp->f_pos;
1929 switch (fd) {
1930 case 0:
1931 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
1932 goto out;
1933 filp->f_pos++;
1934 case 1:
1935 ino = parent_ino(dentry);
1936 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
1937 goto out;
1938 filp->f_pos++;
1939 default:
1940 files = get_files_struct(p);
1941 if (!files)
1942 goto out;
1943 rcu_read_lock();
1944 for (fd = filp->f_pos-2;
1945 fd < files_fdtable(files)->max_fds;
1946 fd++, filp->f_pos++) {
1947 char name[PROC_NUMBUF];
1948 int len;
1949 int rv;
1950
1951 if (!fcheck_files(files, fd))
1952 continue;
1953 rcu_read_unlock();
1954
1955 len = snprintf(name, sizeof(name), "%d", fd);
1956 rv = proc_fill_cache(filp, dirent, filldir,
1957 name, len, instantiate, p,
1958 (void *)(unsigned long)fd);
1959 if (rv < 0)
1960 goto out_fd_loop;
1961 rcu_read_lock();
1962 }
1963 rcu_read_unlock();
1964out_fd_loop:
1965 put_files_struct(files);
1966 }
1967out:
1968 put_task_struct(p);
1969out_no_task:
1970 return retval;
1971}
1972
1973static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
1974 unsigned int flags)
1975{
1976 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
1977}
1978
1979static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
1980{
1981 return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
1982}
1983
1984static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
1985 size_t len, loff_t *ppos)
1986{
1987 char tmp[PROC_FDINFO_MAX];
1988 int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
1989 if (!err)
1990 err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
1991 return err;
1992}
1993
1994static const struct file_operations proc_fdinfo_file_operations = {
1995 .open = nonseekable_open,
1996 .read = proc_fdinfo_read,
1997 .llseek = no_llseek,
1998};
1999
2000static const struct file_operations proc_fd_operations = {
2001 .read = generic_read_dir,
2002 .readdir = proc_readfd,
2003 .llseek = default_llseek,
2004};
2005
2006#ifdef CONFIG_CHECKPOINT_RESTORE 1698#ifdef CONFIG_CHECKPOINT_RESTORE
2007 1699
2008/* 1700/*
@@ -2121,7 +1813,7 @@ out:
2121} 1813}
2122 1814
2123struct map_files_info { 1815struct map_files_info {
2124 struct file *file; 1816 fmode_t mode;
2125 unsigned long len; 1817 unsigned long len;
2126 unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ 1818 unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
2127}; 1819};
@@ -2130,13 +1822,10 @@ static struct dentry *
2130proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, 1822proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
2131 struct task_struct *task, const void *ptr) 1823 struct task_struct *task, const void *ptr)
2132{ 1824{
2133 const struct file *file = ptr; 1825 fmode_t mode = (fmode_t)(unsigned long)ptr;
2134 struct proc_inode *ei; 1826 struct proc_inode *ei;
2135 struct inode *inode; 1827 struct inode *inode;
2136 1828
2137 if (!file)
2138 return ERR_PTR(-ENOENT);
2139
2140 inode = proc_pid_make_inode(dir->i_sb, task); 1829 inode = proc_pid_make_inode(dir->i_sb, task);
2141 if (!inode) 1830 if (!inode)
2142 return ERR_PTR(-ENOENT); 1831 return ERR_PTR(-ENOENT);
@@ -2148,9 +1837,9 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
2148 inode->i_size = 64; 1837 inode->i_size = 64;
2149 inode->i_mode = S_IFLNK; 1838 inode->i_mode = S_IFLNK;
2150 1839
2151 if (file->f_mode & FMODE_READ) 1840 if (mode & FMODE_READ)
2152 inode->i_mode |= S_IRUSR; 1841 inode->i_mode |= S_IRUSR;
2153 if (file->f_mode & FMODE_WRITE) 1842 if (mode & FMODE_WRITE)
2154 inode->i_mode |= S_IWUSR; 1843 inode->i_mode |= S_IWUSR;
2155 1844
2156 d_set_d_op(dentry, &tid_map_files_dentry_operations); 1845 d_set_d_op(dentry, &tid_map_files_dentry_operations);
@@ -2194,7 +1883,8 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
2194 if (!vma) 1883 if (!vma)
2195 goto out_no_vma; 1884 goto out_no_vma;
2196 1885
2197 result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file); 1886 result = proc_map_files_instantiate(dir, dentry, task,
1887 (void *)(unsigned long)vma->vm_file->f_mode);
2198 1888
2199out_no_vma: 1889out_no_vma:
2200 up_read(&mm->mmap_sem); 1890 up_read(&mm->mmap_sem);
@@ -2295,8 +1985,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2295 if (++pos <= filp->f_pos) 1985 if (++pos <= filp->f_pos)
2296 continue; 1986 continue;
2297 1987
2298 get_file(vma->vm_file); 1988 info.mode = vma->vm_file->f_mode;
2299 info.file = vma->vm_file;
2300 info.len = snprintf(info.name, 1989 info.len = snprintf(info.name,
2301 sizeof(info.name), "%lx-%lx", 1990 sizeof(info.name), "%lx-%lx",
2302 vma->vm_start, vma->vm_end); 1991 vma->vm_start, vma->vm_end);
@@ -2311,19 +2000,11 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2311 ret = proc_fill_cache(filp, dirent, filldir, 2000 ret = proc_fill_cache(filp, dirent, filldir,
2312 p->name, p->len, 2001 p->name, p->len,
2313 proc_map_files_instantiate, 2002 proc_map_files_instantiate,
2314 task, p->file); 2003 task,
2004 (void *)(unsigned long)p->mode);
2315 if (ret) 2005 if (ret)
2316 break; 2006 break;
2317 filp->f_pos++; 2007 filp->f_pos++;
2318 fput(p->file);
2319 }
2320 for (; i < nr_files; i++) {
2321 /*
2322 * In case of error don't forget
2323 * to put rest of file refs.
2324 */
2325 p = flex_array_get(fa, i);
2326 fput(p->file);
2327 } 2008 }
2328 if (fa) 2009 if (fa)
2329 flex_array_free(fa); 2010 flex_array_free(fa);
@@ -2345,82 +2026,6 @@ static const struct file_operations proc_map_files_operations = {
2345 2026
2346#endif /* CONFIG_CHECKPOINT_RESTORE */ 2027#endif /* CONFIG_CHECKPOINT_RESTORE */
2347 2028
2348/*
2349 * /proc/pid/fd needs a special permission handler so that a process can still
2350 * access /proc/self/fd after it has executed a setuid().
2351 */
2352static int proc_fd_permission(struct inode *inode, int mask)
2353{
2354 int rv = generic_permission(inode, mask);
2355 if (rv == 0)
2356 return 0;
2357 if (task_pid(current) == proc_pid(inode))
2358 rv = 0;
2359 return rv;
2360}
2361
2362/*
2363 * proc directories can do almost nothing..
2364 */
2365static const struct inode_operations proc_fd_inode_operations = {
2366 .lookup = proc_lookupfd,
2367 .permission = proc_fd_permission,
2368 .setattr = proc_setattr,
2369};
2370
2371static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2372 struct dentry *dentry, struct task_struct *task, const void *ptr)
2373{
2374 unsigned fd = (unsigned long)ptr;
2375 struct inode *inode;
2376 struct proc_inode *ei;
2377 struct dentry *error = ERR_PTR(-ENOENT);
2378
2379 inode = proc_pid_make_inode(dir->i_sb, task);
2380 if (!inode)
2381 goto out;
2382 ei = PROC_I(inode);
2383 ei->fd = fd;
2384 inode->i_mode = S_IFREG | S_IRUSR;
2385 inode->i_fop = &proc_fdinfo_file_operations;
2386 d_set_d_op(dentry, &tid_fd_dentry_operations);
2387 d_add(dentry, inode);
2388 /* Close the race of the process dying before we return the dentry */
2389 if (tid_fd_revalidate(dentry, 0))
2390 error = NULL;
2391
2392 out:
2393 return error;
2394}
2395
2396static struct dentry *proc_lookupfdinfo(struct inode *dir,
2397 struct dentry *dentry,
2398 unsigned int flags)
2399{
2400 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
2401}
2402
2403static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
2404{
2405 return proc_readfd_common(filp, dirent, filldir,
2406 proc_fdinfo_instantiate);
2407}
2408
2409static const struct file_operations proc_fdinfo_operations = {
2410 .read = generic_read_dir,
2411 .readdir = proc_readfdinfo,
2412 .llseek = default_llseek,
2413};
2414
2415/*
2416 * proc directories can do almost nothing..
2417 */
2418static const struct inode_operations proc_fdinfo_inode_operations = {
2419 .lookup = proc_lookupfdinfo,
2420 .setattr = proc_setattr,
2421};
2422
2423
2424static struct dentry *proc_pident_instantiate(struct inode *dir, 2029static struct dentry *proc_pident_instantiate(struct inode *dir,
2425 struct dentry *dentry, struct task_struct *task, const void *ptr) 2030 struct dentry *dentry, struct task_struct *task, const void *ptr)
2426{ 2031{
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
new file mode 100644
index 00000000000..f28a875f877
--- /dev/null
+++ b/fs/proc/fd.c
@@ -0,0 +1,367 @@
1#include <linux/sched.h>
2#include <linux/errno.h>
3#include <linux/dcache.h>
4#include <linux/path.h>
5#include <linux/fdtable.h>
6#include <linux/namei.h>
7#include <linux/pid.h>
8#include <linux/security.h>
9#include <linux/file.h>
10#include <linux/seq_file.h>
11
12#include <linux/proc_fs.h>
13
14#include "internal.h"
15#include "fd.h"
16
17static int seq_show(struct seq_file *m, void *v)
18{
19 struct files_struct *files = NULL;
20 int f_flags = 0, ret = -ENOENT;
21 struct file *file = NULL;
22 struct task_struct *task;
23
24 task = get_proc_task(m->private);
25 if (!task)
26 return -ENOENT;
27
28 files = get_files_struct(task);
29 put_task_struct(task);
30
31 if (files) {
32 int fd = proc_fd(m->private);
33
34 spin_lock(&files->file_lock);
35 file = fcheck_files(files, fd);
36 if (file) {
37 struct fdtable *fdt = files_fdtable(files);
38
39 f_flags = file->f_flags;
40 if (close_on_exec(fd, fdt))
41 f_flags |= O_CLOEXEC;
42
43 get_file(file);
44 ret = 0;
45 }
46 spin_unlock(&files->file_lock);
47 put_files_struct(files);
48 }
49
50 if (!ret) {
51 seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
52 (long long)file->f_pos, f_flags);
53 fput(file);
54 }
55
56 return ret;
57}
58
59static int seq_fdinfo_open(struct inode *inode, struct file *file)
60{
61 return single_open(file, seq_show, inode);
62}
63
64static const struct file_operations proc_fdinfo_file_operations = {
65 .open = seq_fdinfo_open,
66 .read = seq_read,
67 .llseek = seq_lseek,
68 .release = single_release,
69};
70
71static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
72{
73 struct files_struct *files;
74 struct task_struct *task;
75 const struct cred *cred;
76 struct inode *inode;
77 int fd;
78
79 if (flags & LOOKUP_RCU)
80 return -ECHILD;
81
82 inode = dentry->d_inode;
83 task = get_proc_task(inode);
84 fd = proc_fd(inode);
85
86 if (task) {
87 files = get_files_struct(task);
88 if (files) {
89 struct file *file;
90
91 rcu_read_lock();
92 file = fcheck_files(files, fd);
93 if (file) {
94 unsigned f_mode = file->f_mode;
95
96 rcu_read_unlock();
97 put_files_struct(files);
98
99 if (task_dumpable(task)) {
100 rcu_read_lock();
101 cred = __task_cred(task);
102 inode->i_uid = cred->euid;
103 inode->i_gid = cred->egid;
104 rcu_read_unlock();
105 } else {
106 inode->i_uid = GLOBAL_ROOT_UID;
107 inode->i_gid = GLOBAL_ROOT_GID;
108 }
109
110 if (S_ISLNK(inode->i_mode)) {
111 unsigned i_mode = S_IFLNK;
112 if (f_mode & FMODE_READ)
113 i_mode |= S_IRUSR | S_IXUSR;
114 if (f_mode & FMODE_WRITE)
115 i_mode |= S_IWUSR | S_IXUSR;
116 inode->i_mode = i_mode;
117 }
118
119 security_task_to_inode(task, inode);
120 put_task_struct(task);
121 return 1;
122 }
123 rcu_read_unlock();
124 put_files_struct(files);
125 }
126 put_task_struct(task);
127 }
128
129 d_drop(dentry);
130 return 0;
131}
132
133static const struct dentry_operations tid_fd_dentry_operations = {
134 .d_revalidate = tid_fd_revalidate,
135 .d_delete = pid_delete_dentry,
136};
137
138static int proc_fd_link(struct dentry *dentry, struct path *path)
139{
140 struct files_struct *files = NULL;
141 struct task_struct *task;
142 int ret = -ENOENT;
143
144 task = get_proc_task(dentry->d_inode);
145 if (task) {
146 files = get_files_struct(task);
147 put_task_struct(task);
148 }
149
150 if (files) {
151 int fd = proc_fd(dentry->d_inode);
152 struct file *fd_file;
153
154 spin_lock(&files->file_lock);
155 fd_file = fcheck_files(files, fd);
156 if (fd_file) {
157 *path = fd_file->f_path;
158 path_get(&fd_file->f_path);
159 ret = 0;
160 }
161 spin_unlock(&files->file_lock);
162 put_files_struct(files);
163 }
164
165 return ret;
166}
167
168static struct dentry *
169proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
170 struct task_struct *task, const void *ptr)
171{
172 struct dentry *error = ERR_PTR(-ENOENT);
173 unsigned fd = (unsigned long)ptr;
174 struct proc_inode *ei;
175 struct inode *inode;
176
177 inode = proc_pid_make_inode(dir->i_sb, task);
178 if (!inode)
179 goto out;
180
181 ei = PROC_I(inode);
182 ei->fd = fd;
183
184 inode->i_mode = S_IFLNK;
185 inode->i_op = &proc_pid_link_inode_operations;
186 inode->i_size = 64;
187
188 ei->op.proc_get_link = proc_fd_link;
189
190 d_set_d_op(dentry, &tid_fd_dentry_operations);
191 d_add(dentry, inode);
192
193 /* Close the race of the process dying before we return the dentry */
194 if (tid_fd_revalidate(dentry, 0))
195 error = NULL;
196 out:
197 return error;
198}
199
200static struct dentry *proc_lookupfd_common(struct inode *dir,
201 struct dentry *dentry,
202 instantiate_t instantiate)
203{
204 struct task_struct *task = get_proc_task(dir);
205 struct dentry *result = ERR_PTR(-ENOENT);
206 unsigned fd = name_to_int(dentry);
207
208 if (!task)
209 goto out_no_task;
210 if (fd == ~0U)
211 goto out;
212
213 result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
214out:
215 put_task_struct(task);
216out_no_task:
217 return result;
218}
219
220static int proc_readfd_common(struct file * filp, void * dirent,
221 filldir_t filldir, instantiate_t instantiate)
222{
223 struct dentry *dentry = filp->f_path.dentry;
224 struct inode *inode = dentry->d_inode;
225 struct task_struct *p = get_proc_task(inode);
226 struct files_struct *files;
227 unsigned int fd, ino;
228 int retval;
229
230 retval = -ENOENT;
231 if (!p)
232 goto out_no_task;
233 retval = 0;
234
235 fd = filp->f_pos;
236 switch (fd) {
237 case 0:
238 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
239 goto out;
240 filp->f_pos++;
241 case 1:
242 ino = parent_ino(dentry);
243 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
244 goto out;
245 filp->f_pos++;
246 default:
247 files = get_files_struct(p);
248 if (!files)
249 goto out;
250 rcu_read_lock();
251 for (fd = filp->f_pos - 2;
252 fd < files_fdtable(files)->max_fds;
253 fd++, filp->f_pos++) {
254 char name[PROC_NUMBUF];
255 int len;
256 int rv;
257
258 if (!fcheck_files(files, fd))
259 continue;
260 rcu_read_unlock();
261
262 len = snprintf(name, sizeof(name), "%d", fd);
263 rv = proc_fill_cache(filp, dirent, filldir,
264 name, len, instantiate, p,
265 (void *)(unsigned long)fd);
266 if (rv < 0)
267 goto out_fd_loop;
268 rcu_read_lock();
269 }
270 rcu_read_unlock();
271out_fd_loop:
272 put_files_struct(files);
273 }
274out:
275 put_task_struct(p);
276out_no_task:
277 return retval;
278}
279
280static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
281{
282 return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
283}
284
285const struct file_operations proc_fd_operations = {
286 .read = generic_read_dir,
287 .readdir = proc_readfd,
288 .llseek = default_llseek,
289};
290
291static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
292 unsigned int flags)
293{
294 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
295}
296
297/*
298 * /proc/pid/fd needs a special permission handler so that a process can still
299 * access /proc/self/fd after it has executed a setuid().
300 */
301int proc_fd_permission(struct inode *inode, int mask)
302{
303 int rv = generic_permission(inode, mask);
304 if (rv == 0)
305 return 0;
306 if (task_pid(current) == proc_pid(inode))
307 rv = 0;
308 return rv;
309}
310
311const struct inode_operations proc_fd_inode_operations = {
312 .lookup = proc_lookupfd,
313 .permission = proc_fd_permission,
314 .setattr = proc_setattr,
315};
316
317static struct dentry *
318proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
319 struct task_struct *task, const void *ptr)
320{
321 struct dentry *error = ERR_PTR(-ENOENT);
322 unsigned fd = (unsigned long)ptr;
323 struct proc_inode *ei;
324 struct inode *inode;
325
326 inode = proc_pid_make_inode(dir->i_sb, task);
327 if (!inode)
328 goto out;
329
330 ei = PROC_I(inode);
331 ei->fd = fd;
332
333 inode->i_mode = S_IFREG | S_IRUSR;
334 inode->i_fop = &proc_fdinfo_file_operations;
335
336 d_set_d_op(dentry, &tid_fd_dentry_operations);
337 d_add(dentry, inode);
338
339 /* Close the race of the process dying before we return the dentry */
340 if (tid_fd_revalidate(dentry, 0))
341 error = NULL;
342 out:
343 return error;
344}
345
346static struct dentry *
347proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags)
348{
349 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
350}
351
352static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
353{
354 return proc_readfd_common(filp, dirent, filldir,
355 proc_fdinfo_instantiate);
356}
357
358const struct inode_operations proc_fdinfo_inode_operations = {
359 .lookup = proc_lookupfdinfo,
360 .setattr = proc_setattr,
361};
362
363const struct file_operations proc_fdinfo_operations = {
364 .read = generic_read_dir,
365 .readdir = proc_readfdinfo,
366 .llseek = default_llseek,
367};
diff --git a/fs/proc/fd.h b/fs/proc/fd.h
new file mode 100644
index 00000000000..cbb1d47deda
--- /dev/null
+++ b/fs/proc/fd.h
@@ -0,0 +1,14 @@
1#ifndef __PROCFS_FD_H__
2#define __PROCFS_FD_H__
3
4#include <linux/fs.h>
5
6extern const struct file_operations proc_fd_operations;
7extern const struct inode_operations proc_fd_inode_operations;
8
9extern const struct file_operations proc_fdinfo_operations;
10extern const struct inode_operations proc_fdinfo_inode_operations;
11
12extern int proc_fd_permission(struct inode *inode, int mask);
13
14#endif /* __PROCFS_FD_H__ */
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index e1167a1c912..67925a7bd8c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -9,6 +9,7 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/sched.h>
12#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
13struct ctl_table_header; 14struct ctl_table_header;
14 15
@@ -65,6 +66,7 @@ extern const struct file_operations proc_clear_refs_operations;
65extern const struct file_operations proc_pagemap_operations; 66extern const struct file_operations proc_pagemap_operations;
66extern const struct file_operations proc_net_operations; 67extern const struct file_operations proc_net_operations;
67extern const struct inode_operations proc_net_inode_operations; 68extern const struct inode_operations proc_net_inode_operations;
69extern const struct inode_operations proc_pid_link_inode_operations;
68 70
69struct proc_maps_private { 71struct proc_maps_private {
70 struct pid *pid; 72 struct pid *pid;
@@ -91,6 +93,52 @@ static inline int proc_fd(struct inode *inode)
91 return PROC_I(inode)->fd; 93 return PROC_I(inode)->fd;
92} 94}
93 95
96static inline int task_dumpable(struct task_struct *task)
97{
98 int dumpable = 0;
99 struct mm_struct *mm;
100
101 task_lock(task);
102 mm = task->mm;
103 if (mm)
104 dumpable = get_dumpable(mm);
105 task_unlock(task);
106 if(dumpable == 1)
107 return 1;
108 return 0;
109}
110
111static inline int pid_delete_dentry(const struct dentry * dentry)
112{
113 /* Is the task we represent dead?
114 * If so, then don't put the dentry on the lru list,
115 * kill it immediately.
116 */
117 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
118}
119
120static inline unsigned name_to_int(struct dentry *dentry)
121{
122 const char *name = dentry->d_name.name;
123 int len = dentry->d_name.len;
124 unsigned n = 0;
125
126 if (len > 1 && *name == '0')
127 goto out;
128 while (len-- > 0) {
129 unsigned c = *name++ - '0';
130 if (c > 9)
131 goto out;
132 if (n >= (~0U-9)/10)
133 goto out;
134 n *= 10;
135 n += c;
136 }
137 return n;
138out:
139 return ~0U;
140}
141
94struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, 142struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
95 struct dentry *dentry); 143 struct dentry *dentry);
96int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 144int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 5c3c7b02e17..43098bb5723 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -391,6 +391,11 @@ static int init_inodecache(void)
391 391
392static void destroy_inodecache(void) 392static void destroy_inodecache(void)
393{ 393{
394 /*
395 * Make sure all delayed rcu free inodes are flushed before we
396 * destroy cache.
397 */
398 rcu_barrier();
394 kmem_cache_destroy(qnx4_inode_cachep); 399 kmem_cache_destroy(qnx4_inode_cachep);
395} 400}
396 401
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index f4eef0b5e7b..b6addf56048 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -651,6 +651,11 @@ static int init_inodecache(void)
651 651
652static void destroy_inodecache(void) 652static void destroy_inodecache(void)
653{ 653{
654 /*
655 * Make sure all delayed rcu free inodes are flushed before we
656 * destroy cache.
657 */
658 rcu_barrier();
654 kmem_cache_destroy(qnx6_inode_cachep); 659 kmem_cache_destroy(qnx6_inode_cachep);
655} 660}
656 661
diff --git a/fs/read_write.c b/fs/read_write.c
index 1adfb691e4f..d06534857e9 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -232,23 +232,18 @@ EXPORT_SYMBOL(vfs_llseek);
232SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) 232SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
233{ 233{
234 off_t retval; 234 off_t retval;
235 struct file * file; 235 struct fd f = fdget(fd);
236 int fput_needed; 236 if (!f.file)
237 237 return -EBADF;
238 retval = -EBADF;
239 file = fget_light(fd, &fput_needed);
240 if (!file)
241 goto bad;
242 238
243 retval = -EINVAL; 239 retval = -EINVAL;
244 if (origin <= SEEK_MAX) { 240 if (origin <= SEEK_MAX) {
245 loff_t res = vfs_llseek(file, offset, origin); 241 loff_t res = vfs_llseek(f.file, offset, origin);
246 retval = res; 242 retval = res;
247 if (res != (loff_t)retval) 243 if (res != (loff_t)retval)
248 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 244 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
249 } 245 }
250 fput_light(file, fput_needed); 246 fdput(f);
251bad:
252 return retval; 247 return retval;
253} 248}
254 249
@@ -258,20 +253,17 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
258 unsigned int, origin) 253 unsigned int, origin)
259{ 254{
260 int retval; 255 int retval;
261 struct file * file; 256 struct fd f = fdget(fd);
262 loff_t offset; 257 loff_t offset;
263 int fput_needed;
264 258
265 retval = -EBADF; 259 if (!f.file)
266 file = fget_light(fd, &fput_needed); 260 return -EBADF;
267 if (!file)
268 goto bad;
269 261
270 retval = -EINVAL; 262 retval = -EINVAL;
271 if (origin > SEEK_MAX) 263 if (origin > SEEK_MAX)
272 goto out_putf; 264 goto out_putf;
273 265
274 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 266 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
275 origin); 267 origin);
276 268
277 retval = (int)offset; 269 retval = (int)offset;
@@ -281,8 +273,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
281 retval = 0; 273 retval = 0;
282 } 274 }
283out_putf: 275out_putf:
284 fput_light(file, fput_needed); 276 fdput(f);
285bad:
286 return retval; 277 return retval;
287} 278}
288#endif 279#endif
@@ -461,34 +452,29 @@ static inline void file_pos_write(struct file *file, loff_t pos)
461 452
462SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 453SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
463{ 454{
464 struct file *file; 455 struct fd f = fdget(fd);
465 ssize_t ret = -EBADF; 456 ssize_t ret = -EBADF;
466 int fput_needed;
467 457
468 file = fget_light(fd, &fput_needed); 458 if (f.file) {
469 if (file) { 459 loff_t pos = file_pos_read(f.file);
470 loff_t pos = file_pos_read(file); 460 ret = vfs_read(f.file, buf, count, &pos);
471 ret = vfs_read(file, buf, count, &pos); 461 file_pos_write(f.file, pos);
472 file_pos_write(file, pos); 462 fdput(f);
473 fput_light(file, fput_needed);
474 } 463 }
475
476 return ret; 464 return ret;
477} 465}
478 466
479SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 467SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
480 size_t, count) 468 size_t, count)
481{ 469{
482 struct file *file; 470 struct fd f = fdget(fd);
483 ssize_t ret = -EBADF; 471 ssize_t ret = -EBADF;
484 int fput_needed;
485 472
486 file = fget_light(fd, &fput_needed); 473 if (f.file) {
487 if (file) { 474 loff_t pos = file_pos_read(f.file);
488 loff_t pos = file_pos_read(file); 475 ret = vfs_write(f.file, buf, count, &pos);
489 ret = vfs_write(file, buf, count, &pos); 476 file_pos_write(f.file, pos);
490 file_pos_write(file, pos); 477 fdput(f);
491 fput_light(file, fput_needed);
492 } 478 }
493 479
494 return ret; 480 return ret;
@@ -497,19 +483,18 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
497SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, 483SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
498 size_t count, loff_t pos) 484 size_t count, loff_t pos)
499{ 485{
500 struct file *file; 486 struct fd f;
501 ssize_t ret = -EBADF; 487 ssize_t ret = -EBADF;
502 int fput_needed;
503 488
504 if (pos < 0) 489 if (pos < 0)
505 return -EINVAL; 490 return -EINVAL;
506 491
507 file = fget_light(fd, &fput_needed); 492 f = fdget(fd);
508 if (file) { 493 if (f.file) {
509 ret = -ESPIPE; 494 ret = -ESPIPE;
510 if (file->f_mode & FMODE_PREAD) 495 if (f.file->f_mode & FMODE_PREAD)
511 ret = vfs_read(file, buf, count, &pos); 496 ret = vfs_read(f.file, buf, count, &pos);
512 fput_light(file, fput_needed); 497 fdput(f);
513 } 498 }
514 499
515 return ret; 500 return ret;
@@ -526,19 +511,18 @@ SYSCALL_ALIAS(sys_pread64, SyS_pread64);
526SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, 511SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
527 size_t count, loff_t pos) 512 size_t count, loff_t pos)
528{ 513{
529 struct file *file; 514 struct fd f;
530 ssize_t ret = -EBADF; 515 ssize_t ret = -EBADF;
531 int fput_needed;
532 516
533 if (pos < 0) 517 if (pos < 0)
534 return -EINVAL; 518 return -EINVAL;
535 519
536 file = fget_light(fd, &fput_needed); 520 f = fdget(fd);
537 if (file) { 521 if (f.file) {
538 ret = -ESPIPE; 522 ret = -ESPIPE;
539 if (file->f_mode & FMODE_PWRITE) 523 if (f.file->f_mode & FMODE_PWRITE)
540 ret = vfs_write(file, buf, count, &pos); 524 ret = vfs_write(f.file, buf, count, &pos);
541 fput_light(file, fput_needed); 525 fdput(f);
542 } 526 }
543 527
544 return ret; 528 return ret;
@@ -789,16 +773,14 @@ EXPORT_SYMBOL(vfs_writev);
789SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 773SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
790 unsigned long, vlen) 774 unsigned long, vlen)
791{ 775{
792 struct file *file; 776 struct fd f = fdget(fd);
793 ssize_t ret = -EBADF; 777 ssize_t ret = -EBADF;
794 int fput_needed;
795 778
796 file = fget_light(fd, &fput_needed); 779 if (f.file) {
797 if (file) { 780 loff_t pos = file_pos_read(f.file);
798 loff_t pos = file_pos_read(file); 781 ret = vfs_readv(f.file, vec, vlen, &pos);
799 ret = vfs_readv(file, vec, vlen, &pos); 782 file_pos_write(f.file, pos);
800 file_pos_write(file, pos); 783 fdput(f);
801 fput_light(file, fput_needed);
802 } 784 }
803 785
804 if (ret > 0) 786 if (ret > 0)
@@ -810,16 +792,14 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
810SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 792SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
811 unsigned long, vlen) 793 unsigned long, vlen)
812{ 794{
813 struct file *file; 795 struct fd f = fdget(fd);
814 ssize_t ret = -EBADF; 796 ssize_t ret = -EBADF;
815 int fput_needed;
816 797
817 file = fget_light(fd, &fput_needed); 798 if (f.file) {
818 if (file) { 799 loff_t pos = file_pos_read(f.file);
819 loff_t pos = file_pos_read(file); 800 ret = vfs_writev(f.file, vec, vlen, &pos);
820 ret = vfs_writev(file, vec, vlen, &pos); 801 file_pos_write(f.file, pos);
821 file_pos_write(file, pos); 802 fdput(f);
822 fput_light(file, fput_needed);
823 } 803 }
824 804
825 if (ret > 0) 805 if (ret > 0)
@@ -838,19 +818,18 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
838 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 818 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
839{ 819{
840 loff_t pos = pos_from_hilo(pos_h, pos_l); 820 loff_t pos = pos_from_hilo(pos_h, pos_l);
841 struct file *file; 821 struct fd f;
842 ssize_t ret = -EBADF; 822 ssize_t ret = -EBADF;
843 int fput_needed;
844 823
845 if (pos < 0) 824 if (pos < 0)
846 return -EINVAL; 825 return -EINVAL;
847 826
848 file = fget_light(fd, &fput_needed); 827 f = fdget(fd);
849 if (file) { 828 if (f.file) {
850 ret = -ESPIPE; 829 ret = -ESPIPE;
851 if (file->f_mode & FMODE_PREAD) 830 if (f.file->f_mode & FMODE_PREAD)
852 ret = vfs_readv(file, vec, vlen, &pos); 831 ret = vfs_readv(f.file, vec, vlen, &pos);
853 fput_light(file, fput_needed); 832 fdput(f);
854 } 833 }
855 834
856 if (ret > 0) 835 if (ret > 0)
@@ -863,19 +842,18 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
863 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 842 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
864{ 843{
865 loff_t pos = pos_from_hilo(pos_h, pos_l); 844 loff_t pos = pos_from_hilo(pos_h, pos_l);
866 struct file *file; 845 struct fd f;
867 ssize_t ret = -EBADF; 846 ssize_t ret = -EBADF;
868 int fput_needed;
869 847
870 if (pos < 0) 848 if (pos < 0)
871 return -EINVAL; 849 return -EINVAL;
872 850
873 file = fget_light(fd, &fput_needed); 851 f = fdget(fd);
874 if (file) { 852 if (f.file) {
875 ret = -ESPIPE; 853 ret = -ESPIPE;
876 if (file->f_mode & FMODE_PWRITE) 854 if (f.file->f_mode & FMODE_PWRITE)
877 ret = vfs_writev(file, vec, vlen, &pos); 855 ret = vfs_writev(f.file, vec, vlen, &pos);
878 fput_light(file, fput_needed); 856 fdput(f);
879 } 857 }
880 858
881 if (ret > 0) 859 if (ret > 0)
@@ -884,31 +862,31 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
884 return ret; 862 return ret;
885} 863}
886 864
887static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 865ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
888 size_t count, loff_t max) 866 loff_t max)
889{ 867{
890 struct file * in_file, * out_file; 868 struct fd in, out;
891 struct inode * in_inode, * out_inode; 869 struct inode *in_inode, *out_inode;
892 loff_t pos; 870 loff_t pos;
893 ssize_t retval; 871 ssize_t retval;
894 int fput_needed_in, fput_needed_out, fl; 872 int fl;
895 873
896 /* 874 /*
897 * Get input file, and verify that it is ok.. 875 * Get input file, and verify that it is ok..
898 */ 876 */
899 retval = -EBADF; 877 retval = -EBADF;
900 in_file = fget_light(in_fd, &fput_needed_in); 878 in = fdget(in_fd);
901 if (!in_file) 879 if (!in.file)
902 goto out; 880 goto out;
903 if (!(in_file->f_mode & FMODE_READ)) 881 if (!(in.file->f_mode & FMODE_READ))
904 goto fput_in; 882 goto fput_in;
905 retval = -ESPIPE; 883 retval = -ESPIPE;
906 if (!ppos) 884 if (!ppos)
907 ppos = &in_file->f_pos; 885 ppos = &in.file->f_pos;
908 else 886 else
909 if (!(in_file->f_mode & FMODE_PREAD)) 887 if (!(in.file->f_mode & FMODE_PREAD))
910 goto fput_in; 888 goto fput_in;
911 retval = rw_verify_area(READ, in_file, ppos, count); 889 retval = rw_verify_area(READ, in.file, ppos, count);
912 if (retval < 0) 890 if (retval < 0)
913 goto fput_in; 891 goto fput_in;
914 count = retval; 892 count = retval;
@@ -917,15 +895,15 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
917 * Get output file, and verify that it is ok.. 895 * Get output file, and verify that it is ok..
918 */ 896 */
919 retval = -EBADF; 897 retval = -EBADF;
920 out_file = fget_light(out_fd, &fput_needed_out); 898 out = fdget(out_fd);
921 if (!out_file) 899 if (!out.file)
922 goto fput_in; 900 goto fput_in;
923 if (!(out_file->f_mode & FMODE_WRITE)) 901 if (!(out.file->f_mode & FMODE_WRITE))
924 goto fput_out; 902 goto fput_out;
925 retval = -EINVAL; 903 retval = -EINVAL;
926 in_inode = in_file->f_path.dentry->d_inode; 904 in_inode = in.file->f_path.dentry->d_inode;
927 out_inode = out_file->f_path.dentry->d_inode; 905 out_inode = out.file->f_path.dentry->d_inode;
928 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 906 retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count);
929 if (retval < 0) 907 if (retval < 0)
930 goto fput_out; 908 goto fput_out;
931 count = retval; 909 count = retval;
@@ -949,10 +927,10 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
949 * and the application is arguably buggy if it doesn't expect 927 * and the application is arguably buggy if it doesn't expect
950 * EAGAIN on a non-blocking file descriptor. 928 * EAGAIN on a non-blocking file descriptor.
951 */ 929 */
952 if (in_file->f_flags & O_NONBLOCK) 930 if (in.file->f_flags & O_NONBLOCK)
953 fl = SPLICE_F_NONBLOCK; 931 fl = SPLICE_F_NONBLOCK;
954#endif 932#endif
955 retval = do_splice_direct(in_file, ppos, out_file, count, fl); 933 retval = do_splice_direct(in.file, ppos, out.file, count, fl);
956 934
957 if (retval > 0) { 935 if (retval > 0) {
958 add_rchar(current, retval); 936 add_rchar(current, retval);
@@ -965,9 +943,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
965 retval = -EOVERFLOW; 943 retval = -EOVERFLOW;
966 944
967fput_out: 945fput_out:
968 fput_light(out_file, fput_needed_out); 946 fdput(out);
969fput_in: 947fput_in:
970 fput_light(in_file, fput_needed_in); 948 fdput(in);
971out: 949out:
972 return retval; 950 return retval;
973} 951}
diff --git a/fs/read_write.h b/fs/read_write.h
index d07b954c6e0..d3e00ef6742 100644
--- a/fs/read_write.h
+++ b/fs/read_write.h
@@ -12,3 +12,5 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
12 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); 12 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn);
13ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 13ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
14 unsigned long nr_segs, loff_t *ppos, io_fn_t fn); 14 unsigned long nr_segs, loff_t *ppos, io_fn_t fn);
15ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
16 loff_t max);
diff --git a/fs/readdir.c b/fs/readdir.c
index 39e3370d79c..5e69ef533b7 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -106,22 +106,20 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
106 struct old_linux_dirent __user *, dirent, unsigned int, count) 106 struct old_linux_dirent __user *, dirent, unsigned int, count)
107{ 107{
108 int error; 108 int error;
109 struct file * file; 109 struct fd f = fdget(fd);
110 struct readdir_callback buf; 110 struct readdir_callback buf;
111 int fput_needed;
112 111
113 file = fget_light(fd, &fput_needed); 112 if (!f.file)
114 if (!file)
115 return -EBADF; 113 return -EBADF;
116 114
117 buf.result = 0; 115 buf.result = 0;
118 buf.dirent = dirent; 116 buf.dirent = dirent;
119 117
120 error = vfs_readdir(file, fillonedir, &buf); 118 error = vfs_readdir(f.file, fillonedir, &buf);
121 if (buf.result) 119 if (buf.result)
122 error = buf.result; 120 error = buf.result;
123 121
124 fput_light(file, fput_needed); 122 fdput(f);
125 return error; 123 return error;
126} 124}
127 125
@@ -191,17 +189,16 @@ efault:
191SYSCALL_DEFINE3(getdents, unsigned int, fd, 189SYSCALL_DEFINE3(getdents, unsigned int, fd,
192 struct linux_dirent __user *, dirent, unsigned int, count) 190 struct linux_dirent __user *, dirent, unsigned int, count)
193{ 191{
194 struct file * file; 192 struct fd f;
195 struct linux_dirent __user * lastdirent; 193 struct linux_dirent __user * lastdirent;
196 struct getdents_callback buf; 194 struct getdents_callback buf;
197 int fput_needed;
198 int error; 195 int error;
199 196
200 if (!access_ok(VERIFY_WRITE, dirent, count)) 197 if (!access_ok(VERIFY_WRITE, dirent, count))
201 return -EFAULT; 198 return -EFAULT;
202 199
203 file = fget_light(fd, &fput_needed); 200 f = fdget(fd);
204 if (!file) 201 if (!f.file)
205 return -EBADF; 202 return -EBADF;
206 203
207 buf.current_dir = dirent; 204 buf.current_dir = dirent;
@@ -209,17 +206,17 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
209 buf.count = count; 206 buf.count = count;
210 buf.error = 0; 207 buf.error = 0;
211 208
212 error = vfs_readdir(file, filldir, &buf); 209 error = vfs_readdir(f.file, filldir, &buf);
213 if (error >= 0) 210 if (error >= 0)
214 error = buf.error; 211 error = buf.error;
215 lastdirent = buf.previous; 212 lastdirent = buf.previous;
216 if (lastdirent) { 213 if (lastdirent) {
217 if (put_user(file->f_pos, &lastdirent->d_off)) 214 if (put_user(f.file->f_pos, &lastdirent->d_off))
218 error = -EFAULT; 215 error = -EFAULT;
219 else 216 else
220 error = count - buf.count; 217 error = count - buf.count;
221 } 218 }
222 fput_light(file, fput_needed); 219 fdput(f);
223 return error; 220 return error;
224} 221}
225 222
@@ -272,17 +269,16 @@ efault:
272SYSCALL_DEFINE3(getdents64, unsigned int, fd, 269SYSCALL_DEFINE3(getdents64, unsigned int, fd,
273 struct linux_dirent64 __user *, dirent, unsigned int, count) 270 struct linux_dirent64 __user *, dirent, unsigned int, count)
274{ 271{
275 struct file * file; 272 struct fd f;
276 struct linux_dirent64 __user * lastdirent; 273 struct linux_dirent64 __user * lastdirent;
277 struct getdents_callback64 buf; 274 struct getdents_callback64 buf;
278 int fput_needed;
279 int error; 275 int error;
280 276
281 if (!access_ok(VERIFY_WRITE, dirent, count)) 277 if (!access_ok(VERIFY_WRITE, dirent, count))
282 return -EFAULT; 278 return -EFAULT;
283 279
284 file = fget_light(fd, &fput_needed); 280 f = fdget(fd);
285 if (!file) 281 if (!f.file)
286 return -EBADF; 282 return -EBADF;
287 283
288 buf.current_dir = dirent; 284 buf.current_dir = dirent;
@@ -290,17 +286,17 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
290 buf.count = count; 286 buf.count = count;
291 buf.error = 0; 287 buf.error = 0;
292 288
293 error = vfs_readdir(file, filldir64, &buf); 289 error = vfs_readdir(f.file, filldir64, &buf);
294 if (error >= 0) 290 if (error >= 0)
295 error = buf.error; 291 error = buf.error;
296 lastdirent = buf.previous; 292 lastdirent = buf.previous;
297 if (lastdirent) { 293 if (lastdirent) {
298 typeof(lastdirent->d_off) d_off = file->f_pos; 294 typeof(lastdirent->d_off) d_off = f.file->f_pos;
299 if (__put_user(d_off, &lastdirent->d_off)) 295 if (__put_user(d_off, &lastdirent->d_off))
300 error = -EFAULT; 296 error = -EFAULT;
301 else 297 else
302 error = count - buf.count; 298 error = count - buf.count;
303 } 299 }
304 fput_light(file, fput_needed); 300 fdput(f);
305 return error; 301 return error;
306} 302}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7a37dabf5a9..1078ae17999 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -608,6 +608,11 @@ static int init_inodecache(void)
608 608
609static void destroy_inodecache(void) 609static void destroy_inodecache(void)
610{ 610{
611 /*
612 * Make sure all delayed rcu free inodes are flushed before we
613 * destroy cache.
614 */
615 rcu_barrier();
611 kmem_cache_destroy(reiserfs_inode_cachep); 616 kmem_cache_destroy(reiserfs_inode_cachep);
612} 617}
613 618
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 77c5f217398..fd7c5f60b46 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -648,6 +648,11 @@ error_register:
648static void __exit exit_romfs_fs(void) 648static void __exit exit_romfs_fs(void)
649{ 649{
650 unregister_filesystem(&romfs_fs_type); 650 unregister_filesystem(&romfs_fs_type);
651 /*
652 * Make sure all delayed rcu free inodes are flushed before we
653 * destroy cache.
654 */
655 rcu_barrier();
651 kmem_cache_destroy(romfs_inode_cachep); 656 kmem_cache_destroy(romfs_inode_cachep);
652} 657}
653 658
diff --git a/fs/select.c b/fs/select.c
index db14c781335..2ef72d96503 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -220,8 +220,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
220 struct poll_table_entry *entry = poll_get_entry(pwq); 220 struct poll_table_entry *entry = poll_get_entry(pwq);
221 if (!entry) 221 if (!entry)
222 return; 222 return;
223 get_file(filp); 223 entry->filp = get_file(filp);
224 entry->filp = filp;
225 entry->wait_address = wait_address; 224 entry->wait_address = wait_address;
226 entry->key = p->_key; 225 entry->key = p->_key;
227 init_waitqueue_func_entry(&entry->wait, pollwake); 226 init_waitqueue_func_entry(&entry->wait, pollwake);
@@ -429,8 +428,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
429 for (i = 0; i < n; ++rinp, ++routp, ++rexp) { 428 for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
430 unsigned long in, out, ex, all_bits, bit = 1, mask, j; 429 unsigned long in, out, ex, all_bits, bit = 1, mask, j;
431 unsigned long res_in = 0, res_out = 0, res_ex = 0; 430 unsigned long res_in = 0, res_out = 0, res_ex = 0;
432 const struct file_operations *f_op = NULL;
433 struct file *file = NULL;
434 431
435 in = *inp++; out = *outp++; ex = *exp++; 432 in = *inp++; out = *outp++; ex = *exp++;
436 all_bits = in | out | ex; 433 all_bits = in | out | ex;
@@ -440,20 +437,21 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
440 } 437 }
441 438
442 for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { 439 for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
443 int fput_needed; 440 struct fd f;
444 if (i >= n) 441 if (i >= n)
445 break; 442 break;
446 if (!(bit & all_bits)) 443 if (!(bit & all_bits))
447 continue; 444 continue;
448 file = fget_light(i, &fput_needed); 445 f = fdget(i);
449 if (file) { 446 if (f.file) {
450 f_op = file->f_op; 447 const struct file_operations *f_op;
448 f_op = f.file->f_op;
451 mask = DEFAULT_POLLMASK; 449 mask = DEFAULT_POLLMASK;
452 if (f_op && f_op->poll) { 450 if (f_op && f_op->poll) {
453 wait_key_set(wait, in, out, bit); 451 wait_key_set(wait, in, out, bit);
454 mask = (*f_op->poll)(file, wait); 452 mask = (*f_op->poll)(f.file, wait);
455 } 453 }
456 fput_light(file, fput_needed); 454 fdput(f);
457 if ((mask & POLLIN_SET) && (in & bit)) { 455 if ((mask & POLLIN_SET) && (in & bit)) {
458 res_in |= bit; 456 res_in |= bit;
459 retval++; 457 retval++;
@@ -726,20 +724,17 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
726 mask = 0; 724 mask = 0;
727 fd = pollfd->fd; 725 fd = pollfd->fd;
728 if (fd >= 0) { 726 if (fd >= 0) {
729 int fput_needed; 727 struct fd f = fdget(fd);
730 struct file * file;
731
732 file = fget_light(fd, &fput_needed);
733 mask = POLLNVAL; 728 mask = POLLNVAL;
734 if (file != NULL) { 729 if (f.file) {
735 mask = DEFAULT_POLLMASK; 730 mask = DEFAULT_POLLMASK;
736 if (file->f_op && file->f_op->poll) { 731 if (f.file->f_op && f.file->f_op->poll) {
737 pwait->_key = pollfd->events|POLLERR|POLLHUP; 732 pwait->_key = pollfd->events|POLLERR|POLLHUP;
738 mask = file->f_op->poll(file, pwait); 733 mask = f.file->f_op->poll(f.file, pwait);
739 } 734 }
740 /* Mask out unneeded events. */ 735 /* Mask out unneeded events. */
741 mask &= pollfd->events | POLLERR | POLLHUP; 736 mask &= pollfd->events | POLLERR | POLLHUP;
742 fput_light(file, fput_needed); 737 fdput(f);
743 } 738 }
744 } 739 }
745 pollfd->revents = mask; 740 pollfd->revents = mask;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 9f35a37173d..8bee4e57091 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -269,13 +269,12 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
269 if (ufd < 0) 269 if (ufd < 0)
270 kfree(ctx); 270 kfree(ctx);
271 } else { 271 } else {
272 int fput_needed; 272 struct fd f = fdget(ufd);
273 struct file *file = fget_light(ufd, &fput_needed); 273 if (!f.file)
274 if (!file)
275 return -EBADF; 274 return -EBADF;
276 ctx = file->private_data; 275 ctx = f.file->private_data;
277 if (file->f_op != &signalfd_fops) { 276 if (f.file->f_op != &signalfd_fops) {
278 fput_light(file, fput_needed); 277 fdput(f);
279 return -EINVAL; 278 return -EINVAL;
280 } 279 }
281 spin_lock_irq(&current->sighand->siglock); 280 spin_lock_irq(&current->sighand->siglock);
@@ -283,7 +282,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
283 spin_unlock_irq(&current->sighand->siglock); 282 spin_unlock_irq(&current->sighand->siglock);
284 283
285 wake_up(&current->sighand->signalfd_wqh); 284 wake_up(&current->sighand->signalfd_wqh);
286 fput_light(file, fput_needed); 285 fdput(f);
287 } 286 }
288 287
289 return ufd; 288 return ufd;
diff --git a/fs/splice.c b/fs/splice.c
index 41514dd8946..13e5b4776e7 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1666,9 +1666,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1666SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, 1666SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
1667 unsigned long, nr_segs, unsigned int, flags) 1667 unsigned long, nr_segs, unsigned int, flags)
1668{ 1668{
1669 struct file *file; 1669 struct fd f;
1670 long error; 1670 long error;
1671 int fput;
1672 1671
1673 if (unlikely(nr_segs > UIO_MAXIOV)) 1672 if (unlikely(nr_segs > UIO_MAXIOV))
1674 return -EINVAL; 1673 return -EINVAL;
@@ -1676,14 +1675,14 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
1676 return 0; 1675 return 0;
1677 1676
1678 error = -EBADF; 1677 error = -EBADF;
1679 file = fget_light(fd, &fput); 1678 f = fdget(fd);
1680 if (file) { 1679 if (f.file) {
1681 if (file->f_mode & FMODE_WRITE) 1680 if (f.file->f_mode & FMODE_WRITE)
1682 error = vmsplice_to_pipe(file, iov, nr_segs, flags); 1681 error = vmsplice_to_pipe(f.file, iov, nr_segs, flags);
1683 else if (file->f_mode & FMODE_READ) 1682 else if (f.file->f_mode & FMODE_READ)
1684 error = vmsplice_to_user(file, iov, nr_segs, flags); 1683 error = vmsplice_to_user(f.file, iov, nr_segs, flags);
1685 1684
1686 fput_light(file, fput); 1685 fdput(f);
1687 } 1686 }
1688 1687
1689 return error; 1688 return error;
@@ -1693,30 +1692,27 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
1693 int, fd_out, loff_t __user *, off_out, 1692 int, fd_out, loff_t __user *, off_out,
1694 size_t, len, unsigned int, flags) 1693 size_t, len, unsigned int, flags)
1695{ 1694{
1695 struct fd in, out;
1696 long error; 1696 long error;
1697 struct file *in, *out;
1698 int fput_in, fput_out;
1699 1697
1700 if (unlikely(!len)) 1698 if (unlikely(!len))
1701 return 0; 1699 return 0;
1702 1700
1703 error = -EBADF; 1701 error = -EBADF;
1704 in = fget_light(fd_in, &fput_in); 1702 in = fdget(fd_in);
1705 if (in) { 1703 if (in.file) {
1706 if (in->f_mode & FMODE_READ) { 1704 if (in.file->f_mode & FMODE_READ) {
1707 out = fget_light(fd_out, &fput_out); 1705 out = fdget(fd_out);
1708 if (out) { 1706 if (out.file) {
1709 if (out->f_mode & FMODE_WRITE) 1707 if (out.file->f_mode & FMODE_WRITE)
1710 error = do_splice(in, off_in, 1708 error = do_splice(in.file, off_in,
1711 out, off_out, 1709 out.file, off_out,
1712 len, flags); 1710 len, flags);
1713 fput_light(out, fput_out); 1711 fdput(out);
1714 } 1712 }
1715 } 1713 }
1716 1714 fdput(in);
1717 fput_light(in, fput_in);
1718 } 1715 }
1719
1720 return error; 1716 return error;
1721} 1717}
1722 1718
@@ -2027,26 +2023,25 @@ static long do_tee(struct file *in, struct file *out, size_t len,
2027 2023
2028SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) 2024SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
2029{ 2025{
2030 struct file *in; 2026 struct fd in;
2031 int error, fput_in; 2027 int error;
2032 2028
2033 if (unlikely(!len)) 2029 if (unlikely(!len))
2034 return 0; 2030 return 0;
2035 2031
2036 error = -EBADF; 2032 error = -EBADF;
2037 in = fget_light(fdin, &fput_in); 2033 in = fdget(fdin);
2038 if (in) { 2034 if (in.file) {
2039 if (in->f_mode & FMODE_READ) { 2035 if (in.file->f_mode & FMODE_READ) {
2040 int fput_out; 2036 struct fd out = fdget(fdout);
2041 struct file *out = fget_light(fdout, &fput_out); 2037 if (out.file) {
2042 2038 if (out.file->f_mode & FMODE_WRITE)
2043 if (out) { 2039 error = do_tee(in.file, out.file,
2044 if (out->f_mode & FMODE_WRITE) 2040 len, flags);
2045 error = do_tee(in, out, len, flags); 2041 fdput(out);
2046 fput_light(out, fput_out);
2047 } 2042 }
2048 } 2043 }
2049 fput_light(in, fput_in); 2044 fdput(in);
2050 } 2045 }
2051 2046
2052 return error; 2047 return error;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 29cd014ed3a..260e3928d4f 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -425,6 +425,11 @@ static int __init init_inodecache(void)
425 425
426static void destroy_inodecache(void) 426static void destroy_inodecache(void)
427{ 427{
428 /*
429 * Make sure all delayed rcu free inodes are flushed before we
430 * destroy cache.
431 */
432 rcu_barrier();
428 kmem_cache_destroy(squashfs_inode_cachep); 433 kmem_cache_destroy(squashfs_inode_cachep);
429} 434}
430 435
diff --git a/fs/stat.c b/fs/stat.c
index 208039eec6c..eae494630a3 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr);
57 57
58int vfs_fstat(unsigned int fd, struct kstat *stat) 58int vfs_fstat(unsigned int fd, struct kstat *stat)
59{ 59{
60 int fput_needed; 60 struct fd f = fdget_raw(fd);
61 struct file *f = fget_raw_light(fd, &fput_needed);
62 int error = -EBADF; 61 int error = -EBADF;
63 62
64 if (f) { 63 if (f.file) {
65 error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); 64 error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry,
66 fput_light(f, fput_needed); 65 stat);
66 fdput(f);
67 } 67 }
68 return error; 68 return error;
69} 69}
diff --git a/fs/statfs.c b/fs/statfs.c
index 95ad5c0e586..f8e832e6f0a 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -87,12 +87,11 @@ int user_statfs(const char __user *pathname, struct kstatfs *st)
87 87
88int fd_statfs(int fd, struct kstatfs *st) 88int fd_statfs(int fd, struct kstatfs *st)
89{ 89{
90 int fput_needed; 90 struct fd f = fdget(fd);
91 struct file *file = fget_light(fd, &fput_needed);
92 int error = -EBADF; 91 int error = -EBADF;
93 if (file) { 92 if (f.file) {
94 error = vfs_statfs(&file->f_path, st); 93 error = vfs_statfs(&f.file->f_path, st);
95 fput_light(file, fput_needed); 94 fdput(f);
96 } 95 }
97 return error; 96 return error;
98} 97}
diff --git a/fs/super.c b/fs/super.c
index 0902cfa6a12..5fdf7ff32c4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -307,12 +307,6 @@ void deactivate_locked_super(struct super_block *s)
307 307
308 /* caches are now gone, we can safely kill the shrinker now */ 308 /* caches are now gone, we can safely kill the shrinker now */
309 unregister_shrinker(&s->s_shrink); 309 unregister_shrinker(&s->s_shrink);
310
311 /*
312 * We need to call rcu_barrier so all the delayed rcu free
313 * inodes are flushed before we release the fs module.
314 */
315 rcu_barrier();
316 put_filesystem(fs); 310 put_filesystem(fs);
317 put_super(s); 311 put_super(s);
318 } else { 312 } else {
diff --git a/fs/sync.c b/fs/sync.c
index eb8722dc556..14eefeb4463 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -148,21 +148,19 @@ void emergency_sync(void)
148 */ 148 */
149SYSCALL_DEFINE1(syncfs, int, fd) 149SYSCALL_DEFINE1(syncfs, int, fd)
150{ 150{
151 struct file *file; 151 struct fd f = fdget(fd);
152 struct super_block *sb; 152 struct super_block *sb;
153 int ret; 153 int ret;
154 int fput_needed;
155 154
156 file = fget_light(fd, &fput_needed); 155 if (!f.file)
157 if (!file)
158 return -EBADF; 156 return -EBADF;
159 sb = file->f_dentry->d_sb; 157 sb = f.file->f_dentry->d_sb;
160 158
161 down_read(&sb->s_umount); 159 down_read(&sb->s_umount);
162 ret = sync_filesystem(sb); 160 ret = sync_filesystem(sb);
163 up_read(&sb->s_umount); 161 up_read(&sb->s_umount);
164 162
165 fput_light(file, fput_needed); 163 fdput(f);
166 return ret; 164 return ret;
167} 165}
168 166
@@ -201,14 +199,12 @@ EXPORT_SYMBOL(vfs_fsync);
201 199
202static int do_fsync(unsigned int fd, int datasync) 200static int do_fsync(unsigned int fd, int datasync)
203{ 201{
204 struct file *file; 202 struct fd f = fdget(fd);
205 int ret = -EBADF; 203 int ret = -EBADF;
206 int fput_needed;
207 204
208 file = fget_light(fd, &fput_needed); 205 if (f.file) {
209 if (file) { 206 ret = vfs_fsync(f.file, datasync);
210 ret = vfs_fsync(file, datasync); 207 fdput(f);
211 fput_light(file, fput_needed);
212 } 208 }
213 return ret; 209 return ret;
214} 210}
@@ -291,10 +287,9 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
291 unsigned int flags) 287 unsigned int flags)
292{ 288{
293 int ret; 289 int ret;
294 struct file *file; 290 struct fd f;
295 struct address_space *mapping; 291 struct address_space *mapping;
296 loff_t endbyte; /* inclusive */ 292 loff_t endbyte; /* inclusive */
297 int fput_needed;
298 umode_t i_mode; 293 umode_t i_mode;
299 294
300 ret = -EINVAL; 295 ret = -EINVAL;
@@ -333,17 +328,17 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
333 endbyte--; /* inclusive */ 328 endbyte--; /* inclusive */
334 329
335 ret = -EBADF; 330 ret = -EBADF;
336 file = fget_light(fd, &fput_needed); 331 f = fdget(fd);
337 if (!file) 332 if (!f.file)
338 goto out; 333 goto out;
339 334
340 i_mode = file->f_path.dentry->d_inode->i_mode; 335 i_mode = f.file->f_path.dentry->d_inode->i_mode;
341 ret = -ESPIPE; 336 ret = -ESPIPE;
342 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && 337 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
343 !S_ISLNK(i_mode)) 338 !S_ISLNK(i_mode))
344 goto out_put; 339 goto out_put;
345 340
346 mapping = file->f_mapping; 341 mapping = f.file->f_mapping;
347 if (!mapping) { 342 if (!mapping) {
348 ret = -EINVAL; 343 ret = -EINVAL;
349 goto out_put; 344 goto out_put;
@@ -366,7 +361,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
366 ret = filemap_fdatawait_range(mapping, offset, endbyte); 361 ret = filemap_fdatawait_range(mapping, offset, endbyte);
367 362
368out_put: 363out_put:
369 fput_light(file, fput_needed); 364 fdput(f);
370out: 365out:
371 return ret; 366 return ret;
372} 367}
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index b23ab736685..d33e506c1ea 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -360,5 +360,10 @@ int __init sysv_init_icache(void)
360 360
361void sysv_destroy_icache(void) 361void sysv_destroy_icache(void)
362{ 362{
363 /*
364 * Make sure all delayed rcu free inodes are flushed before we
365 * destroy cache.
366 */
367 rcu_barrier();
363 kmem_cache_destroy(sysv_inode_cachep); 368 kmem_cache_destroy(sysv_inode_cachep);
364} 369}
diff --git a/fs/timerfd.c b/fs/timerfd.c
index dffeb3795af..d03822bbf19 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -234,19 +234,17 @@ static const struct file_operations timerfd_fops = {
234 .llseek = noop_llseek, 234 .llseek = noop_llseek,
235}; 235};
236 236
237static struct file *timerfd_fget(int fd) 237static int timerfd_fget(int fd, struct fd *p)
238{ 238{
239 struct file *file; 239 struct fd f = fdget(fd);
240 240 if (!f.file)
241 file = fget(fd); 241 return -EBADF;
242 if (!file) 242 if (f.file->f_op != &timerfd_fops) {
243 return ERR_PTR(-EBADF); 243 fdput(f);
244 if (file->f_op != &timerfd_fops) { 244 return -EINVAL;
245 fput(file);
246 return ERR_PTR(-EINVAL);
247 } 245 }
248 246 *p = f;
249 return file; 247 return 0;
250} 248}
251 249
252SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) 250SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
@@ -284,7 +282,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
284 const struct itimerspec __user *, utmr, 282 const struct itimerspec __user *, utmr,
285 struct itimerspec __user *, otmr) 283 struct itimerspec __user *, otmr)
286{ 284{
287 struct file *file; 285 struct fd f;
288 struct timerfd_ctx *ctx; 286 struct timerfd_ctx *ctx;
289 struct itimerspec ktmr, kotmr; 287 struct itimerspec ktmr, kotmr;
290 int ret; 288 int ret;
@@ -297,10 +295,10 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
297 !timespec_valid(&ktmr.it_interval)) 295 !timespec_valid(&ktmr.it_interval))
298 return -EINVAL; 296 return -EINVAL;
299 297
300 file = timerfd_fget(ufd); 298 ret = timerfd_fget(ufd, &f);
301 if (IS_ERR(file)) 299 if (ret)
302 return PTR_ERR(file); 300 return ret;
303 ctx = file->private_data; 301 ctx = f.file->private_data;
304 302
305 timerfd_setup_cancel(ctx, flags); 303 timerfd_setup_cancel(ctx, flags);
306 304
@@ -334,7 +332,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
334 ret = timerfd_setup(ctx, flags, &ktmr); 332 ret = timerfd_setup(ctx, flags, &ktmr);
335 333
336 spin_unlock_irq(&ctx->wqh.lock); 334 spin_unlock_irq(&ctx->wqh.lock);
337 fput(file); 335 fdput(f);
338 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) 336 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
339 return -EFAULT; 337 return -EFAULT;
340 338
@@ -343,14 +341,13 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
343 341
344SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) 342SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
345{ 343{
346 struct file *file; 344 struct fd f;
347 struct timerfd_ctx *ctx; 345 struct timerfd_ctx *ctx;
348 struct itimerspec kotmr; 346 struct itimerspec kotmr;
349 347 int ret = timerfd_fget(ufd, &f);
350 file = timerfd_fget(ufd); 348 if (ret)
351 if (IS_ERR(file)) 349 return ret;
352 return PTR_ERR(file); 350 ctx = f.file->private_data;
353 ctx = file->private_data;
354 351
355 spin_lock_irq(&ctx->wqh.lock); 352 spin_lock_irq(&ctx->wqh.lock);
356 if (ctx->expired && ctx->tintv.tv64) { 353 if (ctx->expired && ctx->tintv.tv64) {
@@ -362,7 +359,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
362 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 359 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
363 kotmr.it_interval = ktime_to_timespec(ctx->tintv); 360 kotmr.it_interval = ktime_to_timespec(ctx->tintv);
364 spin_unlock_irq(&ctx->wqh.lock); 361 spin_unlock_irq(&ctx->wqh.lock);
365 fput(file); 362 fdput(f);
366 363
367 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; 364 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
368} 365}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 681f3a94244..49825427a0e 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2298,6 +2298,12 @@ static void __exit ubifs_exit(void)
2298 dbg_debugfs_exit(); 2298 dbg_debugfs_exit();
2299 ubifs_compressors_exit(); 2299 ubifs_compressors_exit();
2300 unregister_shrinker(&ubifs_shrinker_info); 2300 unregister_shrinker(&ubifs_shrinker_info);
2301
2302 /*
2303 * Make sure all delayed rcu free inodes are flushed before we
2304 * destroy cache.
2305 */
2306 rcu_barrier();
2301 kmem_cache_destroy(ubifs_inode_slab); 2307 kmem_cache_destroy(ubifs_inode_slab);
2302 unregister_filesystem(&ubifs_fs_type); 2308 unregister_filesystem(&ubifs_fs_type);
2303} 2309}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 862741dddf2..d44fb568abe 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -171,6 +171,11 @@ static int init_inodecache(void)
171 171
172static void destroy_inodecache(void) 172static void destroy_inodecache(void)
173{ 173{
174 /*
175 * Make sure all delayed rcu free inodes are flushed before we
176 * destroy cache.
177 */
178 rcu_barrier();
174 kmem_cache_destroy(udf_inode_cachep); 179 kmem_cache_destroy(udf_inode_cachep);
175} 180}
176 181
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 444927e5706..f7cfecfe1ca 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1466,6 +1466,11 @@ static int init_inodecache(void)
1466 1466
1467static void destroy_inodecache(void) 1467static void destroy_inodecache(void)
1468{ 1468{
1469 /*
1470 * Make sure all delayed rcu free inodes are flushed before we
1471 * destroy cache.
1472 */
1473 rcu_barrier();
1469 kmem_cache_destroy(ufs_inode_cachep); 1474 kmem_cache_destroy(ufs_inode_cachep);
1470} 1475}
1471 1476
diff --git a/fs/utimes.c b/fs/utimes.c
index fa4dbe451e2..bb0696a4173 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -140,19 +140,18 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times,
140 goto out; 140 goto out;
141 141
142 if (filename == NULL && dfd != AT_FDCWD) { 142 if (filename == NULL && dfd != AT_FDCWD) {
143 int fput_needed; 143 struct fd f;
144 struct file *file;
145 144
146 if (flags & AT_SYMLINK_NOFOLLOW) 145 if (flags & AT_SYMLINK_NOFOLLOW)
147 goto out; 146 goto out;
148 147
149 file = fget_light(dfd, &fput_needed); 148 f = fdget(dfd);
150 error = -EBADF; 149 error = -EBADF;
151 if (!file) 150 if (!f.file)
152 goto out; 151 goto out;
153 152
154 error = utimes_common(&file->f_path, times); 153 error = utimes_common(&f.file->f_path, times);
155 fput_light(file, fput_needed); 154 fdput(f);
156 } else { 155 } else {
157 struct path path; 156 struct path path;
158 int lookup_flags = 0; 157 int lookup_flags = 0;
diff --git a/fs/xattr.c b/fs/xattr.c
index f7f7f09b0b4..ca15fbd391c 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -403,22 +403,20 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
403SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, 403SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
404 const void __user *,value, size_t, size, int, flags) 404 const void __user *,value, size_t, size, int, flags)
405{ 405{
406 int fput_needed; 406 struct fd f = fdget(fd);
407 struct file *f;
408 struct dentry *dentry; 407 struct dentry *dentry;
409 int error = -EBADF; 408 int error = -EBADF;
410 409
411 f = fget_light(fd, &fput_needed); 410 if (!f.file)
412 if (!f)
413 return error; 411 return error;
414 dentry = f->f_path.dentry; 412 dentry = f.file->f_path.dentry;
415 audit_inode(NULL, dentry); 413 audit_inode(NULL, dentry);
416 error = mnt_want_write_file(f); 414 error = mnt_want_write_file(f.file);
417 if (!error) { 415 if (!error) {
418 error = setxattr(dentry, name, value, size, flags); 416 error = setxattr(dentry, name, value, size, flags);
419 mnt_drop_write_file(f); 417 mnt_drop_write_file(f.file);
420 } 418 }
421 fput_light(f, fput_needed); 419 fdput(f);
422 return error; 420 return error;
423} 421}
424 422
@@ -502,16 +500,14 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,
502SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, 500SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
503 void __user *, value, size_t, size) 501 void __user *, value, size_t, size)
504{ 502{
505 int fput_needed; 503 struct fd f = fdget(fd);
506 struct file *f;
507 ssize_t error = -EBADF; 504 ssize_t error = -EBADF;
508 505
509 f = fget_light(fd, &fput_needed); 506 if (!f.file)
510 if (!f)
511 return error; 507 return error;
512 audit_inode(NULL, f->f_path.dentry); 508 audit_inode(NULL, f.file->f_path.dentry);
513 error = getxattr(f->f_path.dentry, name, value, size); 509 error = getxattr(f.file->f_path.dentry, name, value, size);
514 fput_light(f, fput_needed); 510 fdput(f);
515 return error; 511 return error;
516} 512}
517 513
@@ -583,16 +579,14 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,
583 579
584SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) 580SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
585{ 581{
586 int fput_needed; 582 struct fd f = fdget(fd);
587 struct file *f;
588 ssize_t error = -EBADF; 583 ssize_t error = -EBADF;
589 584
590 f = fget_light(fd, &fput_needed); 585 if (!f.file)
591 if (!f)
592 return error; 586 return error;
593 audit_inode(NULL, f->f_path.dentry); 587 audit_inode(NULL, f.file->f_path.dentry);
594 error = listxattr(f->f_path.dentry, list, size); 588 error = listxattr(f.file->f_path.dentry, list, size);
595 fput_light(f, fput_needed); 589 fdput(f);
596 return error; 590 return error;
597} 591}
598 592
@@ -652,22 +646,20 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
652 646
653SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) 647SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
654{ 648{
655 int fput_needed; 649 struct fd f = fdget(fd);
656 struct file *f;
657 struct dentry *dentry; 650 struct dentry *dentry;
658 int error = -EBADF; 651 int error = -EBADF;
659 652
660 f = fget_light(fd, &fput_needed); 653 if (!f.file)
661 if (!f)
662 return error; 654 return error;
663 dentry = f->f_path.dentry; 655 dentry = f.file->f_path.dentry;
664 audit_inode(NULL, dentry); 656 audit_inode(NULL, dentry);
665 error = mnt_want_write_file(f); 657 error = mnt_want_write_file(f.file);
666 if (!error) { 658 if (!error) {
667 error = removexattr(dentry, name); 659 error = removexattr(dentry, name);
668 mnt_drop_write_file(f); 660 mnt_drop_write_file(f.file);
669 } 661 }
670 fput_light(f, fput_needed); 662 fdput(f);
671 return error; 663 return error;
672} 664}
673 665
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e00de08dc8a..b9b8646e62d 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -48,44 +48,44 @@ xfs_swapext(
48 xfs_swapext_t *sxp) 48 xfs_swapext_t *sxp)
49{ 49{
50 xfs_inode_t *ip, *tip; 50 xfs_inode_t *ip, *tip;
51 struct file *file, *tmp_file; 51 struct fd f, tmp;
52 int error = 0; 52 int error = 0;
53 53
54 /* Pull information for the target fd */ 54 /* Pull information for the target fd */
55 file = fget((int)sxp->sx_fdtarget); 55 f = fdget((int)sxp->sx_fdtarget);
56 if (!file) { 56 if (!f.file) {
57 error = XFS_ERROR(EINVAL); 57 error = XFS_ERROR(EINVAL);
58 goto out; 58 goto out;
59 } 59 }
60 60
61 if (!(file->f_mode & FMODE_WRITE) || 61 if (!(f.file->f_mode & FMODE_WRITE) ||
62 !(file->f_mode & FMODE_READ) || 62 !(f.file->f_mode & FMODE_READ) ||
63 (file->f_flags & O_APPEND)) { 63 (f.file->f_flags & O_APPEND)) {
64 error = XFS_ERROR(EBADF); 64 error = XFS_ERROR(EBADF);
65 goto out_put_file; 65 goto out_put_file;
66 } 66 }
67 67
68 tmp_file = fget((int)sxp->sx_fdtmp); 68 tmp = fdget((int)sxp->sx_fdtmp);
69 if (!tmp_file) { 69 if (!tmp.file) {
70 error = XFS_ERROR(EINVAL); 70 error = XFS_ERROR(EINVAL);
71 goto out_put_file; 71 goto out_put_file;
72 } 72 }
73 73
74 if (!(tmp_file->f_mode & FMODE_WRITE) || 74 if (!(tmp.file->f_mode & FMODE_WRITE) ||
75 !(tmp_file->f_mode & FMODE_READ) || 75 !(tmp.file->f_mode & FMODE_READ) ||
76 (tmp_file->f_flags & O_APPEND)) { 76 (tmp.file->f_flags & O_APPEND)) {
77 error = XFS_ERROR(EBADF); 77 error = XFS_ERROR(EBADF);
78 goto out_put_tmp_file; 78 goto out_put_tmp_file;
79 } 79 }
80 80
81 if (IS_SWAPFILE(file->f_path.dentry->d_inode) || 81 if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) ||
82 IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) { 82 IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) {
83 error = XFS_ERROR(EINVAL); 83 error = XFS_ERROR(EINVAL);
84 goto out_put_tmp_file; 84 goto out_put_tmp_file;
85 } 85 }
86 86
87 ip = XFS_I(file->f_path.dentry->d_inode); 87 ip = XFS_I(f.file->f_path.dentry->d_inode);
88 tip = XFS_I(tmp_file->f_path.dentry->d_inode); 88 tip = XFS_I(tmp.file->f_path.dentry->d_inode);
89 89
90 if (ip->i_mount != tip->i_mount) { 90 if (ip->i_mount != tip->i_mount) {
91 error = XFS_ERROR(EINVAL); 91 error = XFS_ERROR(EINVAL);
@@ -105,9 +105,9 @@ xfs_swapext(
105 error = xfs_swap_extents(ip, tip, sxp); 105 error = xfs_swap_extents(ip, tip, sxp);
106 106
107 out_put_tmp_file: 107 out_put_tmp_file:
108 fput(tmp_file); 108 fdput(tmp);
109 out_put_file: 109 out_put_file:
110 fput(file); 110 fdput(f);
111 out: 111 out:
112 return error; 112 return error;
113} 113}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0e0232c3b6d..8305f2ac677 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -70,16 +70,16 @@ xfs_find_handle(
70 int hsize; 70 int hsize;
71 xfs_handle_t handle; 71 xfs_handle_t handle;
72 struct inode *inode; 72 struct inode *inode;
73 struct file *file = NULL; 73 struct fd f;
74 struct path path; 74 struct path path;
75 int error; 75 int error;
76 struct xfs_inode *ip; 76 struct xfs_inode *ip;
77 77
78 if (cmd == XFS_IOC_FD_TO_HANDLE) { 78 if (cmd == XFS_IOC_FD_TO_HANDLE) {
79 file = fget(hreq->fd); 79 f = fdget(hreq->fd);
80 if (!file) 80 if (!f.file)
81 return -EBADF; 81 return -EBADF;
82 inode = file->f_path.dentry->d_inode; 82 inode = f.file->f_path.dentry->d_inode;
83 } else { 83 } else {
84 error = user_lpath((const char __user *)hreq->path, &path); 84 error = user_lpath((const char __user *)hreq->path, &path);
85 if (error) 85 if (error)
@@ -134,7 +134,7 @@ xfs_find_handle(
134 134
135 out_put: 135 out_put:
136 if (cmd == XFS_IOC_FD_TO_HANDLE) 136 if (cmd == XFS_IOC_FD_TO_HANDLE)
137 fput(file); 137 fdput(f);
138 else 138 else
139 path_put(&path); 139 path_put(&path);
140 return error; 140 return error;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 001537f92ca..e0fd2734189 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1506,6 +1506,11 @@ xfs_init_zones(void)
1506STATIC void 1506STATIC void
1507xfs_destroy_zones(void) 1507xfs_destroy_zones(void)
1508{ 1508{
1509 /*
1510 * Make sure all delayed rcu free are flushed before we
1511 * destroy caches.
1512 */
1513 rcu_barrier();
1509 kmem_zone_destroy(xfs_ili_zone); 1514 kmem_zone_destroy(xfs_ili_zone);
1510 kmem_zone_destroy(xfs_inode_zone); 1515 kmem_zone_destroy(xfs_inode_zone);
1511 kmem_zone_destroy(xfs_efi_zone); 1516 kmem_zone_destroy(xfs_efi_zone);