aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/alpha/kernel/osf_sys.c13
-rw-r--r--arch/ia64/kernel/perfmon.c18
-rw-r--r--arch/parisc/hpux/fs.c17
-rw-r--r--arch/powerpc/include/asm/systbl.h4
-rw-r--r--arch/powerpc/include/asm/unistd.h1
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c45
-rw-r--r--arch/powerpc/platforms/cell/spu_syscalls.c21
-rw-r--r--arch/powerpc/platforms/cell/spufs/coredump.c40
-rw-r--r--arch/s390/hypfs/inode.c2
-rw-r--r--arch/sparc/include/asm/unistd.h1
-rw-r--r--arch/sparc/kernel/sys32.S2
-rw-r--r--arch/sparc/kernel/sys_sparc32.c46
-rw-r--r--arch/um/drivers/mconsole_kern.c99
-rw-r--r--drivers/base/dma-buf.c3
-rw-r--r--drivers/infiniband/core/ucma.c10
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c21
-rw-r--r--drivers/infiniband/core/uverbs_main.c11
-rw-r--r--drivers/staging/android/binder.c111
-rw-r--r--drivers/staging/omapdrm/omap_gem.c3
-rw-r--r--drivers/tty/tty_io.c45
-rw-r--r--drivers/usb/gadget/f_fs.c4
-rw-r--r--drivers/vfio/vfio.c15
-rw-r--r--drivers/vhost/vhost.c8
-rw-r--r--drivers/video/msm/mdp.c12
-rw-r--r--fs/9p/v9fs.c5
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/super.c5
-rw-r--r--fs/affs/super.c5
-rw-r--r--fs/afs/super.c5
-rw-r--r--fs/autofs4/dev-ioctl.c18
-rw-r--r--fs/autofs4/waitq.c3
-rw-r--r--fs/befs/linuxvfs.c5
-rw-r--r--fs/bfs/inode.c5
-rw-r--r--fs/binfmt_elf.c19
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/inode.c5
-rw-r--r--fs/btrfs/ioctl.c32
-rw-r--r--fs/btrfs/reada.c18
-rw-r--r--fs/ceph/inode.c4
-rw-r--r--fs/ceph/super.c5
-rw-r--r--fs/cifs/cifsfs.c5
-rw-r--r--fs/coda/inode.c37
-rw-r--r--fs/compat.c112
-rw-r--r--fs/compat_ioctl.c27
-rw-r--r--fs/coredump.c686
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/ecryptfs/main.c6
-rw-r--r--fs/efs/super.c5
-rw-r--r--fs/eventpoll.c23
-rw-r--r--fs/exec.c688
-rw-r--r--fs/exofs/super.c5
-rw-r--r--fs/ext2/super.c5
-rw-r--r--fs/ext3/super.c5
-rw-r--r--fs/ext4/ioctl.c15
-rw-r--r--fs/ext4/super.c5
-rw-r--r--fs/fat/inode.c5
-rw-r--r--fs/fcntl.c166
-rw-r--r--fs/fhandle.c17
-rw-r--r--fs/file.c573
-rw-r--r--fs/file_table.c106
-rw-r--r--fs/freevxfs/vxfs_super.c5
-rw-r--r--fs/fuse/dev.c3
-rw-r--r--fs/fuse/inode.c6
-rw-r--r--fs/hfs/super.c6
-rw-r--r--fs/hfsplus/super.c6
-rw-r--r--fs/hpfs/super.c5
-rw-r--r--fs/hugetlbfs/inode.c5
-rw-r--r--fs/ioctl.c25
-rw-r--r--fs/isofs/inode.c5
-rw-r--r--fs/jffs2/super.c6
-rw-r--r--fs/jfs/super.c6
-rw-r--r--fs/locks.c20
-rw-r--r--fs/logfs/inode.c5
-rw-r--r--fs/minix/inode.c5
-rw-r--r--fs/namei.c41
-rw-r--r--fs/ncpfs/inode.c5
-rw-r--r--fs/nfs/inode.c5
-rw-r--r--fs/nfsd/nfs4state.c3
-rw-r--r--fs/nilfs2/super.c6
-rw-r--r--fs/notify/fanotify/fanotify_user.c87
-rw-r--r--fs/notify/inotify/inotify_user.c28
-rw-r--r--fs/ntfs/super.c6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c38
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c5
-rw-r--r--fs/ocfs2/super.c5
-rw-r--r--fs/open.c130
-rw-r--r--fs/openpromfs/inode.c5
-rw-r--r--fs/pipe.c31
-rw-r--r--fs/proc/Makefile2
-rw-r--r--fs/proc/base.c417
-rw-r--r--fs/proc/fd.c367
-rw-r--r--fs/proc/fd.h14
-rw-r--r--fs/proc/internal.h48
-rw-r--r--fs/qnx4/inode.c5
-rw-r--r--fs/qnx6/inode.c5
-rw-r--r--fs/read_write.c180
-rw-r--r--fs/read_write.h2
-rw-r--r--fs/readdir.c36
-rw-r--r--fs/reiserfs/super.c5
-rw-r--r--fs/romfs/super.c5
-rw-r--r--fs/select.c31
-rw-r--r--fs/signalfd.c13
-rw-r--r--fs/splice.c69
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/stat.c10
-rw-r--r--fs/statfs.c9
-rw-r--r--fs/super.c6
-rw-r--r--fs/sync.c33
-rw-r--r--fs/sysv/inode.c5
-rw-r--r--fs/timerfd.c45
-rw-r--r--fs/ubifs/super.c6
-rw-r--r--fs/udf/super.c5
-rw-r--r--fs/ufs/super.c5
-rw-r--r--fs/utimes.c11
-rw-r--r--fs/xattr.c52
-rw-r--r--fs/xfs/xfs_dfrag.c34
-rw-r--r--fs/xfs/xfs_ioctl.c10
-rw-r--r--fs/xfs/xfs_super.c5
-rw-r--r--include/linux/compat.h3
-rw-r--r--include/linux/fdtable.h39
-rw-r--r--include/linux/file.h35
-rw-r--r--include/linux/fs.h10
-rw-r--r--include/linux/net.h3
-rw-r--r--include/linux/sched.h1
-rw-r--r--ipc/mqueue.c78
-rw-r--r--kernel/events/core.c72
-rw-r--r--kernel/exit.c97
-rw-r--r--kernel/sys.c14
-rw-r--r--kernel/taskstats.c11
-rw-r--r--mm/fadvise.c34
-rw-r--r--mm/fremap.c3
-rw-r--r--mm/mmap.c3
-rw-r--r--mm/nommu.c6
-rw-r--r--mm/readahead.c14
-rw-r--r--net/9p/trans_fd.c16
-rw-r--r--net/compat.c3
-rw-r--r--net/core/netprio_cgroup.c38
-rw-r--r--net/core/scm.c3
-rw-r--r--net/sctp/socket.c25
-rw-r--r--net/socket.c68
-rw-r--r--security/selinux/hooks.c73
-rw-r--r--sound/core/pcm_native.c13
142 files changed, 2915 insertions, 2877 deletions
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 9503a4be40f6..63e77e3944ce 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -145,27 +145,24 @@ SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd,
145 long __user *, basep) 145 long __user *, basep)
146{ 146{
147 int error; 147 int error;
148 struct file *file; 148 struct fd arg = fdget(fd);
149 struct osf_dirent_callback buf; 149 struct osf_dirent_callback buf;
150 150
151 error = -EBADF; 151 if (!arg.file)
152 file = fget(fd); 152 return -EBADF;
153 if (!file)
154 goto out;
155 153
156 buf.dirent = dirent; 154 buf.dirent = dirent;
157 buf.basep = basep; 155 buf.basep = basep;
158 buf.count = count; 156 buf.count = count;
159 buf.error = 0; 157 buf.error = 0;
160 158
161 error = vfs_readdir(file, osf_filldir, &buf); 159 error = vfs_readdir(arg.file, osf_filldir, &buf);
162 if (error >= 0) 160 if (error >= 0)
163 error = buf.error; 161 error = buf.error;
164 if (count != buf.count) 162 if (count != buf.count)
165 error = count - buf.count; 163 error = count - buf.count;
166 164
167 fput(file); 165 fdput(arg);
168 out:
169 return error; 166 return error;
170} 167}
171 168
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 5a5c22245dee..f388b4e18a37 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2306,7 +2306,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
2306 * partially initialize the vma for the sampling buffer 2306 * partially initialize the vma for the sampling buffer
2307 */ 2307 */
2308 vma->vm_mm = mm; 2308 vma->vm_mm = mm;
2309 vma->vm_file = filp; 2309 vma->vm_file = get_file(filp);
2310 vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; 2310 vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED;
2311 vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ 2311 vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */
2312 2312
@@ -2345,8 +2345,6 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
2345 goto error; 2345 goto error;
2346 } 2346 }
2347 2347
2348 get_file(filp);
2349
2350 /* 2348 /*
2351 * now insert the vma in the vm list for the process, must be 2349 * now insert the vma in the vm list for the process, must be
2352 * done with mmap lock held 2350 * done with mmap lock held
@@ -4782,7 +4780,7 @@ recheck:
4782asmlinkage long 4780asmlinkage long
4783sys_perfmonctl (int fd, int cmd, void __user *arg, int count) 4781sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
4784{ 4782{
4785 struct file *file = NULL; 4783 struct fd f = {NULL, 0};
4786 pfm_context_t *ctx = NULL; 4784 pfm_context_t *ctx = NULL;
4787 unsigned long flags = 0UL; 4785 unsigned long flags = 0UL;
4788 void *args_k = NULL; 4786 void *args_k = NULL;
@@ -4879,17 +4877,17 @@ restart_args:
4879 4877
4880 ret = -EBADF; 4878 ret = -EBADF;
4881 4879
4882 file = fget(fd); 4880 f = fdget(fd);
4883 if (unlikely(file == NULL)) { 4881 if (unlikely(f.file == NULL)) {
4884 DPRINT(("invalid fd %d\n", fd)); 4882 DPRINT(("invalid fd %d\n", fd));
4885 goto error_args; 4883 goto error_args;
4886 } 4884 }
4887 if (unlikely(PFM_IS_FILE(file) == 0)) { 4885 if (unlikely(PFM_IS_FILE(f.file) == 0)) {
4888 DPRINT(("fd %d not related to perfmon\n", fd)); 4886 DPRINT(("fd %d not related to perfmon\n", fd));
4889 goto error_args; 4887 goto error_args;
4890 } 4888 }
4891 4889
4892 ctx = file->private_data; 4890 ctx = f.file->private_data;
4893 if (unlikely(ctx == NULL)) { 4891 if (unlikely(ctx == NULL)) {
4894 DPRINT(("no context for fd %d\n", fd)); 4892 DPRINT(("no context for fd %d\n", fd));
4895 goto error_args; 4893 goto error_args;
@@ -4919,8 +4917,8 @@ abort_locked:
4919 if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; 4917 if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
4920 4918
4921error_args: 4919error_args:
4922 if (file) 4920 if (f.file)
4923 fput(file); 4921 fdput(f);
4924 4922
4925 kfree(args_k); 4923 kfree(args_k);
4926 4924
diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c
index c71eb6c79897..6785de7bd2a0 100644
--- a/arch/parisc/hpux/fs.c
+++ b/arch/parisc/hpux/fs.c
@@ -109,33 +109,32 @@ Efault:
109 109
110int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned int count) 110int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned int count)
111{ 111{
112 struct file * file; 112 struct fd arg;
113 struct hpux_dirent __user * lastdirent; 113 struct hpux_dirent __user * lastdirent;
114 struct getdents_callback buf; 114 struct getdents_callback buf;
115 int error = -EBADF; 115 int error;
116 116
117 file = fget(fd); 117 arg = fdget(fd);
118 if (!file) 118 if (!arg.file)
119 goto out; 119 return -EBADF;
120 120
121 buf.current_dir = dirent; 121 buf.current_dir = dirent;
122 buf.previous = NULL; 122 buf.previous = NULL;
123 buf.count = count; 123 buf.count = count;
124 buf.error = 0; 124 buf.error = 0;
125 125
126 error = vfs_readdir(file, filldir, &buf); 126 error = vfs_readdir(arg.file, filldir, &buf);
127 if (error >= 0) 127 if (error >= 0)
128 error = buf.error; 128 error = buf.error;
129 lastdirent = buf.previous; 129 lastdirent = buf.previous;
130 if (lastdirent) { 130 if (lastdirent) {
131 if (put_user(file->f_pos, &lastdirent->d_off)) 131 if (put_user(arg.file->f_pos, &lastdirent->d_off))
132 error = -EFAULT; 132 error = -EFAULT;
133 else 133 else
134 error = count - buf.count; 134 error = count - buf.count;
135 } 135 }
136 136
137 fput(file); 137 fdput(arg);
138out:
139 return error; 138 return error;
140} 139}
141 140
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 559ae1ee6706..840838769853 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -189,7 +189,7 @@ SYSCALL_SPU(getcwd)
189SYSCALL_SPU(capget) 189SYSCALL_SPU(capget)
190SYSCALL_SPU(capset) 190SYSCALL_SPU(capset)
191COMPAT_SYS(sigaltstack) 191COMPAT_SYS(sigaltstack)
192SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile) 192SYSX_SPU(sys_sendfile,compat_sys_sendfile_wrapper,sys_sendfile)
193SYSCALL(ni_syscall) 193SYSCALL(ni_syscall)
194SYSCALL(ni_syscall) 194SYSCALL(ni_syscall)
195PPC_SYS(vfork) 195PPC_SYS(vfork)
@@ -229,7 +229,7 @@ COMPAT_SYS_SPU(sched_setaffinity)
229COMPAT_SYS_SPU(sched_getaffinity) 229COMPAT_SYS_SPU(sched_getaffinity)
230SYSCALL(ni_syscall) 230SYSCALL(ni_syscall)
231SYSCALL(ni_syscall) 231SYSCALL(ni_syscall)
232SYS32ONLY(sendfile64) 232SYSX(sys_ni_syscall,compat_sys_sendfile64_wrapper,sys_sendfile64)
233COMPAT_SYS_SPU(io_setup) 233COMPAT_SYS_SPU(io_setup)
234SYSCALL_SPU(io_destroy) 234SYSCALL_SPU(io_destroy)
235COMPAT_SYS_SPU(io_getevents) 235COMPAT_SYS_SPU(io_getevents)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index bd377a368611..c683fa350add 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -419,6 +419,7 @@
419#define __ARCH_WANT_COMPAT_SYS_TIME 419#define __ARCH_WANT_COMPAT_SYS_TIME
420#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND 420#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
421#define __ARCH_WANT_SYS_NEWFSTATAT 421#define __ARCH_WANT_SYS_NEWFSTATAT
422#define __ARCH_WANT_COMPAT_SYS_SENDFILE
422#endif 423#endif
423 424
424/* 425/*
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 81c570633ead..abd1112da54f 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -143,48 +143,17 @@ long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t pt
143 * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) 143 * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
144 * and the register representation of a signed int (msr in 64-bit mode) is performed. 144 * and the register representation of a signed int (msr in 64-bit mode) is performed.
145 */ 145 */
146asmlinkage long compat_sys_sendfile(u32 out_fd, u32 in_fd, compat_off_t __user * offset, u32 count) 146asmlinkage long compat_sys_sendfile_wrapper(u32 out_fd, u32 in_fd,
147 compat_off_t __user *offset, u32 count)
147{ 148{
148 mm_segment_t old_fs = get_fs(); 149 return compat_sys_sendfile((int)out_fd, (int)in_fd, offset, count);
149 int ret;
150 off_t of;
151 off_t __user *up;
152
153 if (offset && get_user(of, offset))
154 return -EFAULT;
155
156 /* The __user pointer cast is valid because of the set_fs() */
157 set_fs(KERNEL_DS);
158 up = offset ? (off_t __user *) &of : NULL;
159 ret = sys_sendfile((int)out_fd, (int)in_fd, up, count);
160 set_fs(old_fs);
161
162 if (offset && put_user(of, offset))
163 return -EFAULT;
164
165 return ret;
166} 150}
167 151
168asmlinkage int compat_sys_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, s32 count) 152asmlinkage long compat_sys_sendfile64_wrapper(u32 out_fd, u32 in_fd,
153 compat_loff_t __user *offset, u32 count)
169{ 154{
170 mm_segment_t old_fs = get_fs(); 155 return sys_sendfile((int)out_fd, (int)in_fd,
171 int ret; 156 (off_t __user *)offset, count);
172 loff_t lof;
173 loff_t __user *up;
174
175 if (offset && get_user(lof, offset))
176 return -EFAULT;
177
178 /* The __user pointer cast is valid because of the set_fs() */
179 set_fs(KERNEL_DS);
180 up = offset ? (loff_t __user *) &lof : NULL;
181 ret = sys_sendfile64(out_fd, in_fd, up, count);
182 set_fs(old_fs);
183
184 if (offset && put_user(lof, offset))
185 return -EFAULT;
186
187 return ret;
188} 157}
189 158
190long compat_sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, 159long compat_sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
index 714bbfc3162c..db4e638cf408 100644
--- a/arch/powerpc/platforms/cell/spu_syscalls.c
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -69,8 +69,6 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
69 umode_t, mode, int, neighbor_fd) 69 umode_t, mode, int, neighbor_fd)
70{ 70{
71 long ret; 71 long ret;
72 struct file *neighbor;
73 int fput_needed;
74 struct spufs_calls *calls; 72 struct spufs_calls *calls;
75 73
76 calls = spufs_calls_get(); 74 calls = spufs_calls_get();
@@ -78,11 +76,11 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
78 return -ENOSYS; 76 return -ENOSYS;
79 77
80 if (flags & SPU_CREATE_AFFINITY_SPU) { 78 if (flags & SPU_CREATE_AFFINITY_SPU) {
79 struct fd neighbor = fdget(neighbor_fd);
81 ret = -EBADF; 80 ret = -EBADF;
82 neighbor = fget_light(neighbor_fd, &fput_needed); 81 if (neighbor.file) {
83 if (neighbor) { 82 ret = calls->create_thread(name, flags, mode, neighbor.file);
84 ret = calls->create_thread(name, flags, mode, neighbor); 83 fdput(neighbor);
85 fput_light(neighbor, fput_needed);
86 } 84 }
87 } else 85 } else
88 ret = calls->create_thread(name, flags, mode, NULL); 86 ret = calls->create_thread(name, flags, mode, NULL);
@@ -94,8 +92,7 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
94asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) 92asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
95{ 93{
96 long ret; 94 long ret;
97 struct file *filp; 95 struct fd arg;
98 int fput_needed;
99 struct spufs_calls *calls; 96 struct spufs_calls *calls;
100 97
101 calls = spufs_calls_get(); 98 calls = spufs_calls_get();
@@ -103,10 +100,10 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
103 return -ENOSYS; 100 return -ENOSYS;
104 101
105 ret = -EBADF; 102 ret = -EBADF;
106 filp = fget_light(fd, &fput_needed); 103 arg = fdget(fd);
107 if (filp) { 104 if (arg.file) {
108 ret = calls->spu_run(filp, unpc, ustatus); 105 ret = calls->spu_run(arg.file, unpc, ustatus);
109 fput_light(filp, fput_needed); 106 fdput(arg);
110 } 107 }
111 108
112 spufs_calls_put(calls); 109 spufs_calls_put(calls);
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index c2c5b078ba80..657e3f233a64 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -106,6 +106,17 @@ static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
106 return total; 106 return total;
107} 107}
108 108
109static int match_context(const void *v, struct file *file, unsigned fd)
110{
111 struct spu_context *ctx;
112 if (file->f_op != &spufs_context_fops)
113 return 0;
114 ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx;
115 if (ctx->flags & SPU_CREATE_NOSCHED)
116 return 0;
117 return fd + 1;
118}
119
109/* 120/*
110 * The additional architecture-specific notes for Cell are various 121 * The additional architecture-specific notes for Cell are various
111 * context files in the spu context. 122 * context files in the spu context.
@@ -115,29 +126,18 @@ static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
115 * internal functionality to dump them without needing to actually 126 * internal functionality to dump them without needing to actually
116 * open the files. 127 * open the files.
117 */ 128 */
129/*
130 * descriptor table is not shared, so files can't change or go away.
131 */
118static struct spu_context *coredump_next_context(int *fd) 132static struct spu_context *coredump_next_context(int *fd)
119{ 133{
120 struct fdtable *fdt = files_fdtable(current->files);
121 struct file *file; 134 struct file *file;
122 struct spu_context *ctx = NULL; 135 int n = iterate_fd(current->files, *fd, match_context, NULL);
123 136 if (!n)
124 for (; *fd < fdt->max_fds; (*fd)++) { 137 return NULL;
125 if (!fd_is_open(*fd, fdt)) 138 *fd = n - 1;
126 continue; 139 file = fcheck(*fd);
127 140 return SPUFS_I(file->f_dentry->d_inode)->i_ctx;
128 file = fcheck(*fd);
129
130 if (!file || file->f_op != &spufs_context_fops)
131 continue;
132
133 ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx;
134 if (ctx->flags & SPU_CREATE_NOSCHED)
135 continue;
136
137 break;
138 }
139
140 return ctx;
141} 141}
142 142
143int spufs_coredump_extra_notes_size(void) 143int spufs_coredump_extra_notes_size(void)
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 124ec1a55cc9..06ea69bd387a 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -72,8 +72,6 @@ static void hypfs_remove(struct dentry *dentry)
72 struct dentry *parent; 72 struct dentry *parent;
73 73
74 parent = dentry->d_parent; 74 parent = dentry->d_parent;
75 if (!parent || !parent->d_inode)
76 return;
77 mutex_lock(&parent->d_inode->i_mutex); 75 mutex_lock(&parent->d_inode->i_mutex);
78 if (hypfs_positive(dentry)) { 76 if (hypfs_positive(dentry)) {
79 if (S_ISDIR(dentry->d_inode->i_mode)) 77 if (S_ISDIR(dentry->d_inode->i_mode))
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index fb2693464807..d9a677c51926 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -447,6 +447,7 @@
447#else 447#else
448#define __ARCH_WANT_COMPAT_SYS_TIME 448#define __ARCH_WANT_COMPAT_SYS_TIME
449#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND 449#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
450#define __ARCH_WANT_COMPAT_SYS_SENDFILE
450#endif 451#endif
451 452
452/* 453/*
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index d97f3eb72e06..44025f4ba41f 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -90,7 +90,7 @@ SIGN1(sys32_mkdir, sys_mkdir, %o1)
90SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5) 90SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5)
91SIGN1(sys32_sysfs, compat_sys_sysfs, %o0) 91SIGN1(sys32_sysfs, compat_sys_sysfs, %o0)
92SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1) 92SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1)
93SIGN2(sys32_sendfile64, compat_sys_sendfile64, %o0, %o1) 93SIGN2(sys32_sendfile64, sys_sendfile, %o0, %o1)
94SIGN1(sys32_prctl, sys_prctl, %o0) 94SIGN1(sys32_prctl, sys_prctl, %o0)
95SIGN1(sys32_sched_rr_get_interval, compat_sys_sched_rr_get_interval, %o0) 95SIGN1(sys32_sched_rr_get_interval, compat_sys_sched_rr_get_interval, %o0)
96SIGN2(sys32_waitpid, sys_waitpid, %o0, %o2) 96SIGN2(sys32_waitpid, sys_waitpid, %o0, %o2)
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index f7392336961f..d862499eb01c 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -506,52 +506,6 @@ long compat_sys_fadvise64_64(int fd,
506 advice); 506 advice);
507} 507}
508 508
509asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
510 compat_off_t __user *offset,
511 compat_size_t count)
512{
513 mm_segment_t old_fs = get_fs();
514 int ret;
515 off_t of;
516
517 if (offset && get_user(of, offset))
518 return -EFAULT;
519
520 set_fs(KERNEL_DS);
521 ret = sys_sendfile(out_fd, in_fd,
522 offset ? (off_t __user *) &of : NULL,
523 count);
524 set_fs(old_fs);
525
526 if (offset && put_user(of, offset))
527 return -EFAULT;
528
529 return ret;
530}
531
532asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd,
533 compat_loff_t __user *offset,
534 compat_size_t count)
535{
536 mm_segment_t old_fs = get_fs();
537 int ret;
538 loff_t lof;
539
540 if (offset && get_user(lof, offset))
541 return -EFAULT;
542
543 set_fs(KERNEL_DS);
544 ret = sys_sendfile64(out_fd, in_fd,
545 offset ? (loff_t __user *) &lof : NULL,
546 count);
547 set_fs(old_fs);
548
549 if (offset && put_user(lof, offset))
550 return -EFAULT;
551
552 return ret;
553}
554
555/* This is just a version for 32-bit applications which does 509/* This is just a version for 32-bit applications which does
556 * not force O_LARGEFILE on. 510 * not force O_LARGEFILE on.
557 */ 511 */
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index c17de0db6736..9efeb6da48bc 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -21,6 +21,9 @@
21#include <linux/un.h> 21#include <linux/un.h>
22#include <linux/workqueue.h> 22#include <linux/workqueue.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/fs.h>
25#include <linux/mount.h>
26#include <linux/file.h>
24#include <asm/uaccess.h> 27#include <asm/uaccess.h>
25#include <asm/switch_to.h> 28#include <asm/switch_to.h>
26 29
@@ -118,90 +121,38 @@ void mconsole_log(struct mc_request *req)
118 mconsole_reply(req, "", 0, 0); 121 mconsole_reply(req, "", 0, 0);
119} 122}
120 123
121/* This is a more convoluted version of mconsole_proc, which has some stability
122 * problems; however, we need it fixed, because it is expected that UML users
123 * mount HPPFS instead of procfs on /proc. And we want mconsole_proc to still
124 * show the real procfs content, not the ones from hppfs.*/
125#if 0
126void mconsole_proc(struct mc_request *req) 124void mconsole_proc(struct mc_request *req)
127{ 125{
128 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; 126 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
129 struct file *file;
130 int n;
131 char *ptr = req->request.data, *buf;
132 mm_segment_t old_fs = get_fs();
133
134 ptr += strlen("proc");
135 ptr = skip_spaces(ptr);
136
137 file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
138 if (IS_ERR(file)) {
139 mconsole_reply(req, "Failed to open file", 1, 0);
140 goto out;
141 }
142
143 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
144 if (buf == NULL) {
145 mconsole_reply(req, "Failed to allocate buffer", 1, 0);
146 goto out_fput;
147 }
148
149 if (file->f_op->read) {
150 do {
151 loff_t pos;
152 set_fs(KERNEL_DS);
153 n = vfs_read(file, buf, PAGE_SIZE - 1, &pos);
154 file_pos_write(file, pos);
155 set_fs(old_fs);
156 if (n >= 0) {
157 buf[n] = '\0';
158 mconsole_reply(req, buf, 0, (n > 0));
159 }
160 else {
161 mconsole_reply(req, "Read of file failed",
162 1, 0);
163 goto out_free;
164 }
165 } while (n > 0);
166 }
167 else mconsole_reply(req, "", 0, 0);
168
169 out_free:
170 kfree(buf);
171 out_fput:
172 fput(file);
173 out: ;
174}
175#endif
176
177void mconsole_proc(struct mc_request *req)
178{
179 char path[64];
180 char *buf; 127 char *buf;
181 int len; 128 int len;
182 int fd; 129 struct file *file;
183 int first_chunk = 1; 130 int first_chunk = 1;
184 char *ptr = req->request.data; 131 char *ptr = req->request.data;
185 132
186 ptr += strlen("proc"); 133 ptr += strlen("proc");
187 ptr = skip_spaces(ptr); 134 ptr = skip_spaces(ptr);
188 snprintf(path, sizeof(path), "/proc/%s", ptr);
189 135
190 fd = sys_open(path, 0, 0); 136 file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
191 if (fd < 0) { 137 if (IS_ERR(file)) {
192 mconsole_reply(req, "Failed to open file", 1, 0); 138 mconsole_reply(req, "Failed to open file", 1, 0);
193 printk(KERN_ERR "open %s: %d\n",path,fd); 139 printk(KERN_ERR "open /proc/%s: %ld\n", ptr, PTR_ERR(file));
194 goto out; 140 goto out;
195 } 141 }
196 142
197 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 143 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
198 if (buf == NULL) { 144 if (buf == NULL) {
199 mconsole_reply(req, "Failed to allocate buffer", 1, 0); 145 mconsole_reply(req, "Failed to allocate buffer", 1, 0);
200 goto out_close; 146 goto out_fput;
201 } 147 }
202 148
203 for (;;) { 149 do {
204 len = sys_read(fd, buf, PAGE_SIZE-1); 150 loff_t pos;
151 mm_segment_t old_fs = get_fs();
152 set_fs(KERNEL_DS);
153 len = vfs_read(file, buf, PAGE_SIZE - 1, &pos);
154 set_fs(old_fs);
155 file->f_pos = pos;
205 if (len < 0) { 156 if (len < 0) {
206 mconsole_reply(req, "Read of file failed", 1, 0); 157 mconsole_reply(req, "Read of file failed", 1, 0);
207 goto out_free; 158 goto out_free;
@@ -211,22 +162,14 @@ void mconsole_proc(struct mc_request *req)
211 mconsole_reply(req, "\n", 0, 1); 162 mconsole_reply(req, "\n", 0, 1);
212 first_chunk = 0; 163 first_chunk = 0;
213 } 164 }
214 if (len == PAGE_SIZE-1) { 165 buf[len] = '\0';
215 buf[len] = '\0'; 166 mconsole_reply(req, buf, 0, (len > 0));
216 mconsole_reply(req, buf, 0, 1); 167 } while (len > 0);
217 } else {
218 buf[len] = '\0';
219 mconsole_reply(req, buf, 0, 0);
220 break;
221 }
222 }
223
224 out_free: 168 out_free:
225 kfree(buf); 169 kfree(buf);
226 out_close: 170 out_fput:
227 sys_close(fd); 171 fput(file);
228 out: 172 out: ;
229 /* nothing */;
230} 173}
231 174
232#define UML_MCONSOLE_HELPTEXT \ 175#define UML_MCONSOLE_HELPTEXT \
diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
index c30f3e1d0efc..460e22dee36d 100644
--- a/drivers/base/dma-buf.c
+++ b/drivers/base/dma-buf.c
@@ -460,8 +460,7 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
460 if (vma->vm_file) 460 if (vma->vm_file)
461 fput(vma->vm_file); 461 fput(vma->vm_file);
462 462
463 vma->vm_file = dmabuf->file; 463 vma->vm_file = get_file(dmabuf->file);
464 get_file(vma->vm_file);
465 464
466 vma->vm_pgoff = pgoff; 465 vma->vm_pgoff = pgoff;
467 466
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 7972bae2e9b3..2709ff581392 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1183,7 +1183,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
1183 struct rdma_ucm_migrate_id cmd; 1183 struct rdma_ucm_migrate_id cmd;
1184 struct rdma_ucm_migrate_resp resp; 1184 struct rdma_ucm_migrate_resp resp;
1185 struct ucma_context *ctx; 1185 struct ucma_context *ctx;
1186 struct file *filp; 1186 struct fd f;
1187 struct ucma_file *cur_file; 1187 struct ucma_file *cur_file;
1188 int ret = 0; 1188 int ret = 0;
1189 1189
@@ -1191,12 +1191,12 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
1191 return -EFAULT; 1191 return -EFAULT;
1192 1192
1193 /* Get current fd to protect against it being closed */ 1193 /* Get current fd to protect against it being closed */
1194 filp = fget(cmd.fd); 1194 f = fdget(cmd.fd);
1195 if (!filp) 1195 if (!f.file)
1196 return -ENOENT; 1196 return -ENOENT;
1197 1197
1198 /* Validate current fd and prevent destruction of id. */ 1198 /* Validate current fd and prevent destruction of id. */
1199 ctx = ucma_get_ctx(filp->private_data, cmd.id); 1199 ctx = ucma_get_ctx(f.file->private_data, cmd.id);
1200 if (IS_ERR(ctx)) { 1200 if (IS_ERR(ctx)) {
1201 ret = PTR_ERR(ctx); 1201 ret = PTR_ERR(ctx);
1202 goto file_put; 1202 goto file_put;
@@ -1230,7 +1230,7 @@ response:
1230 1230
1231 ucma_put_ctx(ctx); 1231 ucma_put_ctx(ctx);
1232file_put: 1232file_put:
1233 fput(filp); 1233 fdput(f);
1234 return ret; 1234 return ret;
1235} 1235}
1236 1236
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index f9d0d7c413a2..0cb0007724a2 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -705,7 +705,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
705 struct ib_udata udata; 705 struct ib_udata udata;
706 struct ib_uxrcd_object *obj; 706 struct ib_uxrcd_object *obj;
707 struct ib_xrcd *xrcd = NULL; 707 struct ib_xrcd *xrcd = NULL;
708 struct file *f = NULL; 708 struct fd f = {NULL, 0};
709 struct inode *inode = NULL; 709 struct inode *inode = NULL;
710 int ret = 0; 710 int ret = 0;
711 int new_xrcd = 0; 711 int new_xrcd = 0;
@@ -724,18 +724,13 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
724 724
725 if (cmd.fd != -1) { 725 if (cmd.fd != -1) {
726 /* search for file descriptor */ 726 /* search for file descriptor */
727 f = fget(cmd.fd); 727 f = fdget(cmd.fd);
728 if (!f) { 728 if (!f.file) {
729 ret = -EBADF;
730 goto err_tree_mutex_unlock;
731 }
732
733 inode = f->f_dentry->d_inode;
734 if (!inode) {
735 ret = -EBADF; 729 ret = -EBADF;
736 goto err_tree_mutex_unlock; 730 goto err_tree_mutex_unlock;
737 } 731 }
738 732
733 inode = f.file->f_path.dentry->d_inode;
739 xrcd = find_xrcd(file->device, inode); 734 xrcd = find_xrcd(file->device, inode);
740 if (!xrcd && !(cmd.oflags & O_CREAT)) { 735 if (!xrcd && !(cmd.oflags & O_CREAT)) {
741 /* no file descriptor. Need CREATE flag */ 736 /* no file descriptor. Need CREATE flag */
@@ -800,8 +795,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
800 goto err_copy; 795 goto err_copy;
801 } 796 }
802 797
803 if (f) 798 if (f.file)
804 fput(f); 799 fdput(f);
805 800
806 mutex_lock(&file->mutex); 801 mutex_lock(&file->mutex);
807 list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); 802 list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
@@ -830,8 +825,8 @@ err:
830 put_uobj_write(&obj->uobject); 825 put_uobj_write(&obj->uobject);
831 826
832err_tree_mutex_unlock: 827err_tree_mutex_unlock:
833 if (f) 828 if (f.file)
834 fput(f); 829 fdput(f);
835 830
836 mutex_unlock(&file->device->xrcd_tree_mutex); 831 mutex_unlock(&file->device->xrcd_tree_mutex);
837 832
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 604556d73d25..6f2ce6fa98f8 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -541,16 +541,15 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
541struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) 541struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
542{ 542{
543 struct ib_uverbs_event_file *ev_file = NULL; 543 struct ib_uverbs_event_file *ev_file = NULL;
544 struct file *filp; 544 struct fd f = fdget(fd);
545 545
546 filp = fget(fd); 546 if (!f.file)
547 if (!filp)
548 return NULL; 547 return NULL;
549 548
550 if (filp->f_op != &uverbs_event_fops) 549 if (f.file->f_op != &uverbs_event_fops)
551 goto out; 550 goto out;
552 551
553 ev_file = filp->private_data; 552 ev_file = f.file->private_data;
554 if (ev_file->is_async) { 553 if (ev_file->is_async) {
555 ev_file = NULL; 554 ev_file = NULL;
556 goto out; 555 goto out;
@@ -559,7 +558,7 @@ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
559 kref_get(&ev_file->ref); 558 kref_get(&ev_file->ref);
560 559
561out: 560out:
562 fput(filp); 561 fdput(f);
563 return ev_file; 562 return ev_file;
564} 563}
565 564
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index b1937ca13575..7b0ba92e7e46 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -362,71 +362,22 @@ struct binder_transaction {
362static void 362static void
363binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); 363binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer);
364 364
365/*
366 * copied from get_unused_fd_flags
367 */
368static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) 365static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
369{ 366{
370 struct files_struct *files = proc->files; 367 struct files_struct *files = proc->files;
371 int fd, error;
372 struct fdtable *fdt;
373 unsigned long rlim_cur; 368 unsigned long rlim_cur;
374 unsigned long irqs; 369 unsigned long irqs;
375 370
376 if (files == NULL) 371 if (files == NULL)
377 return -ESRCH; 372 return -ESRCH;
378 373
379 error = -EMFILE; 374 if (!lock_task_sighand(proc->tsk, &irqs))
380 spin_lock(&files->file_lock); 375 return -EMFILE;
381
382repeat:
383 fdt = files_fdtable(files);
384 fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, files->next_fd);
385
386 /*
387 * N.B. For clone tasks sharing a files structure, this test
388 * will limit the total number of files that can be opened.
389 */
390 rlim_cur = 0;
391 if (lock_task_sighand(proc->tsk, &irqs)) {
392 rlim_cur = proc->tsk->signal->rlim[RLIMIT_NOFILE].rlim_cur;
393 unlock_task_sighand(proc->tsk, &irqs);
394 }
395 if (fd >= rlim_cur)
396 goto out;
397
398 /* Do we need to expand the fd array or fd set? */
399 error = expand_files(files, fd);
400 if (error < 0)
401 goto out;
402
403 if (error) {
404 /*
405 * If we needed to expand the fs array we
406 * might have blocked - try again.
407 */
408 error = -EMFILE;
409 goto repeat;
410 }
411
412 __set_open_fd(fd, fdt);
413 if (flags & O_CLOEXEC)
414 __set_close_on_exec(fd, fdt);
415 else
416 __clear_close_on_exec(fd, fdt);
417 files->next_fd = fd + 1;
418
419 /* Sanity check */
420 if (fdt->fd[fd] != NULL) {
421 pr_warn("get_unused_fd: slot %d not NULL!\n", fd);
422 fdt->fd[fd] = NULL;
423 }
424 376
425 error = fd; 377 rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
378 unlock_task_sighand(proc->tsk, &irqs);
426 379
427out: 380 return __alloc_fd(files, 0, rlim_cur, flags);
428 spin_unlock(&files->file_lock);
429 return error;
430} 381}
431 382
432/* 383/*
@@ -435,28 +386,8 @@ out:
435static void task_fd_install( 386static void task_fd_install(
436 struct binder_proc *proc, unsigned int fd, struct file *file) 387 struct binder_proc *proc, unsigned int fd, struct file *file)
437{ 388{
438 struct files_struct *files = proc->files; 389 if (proc->files)
439 struct fdtable *fdt; 390 __fd_install(proc->files, fd, file);
440
441 if (files == NULL)
442 return;
443
444 spin_lock(&files->file_lock);
445 fdt = files_fdtable(files);
446 BUG_ON(fdt->fd[fd] != NULL);
447 rcu_assign_pointer(fdt->fd[fd], file);
448 spin_unlock(&files->file_lock);
449}
450
451/*
452 * copied from __put_unused_fd in open.c
453 */
454static void __put_unused_fd(struct files_struct *files, unsigned int fd)
455{
456 struct fdtable *fdt = files_fdtable(files);
457 __clear_open_fd(fd, fdt);
458 if (fd < files->next_fd)
459 files->next_fd = fd;
460} 391}
461 392
462/* 393/*
@@ -464,27 +395,12 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd)
464 */ 395 */
465static long task_close_fd(struct binder_proc *proc, unsigned int fd) 396static long task_close_fd(struct binder_proc *proc, unsigned int fd)
466{ 397{
467 struct file *filp;
468 struct files_struct *files = proc->files;
469 struct fdtable *fdt;
470 int retval; 398 int retval;
471 399
472 if (files == NULL) 400 if (proc->files == NULL)
473 return -ESRCH; 401 return -ESRCH;
474 402
475 spin_lock(&files->file_lock); 403 retval = __close_fd(proc->files, fd);
476 fdt = files_fdtable(files);
477 if (fd >= fdt->max_fds)
478 goto out_unlock;
479 filp = fdt->fd[fd];
480 if (!filp)
481 goto out_unlock;
482 rcu_assign_pointer(fdt->fd[fd], NULL);
483 __clear_close_on_exec(fd, fdt);
484 __put_unused_fd(files, fd);
485 spin_unlock(&files->file_lock);
486 retval = filp_close(filp, files);
487
488 /* can't restart close syscall because file table entry was cleared */ 404 /* can't restart close syscall because file table entry was cleared */
489 if (unlikely(retval == -ERESTARTSYS || 405 if (unlikely(retval == -ERESTARTSYS ||
490 retval == -ERESTARTNOINTR || 406 retval == -ERESTARTNOINTR ||
@@ -493,10 +409,6 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
493 retval = -EINTR; 409 retval = -EINTR;
494 410
495 return retval; 411 return retval;
496
497out_unlock:
498 spin_unlock(&files->file_lock);
499 return -EBADF;
500} 412}
501 413
502static void binder_set_nice(long nice) 414static void binder_set_nice(long nice)
@@ -2793,6 +2705,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
2793 const char *failure_string; 2705 const char *failure_string;
2794 struct binder_buffer *buffer; 2706 struct binder_buffer *buffer;
2795 2707
2708 if (proc->tsk != current)
2709 return -EINVAL;
2710
2796 if ((vma->vm_end - vma->vm_start) > SZ_4M) 2711 if ((vma->vm_end - vma->vm_start) > SZ_4M)
2797 vma->vm_end = vma->vm_start + SZ_4M; 2712 vma->vm_end = vma->vm_start + SZ_4M;
2798 2713
@@ -2857,7 +2772,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
2857 binder_insert_free_buffer(proc, buffer); 2772 binder_insert_free_buffer(proc, buffer);
2858 proc->free_async_space = proc->buffer_size / 2; 2773 proc->free_async_space = proc->buffer_size / 2;
2859 barrier(); 2774 barrier();
2860 proc->files = get_files_struct(proc->tsk); 2775 proc->files = get_files_struct(current);
2861 proc->vma = vma; 2776 proc->vma = vma;
2862 proc->vma_vm_mm = vma->vm_mm; 2777 proc->vma_vm_mm = vma->vm_mm;
2863 2778
diff --git a/drivers/staging/omapdrm/omap_gem.c b/drivers/staging/omapdrm/omap_gem.c
index c8287438e0dc..3434e6ec0142 100644
--- a/drivers/staging/omapdrm/omap_gem.c
+++ b/drivers/staging/omapdrm/omap_gem.c
@@ -592,9 +592,8 @@ int omap_gem_mmap_obj(struct drm_gem_object *obj,
592 * in particular in the case of mmap'd dmabufs) 592 * in particular in the case of mmap'd dmabufs)
593 */ 593 */
594 fput(vma->vm_file); 594 fput(vma->vm_file);
595 get_file(obj->filp);
596 vma->vm_pgoff = 0; 595 vma->vm_pgoff = 0;
597 vma->vm_file = obj->filp; 596 vma->vm_file = get_file(obj->filp);
598 597
599 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); 598 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
600 } 599 }
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 8a5a8b064616..2ea176b2280e 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1166,10 +1166,8 @@ ssize_t redirected_tty_write(struct file *file, const char __user *buf,
1166 struct file *p = NULL; 1166 struct file *p = NULL;
1167 1167
1168 spin_lock(&redirect_lock); 1168 spin_lock(&redirect_lock);
1169 if (redirect) { 1169 if (redirect)
1170 get_file(redirect); 1170 p = get_file(redirect);
1171 p = redirect;
1172 }
1173 spin_unlock(&redirect_lock); 1171 spin_unlock(&redirect_lock);
1174 1172
1175 if (p) { 1173 if (p) {
@@ -2264,8 +2262,7 @@ static int tioccons(struct file *file)
2264 spin_unlock(&redirect_lock); 2262 spin_unlock(&redirect_lock);
2265 return -EBUSY; 2263 return -EBUSY;
2266 } 2264 }
2267 get_file(file); 2265 redirect = get_file(file);
2268 redirect = file;
2269 spin_unlock(&redirect_lock); 2266 spin_unlock(&redirect_lock);
2270 return 0; 2267 return 0;
2271} 2268}
@@ -2809,6 +2806,13 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd,
2809} 2806}
2810#endif 2807#endif
2811 2808
2809static int this_tty(const void *t, struct file *file, unsigned fd)
2810{
2811 if (likely(file->f_op->read != tty_read))
2812 return 0;
2813 return file_tty(file) != t ? 0 : fd + 1;
2814}
2815
2812/* 2816/*
2813 * This implements the "Secure Attention Key" --- the idea is to 2817 * This implements the "Secure Attention Key" --- the idea is to
2814 * prevent trojan horses by killing all processes associated with this 2818 * prevent trojan horses by killing all processes associated with this
@@ -2836,8 +2840,6 @@ void __do_SAK(struct tty_struct *tty)
2836 struct task_struct *g, *p; 2840 struct task_struct *g, *p;
2837 struct pid *session; 2841 struct pid *session;
2838 int i; 2842 int i;
2839 struct file *filp;
2840 struct fdtable *fdt;
2841 2843
2842 if (!tty) 2844 if (!tty)
2843 return; 2845 return;
@@ -2867,27 +2869,12 @@ void __do_SAK(struct tty_struct *tty)
2867 continue; 2869 continue;
2868 } 2870 }
2869 task_lock(p); 2871 task_lock(p);
2870 if (p->files) { 2872 i = iterate_fd(p->files, 0, this_tty, tty);
2871 /* 2873 if (i != 0) {
2872 * We don't take a ref to the file, so we must 2874 printk(KERN_NOTICE "SAK: killed process %d"
2873 * hold ->file_lock instead. 2875 " (%s): fd#%d opened to the tty\n",
2874 */ 2876 task_pid_nr(p), p->comm, i - 1);
2875 spin_lock(&p->files->file_lock); 2877 force_sig(SIGKILL, p);
2876 fdt = files_fdtable(p->files);
2877 for (i = 0; i < fdt->max_fds; i++) {
2878 filp = fcheck_files(p->files, i);
2879 if (!filp)
2880 continue;
2881 if (filp->f_op->read == tty_read &&
2882 file_tty(filp) == tty) {
2883 printk(KERN_NOTICE "SAK: killed process %d"
2884 " (%s): fd#%d opened to the tty\n",
2885 task_pid_nr(p), p->comm, i);
2886 force_sig(SIGKILL, p);
2887 break;
2888 }
2889 }
2890 spin_unlock(&p->files->file_lock);
2891 } 2878 }
2892 task_unlock(p); 2879 task_unlock(p);
2893 } while_each_thread(g, p); 2880 } while_each_thread(g, p);
diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c
index a26c43a151fd..64c4ec10d1fc 100644
--- a/drivers/usb/gadget/f_fs.c
+++ b/drivers/usb/gadget/f_fs.c
@@ -340,7 +340,7 @@ ffs_sb_create_file(struct super_block *sb, const char *name, void *data,
340 340
341static int ffs_mutex_lock(struct mutex *mutex, unsigned nonblock) 341static int ffs_mutex_lock(struct mutex *mutex, unsigned nonblock)
342 __attribute__((warn_unused_result, nonnull)); 342 __attribute__((warn_unused_result, nonnull));
343static char *ffs_prepare_buffer(const char * __user buf, size_t len) 343static char *ffs_prepare_buffer(const char __user *buf, size_t len)
344 __attribute__((warn_unused_result, nonnull)); 344 __attribute__((warn_unused_result, nonnull));
345 345
346 346
@@ -2445,7 +2445,7 @@ static int ffs_mutex_lock(struct mutex *mutex, unsigned nonblock)
2445 : mutex_lock_interruptible(mutex); 2445 : mutex_lock_interruptible(mutex);
2446} 2446}
2447 2447
2448static char *ffs_prepare_buffer(const char * __user buf, size_t len) 2448static char *ffs_prepare_buffer(const char __user *buf, size_t len)
2449{ 2449{
2450 char *data; 2450 char *data;
2451 2451
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 17830c9c7cc6..56097c6d072d 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1014,7 +1014,7 @@ static void vfio_group_try_dissolve_container(struct vfio_group *group)
1014 1014
1015static int vfio_group_set_container(struct vfio_group *group, int container_fd) 1015static int vfio_group_set_container(struct vfio_group *group, int container_fd)
1016{ 1016{
1017 struct file *filep; 1017 struct fd f;
1018 struct vfio_container *container; 1018 struct vfio_container *container;
1019 struct vfio_iommu_driver *driver; 1019 struct vfio_iommu_driver *driver;
1020 int ret = 0; 1020 int ret = 0;
@@ -1022,17 +1022,17 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
1022 if (atomic_read(&group->container_users)) 1022 if (atomic_read(&group->container_users))
1023 return -EINVAL; 1023 return -EINVAL;
1024 1024
1025 filep = fget(container_fd); 1025 f = fdget(container_fd);
1026 if (!filep) 1026 if (!f.file)
1027 return -EBADF; 1027 return -EBADF;
1028 1028
1029 /* Sanity check, is this really our fd? */ 1029 /* Sanity check, is this really our fd? */
1030 if (filep->f_op != &vfio_fops) { 1030 if (f.file->f_op != &vfio_fops) {
1031 fput(filep); 1031 fdput(f);
1032 return -EINVAL; 1032 return -EINVAL;
1033 } 1033 }
1034 1034
1035 container = filep->private_data; 1035 container = f.file->private_data;
1036 WARN_ON(!container); /* fget ensures we don't race vfio_release */ 1036 WARN_ON(!container); /* fget ensures we don't race vfio_release */
1037 1037
1038 mutex_lock(&container->group_lock); 1038 mutex_lock(&container->group_lock);
@@ -1054,8 +1054,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
1054 1054
1055unlock_out: 1055unlock_out:
1056 mutex_unlock(&container->group_lock); 1056 mutex_unlock(&container->group_lock);
1057 fput(filep); 1057 fdput(f);
1058
1059 return ret; 1058 return ret;
1060} 1059}
1061 1060
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ef82a0d18489..99ac2cb08b43 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -636,8 +636,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
636 636
637static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) 637static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
638{ 638{
639 struct file *eventfp, *filep = NULL, 639 struct file *eventfp, *filep = NULL;
640 *pollstart = NULL, *pollstop = NULL; 640 bool pollstart = false, pollstop = false;
641 struct eventfd_ctx *ctx = NULL; 641 struct eventfd_ctx *ctx = NULL;
642 u32 __user *idxp = argp; 642 u32 __user *idxp = argp;
643 struct vhost_virtqueue *vq; 643 struct vhost_virtqueue *vq;
@@ -763,8 +763,8 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
763 break; 763 break;
764 } 764 }
765 if (eventfp != vq->kick) { 765 if (eventfp != vq->kick) {
766 pollstop = filep = vq->kick; 766 pollstop = (filep = vq->kick) != NULL;
767 pollstart = vq->kick = eventfp; 767 pollstart = (vq->kick = eventfp) != NULL;
768 } else 768 } else
769 filep = eventfp; 769 filep = eventfp;
770 break; 770 break;
diff --git a/drivers/video/msm/mdp.c b/drivers/video/msm/mdp.c
index d1f881e8030e..2e0f3bab6114 100644
--- a/drivers/video/msm/mdp.c
+++ b/drivers/video/msm/mdp.c
@@ -257,19 +257,17 @@ int get_img(struct mdp_img *img, struct fb_info *info,
257 unsigned long *start, unsigned long *len, 257 unsigned long *start, unsigned long *len,
258 struct file **filep) 258 struct file **filep)
259{ 259{
260 int put_needed, ret = 0; 260 int ret = 0;
261 struct file *file; 261 struct fd f = fdget(img->memory_id);
262 262 if (f.file == NULL)
263 file = fget_light(img->memory_id, &put_needed);
264 if (file == NULL)
265 return -1; 263 return -1;
266 264
267 if (MAJOR(file->f_dentry->d_inode->i_rdev) == FB_MAJOR) { 265 if (MAJOR(f.file->f_dentry->d_inode->i_rdev) == FB_MAJOR) {
268 *start = info->fix.smem_start; 266 *start = info->fix.smem_start;
269 *len = info->fix.smem_len; 267 *len = info->fix.smem_len;
270 } else 268 } else
271 ret = -1; 269 ret = -1;
272 fput_light(file, put_needed); 270 fdput(f);
273 271
274 return ret; 272 return ret;
275} 273}
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index b85efa773949..392c5dac1981 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -560,6 +560,11 @@ static int v9fs_init_inode_cache(void)
560 */ 560 */
561static void v9fs_destroy_inode_cache(void) 561static void v9fs_destroy_inode_cache(void)
562{ 562{
563 /*
564 * Make sure all delayed rcu free inodes are flushed before we
565 * destroy cache.
566 */
567 rcu_barrier();
563 kmem_cache_destroy(v9fs_inode_cache); 568 kmem_cache_destroy(v9fs_inode_cache);
564} 569}
565 570
diff --git a/fs/Makefile b/fs/Makefile
index 2fb977934673..8938f8250320 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o \
13 pnode.o drop_caches.o splice.o sync.o utimes.o \ 13 pnode.o drop_caches.o splice.o sync.o utimes.o \
14 stack.o fs_struct.o statfs.o 14 stack.o fs_struct.o statfs.o coredump.o
15 15
16ifeq ($(CONFIG_BLOCK),y) 16ifeq ($(CONFIG_BLOCK),y)
17obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o 17obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 22a0d7ed5fa1..d57122935793 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -280,6 +280,11 @@ static int init_inodecache(void)
280 280
281static void destroy_inodecache(void) 281static void destroy_inodecache(void)
282{ 282{
283 /*
284 * Make sure all delayed rcu free inodes are flushed before we
285 * destroy cache.
286 */
287 rcu_barrier();
283 kmem_cache_destroy(adfs_inode_cachep); 288 kmem_cache_destroy(adfs_inode_cachep);
284} 289}
285 290
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 1f030825cd3a..b84dc7352502 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -147,6 +147,11 @@ static int init_inodecache(void)
147 147
148static void destroy_inodecache(void) 148static void destroy_inodecache(void)
149{ 149{
150 /*
151 * Make sure all delayed rcu free inodes are flushed before we
152 * destroy cache.
153 */
154 rcu_barrier();
150 kmem_cache_destroy(affs_inode_cachep); 155 kmem_cache_destroy(affs_inode_cachep);
151} 156}
152 157
diff --git a/fs/afs/super.c b/fs/afs/super.c
index df8c6047c2a1..43165009428d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -123,6 +123,11 @@ void __exit afs_fs_exit(void)
123 BUG(); 123 BUG();
124 } 124 }
125 125
126 /*
127 * Make sure all delayed rcu free inodes are flushed before we
128 * destroy cache.
129 */
130 rcu_barrier();
126 kmem_cache_destroy(afs_inode_cachep); 131 kmem_cache_destroy(afs_inode_cachep);
127 _leave(""); 132 _leave("");
128} 133}
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index abf645c1703b..a16214109d31 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -221,20 +221,6 @@ static int test_by_type(struct path *path, void *p)
221 return ino && ino->sbi->type & *(unsigned *)p; 221 return ino && ino->sbi->type & *(unsigned *)p;
222} 222}
223 223
224static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
225{
226 struct files_struct *files = current->files;
227 struct fdtable *fdt;
228
229 spin_lock(&files->file_lock);
230 fdt = files_fdtable(files);
231 BUG_ON(fdt->fd[fd] != NULL);
232 rcu_assign_pointer(fdt->fd[fd], file);
233 __set_close_on_exec(fd, fdt);
234 spin_unlock(&files->file_lock);
235}
236
237
238/* 224/*
239 * Open a file descriptor on the autofs mount point corresponding 225 * Open a file descriptor on the autofs mount point corresponding
240 * to the given path and device number (aka. new_encode_dev(sb->s_dev)). 226 * to the given path and device number (aka. new_encode_dev(sb->s_dev)).
@@ -243,7 +229,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
243{ 229{
244 int err, fd; 230 int err, fd;
245 231
246 fd = get_unused_fd(); 232 fd = get_unused_fd_flags(O_CLOEXEC);
247 if (likely(fd >= 0)) { 233 if (likely(fd >= 0)) {
248 struct file *filp; 234 struct file *filp;
249 struct path path; 235 struct path path;
@@ -264,7 +250,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
264 goto out; 250 goto out;
265 } 251 }
266 252
267 autofs_dev_ioctl_fd_install(fd, filp); 253 fd_install(fd, filp);
268 } 254 }
269 255
270 return fd; 256 return fd;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index da8876d38a7b..dce436e595c1 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -175,8 +175,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
175 return; 175 return;
176 } 176 }
177 177
178 pipe = sbi->pipe; 178 pipe = get_file(sbi->pipe);
179 get_file(pipe);
180 179
181 mutex_unlock(&sbi->wq_mutex); 180 mutex_unlock(&sbi->wq_mutex);
182 181
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 7f73a692bfd0..2b3bda8d5e68 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -457,6 +457,11 @@ befs_init_inodecache(void)
457static void 457static void
458befs_destroy_inodecache(void) 458befs_destroy_inodecache(void)
459{ 459{
460 /*
461 * Make sure all delayed rcu free inodes are flushed before we
462 * destroy cache.
463 */
464 rcu_barrier();
460 kmem_cache_destroy(befs_inode_cachep); 465 kmem_cache_destroy(befs_inode_cachep);
461} 466}
462 467
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index b242beba58ed..737aaa3f7090 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -280,6 +280,11 @@ static int init_inodecache(void)
280 280
281static void destroy_inodecache(void) 281static void destroy_inodecache(void)
282{ 282{
283 /*
284 * Make sure all delayed rcu free inodes are flushed before we
285 * destroy cache.
286 */
287 rcu_barrier();
283 kmem_cache_destroy(bfs_inode_cachep); 288 kmem_cache_destroy(bfs_inode_cachep);
284} 289}
285 290
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 1b52956afe33..0225fddf49b7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1696,30 +1696,19 @@ static int elf_note_info_init(struct elf_note_info *info)
1696 return 0; 1696 return 0;
1697 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); 1697 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1698 if (!info->psinfo) 1698 if (!info->psinfo)
1699 goto notes_free; 1699 return 0;
1700 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); 1700 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1701 if (!info->prstatus) 1701 if (!info->prstatus)
1702 goto psinfo_free; 1702 return 0;
1703 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); 1703 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1704 if (!info->fpu) 1704 if (!info->fpu)
1705 goto prstatus_free; 1705 return 0;
1706#ifdef ELF_CORE_COPY_XFPREGS 1706#ifdef ELF_CORE_COPY_XFPREGS
1707 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); 1707 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1708 if (!info->xfpu) 1708 if (!info->xfpu)
1709 goto fpu_free; 1709 return 0;
1710#endif 1710#endif
1711 return 1; 1711 return 1;
1712#ifdef ELF_CORE_COPY_XFPREGS
1713 fpu_free:
1714 kfree(info->fpu);
1715#endif
1716 prstatus_free:
1717 kfree(info->prstatus);
1718 psinfo_free:
1719 kfree(info->psinfo);
1720 notes_free:
1721 kfree(info->notes);
1722 return 0;
1723} 1712}
1724 1713
1725static int fill_note_info(struct elfhdr *elf, int phdrs, 1714static int fill_note_info(struct elfhdr *elf, int phdrs,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4c878476bb91..b08ea4717e9d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -107,6 +107,12 @@ void extent_io_exit(void)
107 list_del(&eb->leak_list); 107 list_del(&eb->leak_list);
108 kmem_cache_free(extent_buffer_cache, eb); 108 kmem_cache_free(extent_buffer_cache, eb);
109 } 109 }
110
111 /*
112 * Make sure all delayed rcu free are flushed before we
113 * destroy caches.
114 */
115 rcu_barrier();
110 if (extent_state_cache) 116 if (extent_state_cache)
111 kmem_cache_destroy(extent_state_cache); 117 kmem_cache_destroy(extent_state_cache);
112 if (extent_buffer_cache) 118 if (extent_buffer_cache)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2a028a58619c..a6ed6944e50c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7076,6 +7076,11 @@ static void init_once(void *foo)
7076 7076
7077void btrfs_destroy_cachep(void) 7077void btrfs_destroy_cachep(void)
7078{ 7078{
7079 /*
7080 * Make sure all delayed rcu free inodes are flushed before we
7081 * destroy cache.
7082 */
7083 rcu_barrier();
7079 if (btrfs_inode_cachep) 7084 if (btrfs_inode_cachep)
7080 kmem_cache_destroy(btrfs_inode_cachep); 7085 kmem_cache_destroy(btrfs_inode_cachep);
7081 if (btrfs_trans_handle_cachep) 7086 if (btrfs_trans_handle_cachep)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 27bfce58da3b..47127c1bd290 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1397,7 +1397,6 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1397 u64 *transid, bool readonly, 1397 u64 *transid, bool readonly,
1398 struct btrfs_qgroup_inherit **inherit) 1398 struct btrfs_qgroup_inherit **inherit)
1399{ 1399{
1400 struct file *src_file;
1401 int namelen; 1400 int namelen;
1402 int ret = 0; 1401 int ret = 0;
1403 1402
@@ -1421,25 +1420,24 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1421 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1420 ret = btrfs_mksubvol(&file->f_path, name, namelen,
1422 NULL, transid, readonly, inherit); 1421 NULL, transid, readonly, inherit);
1423 } else { 1422 } else {
1423 struct fd src = fdget(fd);
1424 struct inode *src_inode; 1424 struct inode *src_inode;
1425 src_file = fget(fd); 1425 if (!src.file) {
1426 if (!src_file) {
1427 ret = -EINVAL; 1426 ret = -EINVAL;
1428 goto out_drop_write; 1427 goto out_drop_write;
1429 } 1428 }
1430 1429
1431 src_inode = src_file->f_path.dentry->d_inode; 1430 src_inode = src.file->f_path.dentry->d_inode;
1432 if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { 1431 if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) {
1433 printk(KERN_INFO "btrfs: Snapshot src from " 1432 printk(KERN_INFO "btrfs: Snapshot src from "
1434 "another FS\n"); 1433 "another FS\n");
1435 ret = -EINVAL; 1434 ret = -EINVAL;
1436 fput(src_file); 1435 } else {
1437 goto out_drop_write; 1436 ret = btrfs_mksubvol(&file->f_path, name, namelen,
1437 BTRFS_I(src_inode)->root,
1438 transid, readonly, inherit);
1438 } 1439 }
1439 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1440 fdput(src);
1440 BTRFS_I(src_inode)->root,
1441 transid, readonly, inherit);
1442 fput(src_file);
1443 } 1441 }
1444out_drop_write: 1442out_drop_write:
1445 mnt_drop_write_file(file); 1443 mnt_drop_write_file(file);
@@ -2341,7 +2339,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2341{ 2339{
2342 struct inode *inode = fdentry(file)->d_inode; 2340 struct inode *inode = fdentry(file)->d_inode;
2343 struct btrfs_root *root = BTRFS_I(inode)->root; 2341 struct btrfs_root *root = BTRFS_I(inode)->root;
2344 struct file *src_file; 2342 struct fd src_file;
2345 struct inode *src; 2343 struct inode *src;
2346 struct btrfs_trans_handle *trans; 2344 struct btrfs_trans_handle *trans;
2347 struct btrfs_path *path; 2345 struct btrfs_path *path;
@@ -2376,24 +2374,24 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2376 if (ret) 2374 if (ret)
2377 return ret; 2375 return ret;
2378 2376
2379 src_file = fget(srcfd); 2377 src_file = fdget(srcfd);
2380 if (!src_file) { 2378 if (!src_file.file) {
2381 ret = -EBADF; 2379 ret = -EBADF;
2382 goto out_drop_write; 2380 goto out_drop_write;
2383 } 2381 }
2384 2382
2385 ret = -EXDEV; 2383 ret = -EXDEV;
2386 if (src_file->f_path.mnt != file->f_path.mnt) 2384 if (src_file.file->f_path.mnt != file->f_path.mnt)
2387 goto out_fput; 2385 goto out_fput;
2388 2386
2389 src = src_file->f_dentry->d_inode; 2387 src = src_file.file->f_dentry->d_inode;
2390 2388
2391 ret = -EINVAL; 2389 ret = -EINVAL;
2392 if (src == inode) 2390 if (src == inode)
2393 goto out_fput; 2391 goto out_fput;
2394 2392
2395 /* the src must be open for reading */ 2393 /* the src must be open for reading */
2396 if (!(src_file->f_mode & FMODE_READ)) 2394 if (!(src_file.file->f_mode & FMODE_READ))
2397 goto out_fput; 2395 goto out_fput;
2398 2396
2399 /* don't make the dst file partly checksummed */ 2397 /* don't make the dst file partly checksummed */
@@ -2724,7 +2722,7 @@ out_unlock:
2724 vfree(buf); 2722 vfree(buf);
2725 btrfs_free_path(path); 2723 btrfs_free_path(path);
2726out_fput: 2724out_fput:
2727 fput(src_file); 2725 fdput(src_file);
2728out_drop_write: 2726out_drop_write:
2729 mnt_drop_write_file(file); 2727 mnt_drop_write_file(file);
2730 return ret; 2728 return ret;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 48a4882d8ad5..a955669519a2 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -68,7 +68,7 @@ struct reada_extent {
68 u32 blocksize; 68 u32 blocksize;
69 int err; 69 int err;
70 struct list_head extctl; 70 struct list_head extctl;
71 struct kref refcnt; 71 int refcnt;
72 spinlock_t lock; 72 spinlock_t lock;
73 struct reada_zone *zones[BTRFS_MAX_MIRRORS]; 73 struct reada_zone *zones[BTRFS_MAX_MIRRORS];
74 int nzones; 74 int nzones;
@@ -126,7 +126,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
126 spin_lock(&fs_info->reada_lock); 126 spin_lock(&fs_info->reada_lock);
127 re = radix_tree_lookup(&fs_info->reada_tree, index); 127 re = radix_tree_lookup(&fs_info->reada_tree, index);
128 if (re) 128 if (re)
129 kref_get(&re->refcnt); 129 re->refcnt++;
130 spin_unlock(&fs_info->reada_lock); 130 spin_unlock(&fs_info->reada_lock);
131 131
132 if (!re) 132 if (!re)
@@ -336,7 +336,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
336 spin_lock(&fs_info->reada_lock); 336 spin_lock(&fs_info->reada_lock);
337 re = radix_tree_lookup(&fs_info->reada_tree, index); 337 re = radix_tree_lookup(&fs_info->reada_tree, index);
338 if (re) 338 if (re)
339 kref_get(&re->refcnt); 339 re->refcnt++;
340 spin_unlock(&fs_info->reada_lock); 340 spin_unlock(&fs_info->reada_lock);
341 341
342 if (re) 342 if (re)
@@ -352,7 +352,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
352 re->top = *top; 352 re->top = *top;
353 INIT_LIST_HEAD(&re->extctl); 353 INIT_LIST_HEAD(&re->extctl);
354 spin_lock_init(&re->lock); 354 spin_lock_init(&re->lock);
355 kref_init(&re->refcnt); 355 re->refcnt = 1;
356 356
357 /* 357 /*
358 * map block 358 * map block
@@ -398,7 +398,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
398 if (ret == -EEXIST) { 398 if (ret == -EEXIST) {
399 re_exist = radix_tree_lookup(&fs_info->reada_tree, index); 399 re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
400 BUG_ON(!re_exist); 400 BUG_ON(!re_exist);
401 kref_get(&re_exist->refcnt); 401 re_exist->refcnt++;
402 spin_unlock(&fs_info->reada_lock); 402 spin_unlock(&fs_info->reada_lock);
403 goto error; 403 goto error;
404 } 404 }
@@ -465,10 +465,6 @@ error:
465 return re_exist; 465 return re_exist;
466} 466}
467 467
468static void reada_kref_dummy(struct kref *kr)
469{
470}
471
472static void reada_extent_put(struct btrfs_fs_info *fs_info, 468static void reada_extent_put(struct btrfs_fs_info *fs_info,
473 struct reada_extent *re) 469 struct reada_extent *re)
474{ 470{
@@ -476,7 +472,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
476 unsigned long index = re->logical >> PAGE_CACHE_SHIFT; 472 unsigned long index = re->logical >> PAGE_CACHE_SHIFT;
477 473
478 spin_lock(&fs_info->reada_lock); 474 spin_lock(&fs_info->reada_lock);
479 if (!kref_put(&re->refcnt, reada_kref_dummy)) { 475 if (--re->refcnt) {
480 spin_unlock(&fs_info->reada_lock); 476 spin_unlock(&fs_info->reada_lock);
481 return; 477 return;
482 } 478 }
@@ -671,7 +667,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
671 return 0; 667 return 0;
672 } 668 }
673 dev->reada_next = re->logical + re->blocksize; 669 dev->reada_next = re->logical + re->blocksize;
674 kref_get(&re->refcnt); 670 re->refcnt++;
675 671
676 spin_unlock(&fs_info->reada_lock); 672 spin_unlock(&fs_info->reada_lock);
677 673
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 4b5762ef7c2b..ba95eea201bf 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1104,7 +1104,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1104 pr_err("fill_trace bad get_inode " 1104 pr_err("fill_trace bad get_inode "
1105 "%llx.%llx\n", vino.ino, vino.snap); 1105 "%llx.%llx\n", vino.ino, vino.snap);
1106 err = PTR_ERR(in); 1106 err = PTR_ERR(in);
1107 d_delete(dn); 1107 d_drop(dn);
1108 goto done; 1108 goto done;
1109 } 1109 }
1110 dn = splice_dentry(dn, in, &have_lease, true); 1110 dn = splice_dentry(dn, in, &have_lease, true);
@@ -1277,7 +1277,7 @@ retry_lookup:
1277 in = ceph_get_inode(parent->d_sb, vino); 1277 in = ceph_get_inode(parent->d_sb, vino);
1278 if (IS_ERR(in)) { 1278 if (IS_ERR(in)) {
1279 dout("new_inode badness\n"); 1279 dout("new_inode badness\n");
1280 d_delete(dn); 1280 d_drop(dn);
1281 dput(dn); 1281 dput(dn);
1282 err = PTR_ERR(in); 1282 err = PTR_ERR(in);
1283 goto out; 1283 goto out;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index b982239f38f9..3a42d9326378 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -603,6 +603,11 @@ bad_cap:
603 603
604static void destroy_caches(void) 604static void destroy_caches(void)
605{ 605{
606 /*
607 * Make sure all delayed rcu free inodes are flushed before we
608 * destroy cache.
609 */
610 rcu_barrier();
606 kmem_cache_destroy(ceph_inode_cachep); 611 kmem_cache_destroy(ceph_inode_cachep);
607 kmem_cache_destroy(ceph_cap_cachep); 612 kmem_cache_destroy(ceph_cap_cachep);
608 kmem_cache_destroy(ceph_dentry_cachep); 613 kmem_cache_destroy(ceph_dentry_cachep);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a41044a31083..e7931cc55d0c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -968,6 +968,11 @@ cifs_init_inodecache(void)
968static void 968static void
969cifs_destroy_inodecache(void) 969cifs_destroy_inodecache(void)
970{ 970{
971 /*
972 * Make sure all delayed rcu free inodes are flushed before we
973 * destroy cache.
974 */
975 rcu_barrier();
971 kmem_cache_destroy(cifs_inode_cachep); 976 kmem_cache_destroy(cifs_inode_cachep);
972} 977}
973 978
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index f1813120d753..be2aa4909487 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -85,6 +85,11 @@ int coda_init_inodecache(void)
85 85
86void coda_destroy_inodecache(void) 86void coda_destroy_inodecache(void)
87{ 87{
88 /*
89 * Make sure all delayed rcu free inodes are flushed before we
90 * destroy cache.
91 */
92 rcu_barrier();
88 kmem_cache_destroy(coda_inode_cachep); 93 kmem_cache_destroy(coda_inode_cachep);
89} 94}
90 95
@@ -107,43 +112,41 @@ static const struct super_operations coda_super_operations =
107 112
108static int get_device_index(struct coda_mount_data *data) 113static int get_device_index(struct coda_mount_data *data)
109{ 114{
110 struct file *file; 115 struct fd f;
111 struct inode *inode; 116 struct inode *inode;
112 int idx; 117 int idx;
113 118
114 if(data == NULL) { 119 if (data == NULL) {
115 printk("coda_read_super: Bad mount data\n"); 120 printk("coda_read_super: Bad mount data\n");
116 return -1; 121 return -1;
117 } 122 }
118 123
119 if(data->version != CODA_MOUNT_VERSION) { 124 if (data->version != CODA_MOUNT_VERSION) {
120 printk("coda_read_super: Bad mount version\n"); 125 printk("coda_read_super: Bad mount version\n");
121 return -1; 126 return -1;
122 } 127 }
123 128
124 file = fget(data->fd); 129 f = fdget(data->fd);
125 inode = NULL; 130 if (!f.file)
126 if(file) 131 goto Ebadf;
127 inode = file->f_path.dentry->d_inode; 132 inode = f.file->f_path.dentry->d_inode;
128 133 if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) {
129 if(!inode || !S_ISCHR(inode->i_mode) || 134 fdput(f);
130 imajor(inode) != CODA_PSDEV_MAJOR) { 135 goto Ebadf;
131 if(file)
132 fput(file);
133
134 printk("coda_read_super: Bad file\n");
135 return -1;
136 } 136 }
137 137
138 idx = iminor(inode); 138 idx = iminor(inode);
139 fput(file); 139 fdput(f);
140 140
141 if(idx < 0 || idx >= MAX_CODADEVS) { 141 if (idx < 0 || idx >= MAX_CODADEVS) {
142 printk("coda_read_super: Bad minor number\n"); 142 printk("coda_read_super: Bad minor number\n");
143 return -1; 143 return -1;
144 } 144 }
145 145
146 return idx; 146 return idx;
147Ebadf:
148 printk("coda_read_super: Bad file\n");
149 return -1;
147} 150}
148 151
149static int coda_fill_super(struct super_block *sb, void *data, int silent) 152static int coda_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/compat.c b/fs/compat.c
index 1bdb350ea5d3..b7a24d0ca30d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -870,22 +870,20 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd,
870 struct compat_old_linux_dirent __user *dirent, unsigned int count) 870 struct compat_old_linux_dirent __user *dirent, unsigned int count)
871{ 871{
872 int error; 872 int error;
873 struct file *file; 873 struct fd f = fdget(fd);
874 int fput_needed;
875 struct compat_readdir_callback buf; 874 struct compat_readdir_callback buf;
876 875
877 file = fget_light(fd, &fput_needed); 876 if (!f.file)
878 if (!file)
879 return -EBADF; 877 return -EBADF;
880 878
881 buf.result = 0; 879 buf.result = 0;
882 buf.dirent = dirent; 880 buf.dirent = dirent;
883 881
884 error = vfs_readdir(file, compat_fillonedir, &buf); 882 error = vfs_readdir(f.file, compat_fillonedir, &buf);
885 if (buf.result) 883 if (buf.result)
886 error = buf.result; 884 error = buf.result;
887 885
888 fput_light(file, fput_needed); 886 fdput(f);
889 return error; 887 return error;
890} 888}
891 889
@@ -949,17 +947,16 @@ efault:
949asmlinkage long compat_sys_getdents(unsigned int fd, 947asmlinkage long compat_sys_getdents(unsigned int fd,
950 struct compat_linux_dirent __user *dirent, unsigned int count) 948 struct compat_linux_dirent __user *dirent, unsigned int count)
951{ 949{
952 struct file * file; 950 struct fd f;
953 struct compat_linux_dirent __user * lastdirent; 951 struct compat_linux_dirent __user * lastdirent;
954 struct compat_getdents_callback buf; 952 struct compat_getdents_callback buf;
955 int fput_needed;
956 int error; 953 int error;
957 954
958 if (!access_ok(VERIFY_WRITE, dirent, count)) 955 if (!access_ok(VERIFY_WRITE, dirent, count))
959 return -EFAULT; 956 return -EFAULT;
960 957
961 file = fget_light(fd, &fput_needed); 958 f = fdget(fd);
962 if (!file) 959 if (!f.file)
963 return -EBADF; 960 return -EBADF;
964 961
965 buf.current_dir = dirent; 962 buf.current_dir = dirent;
@@ -967,17 +964,17 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
967 buf.count = count; 964 buf.count = count;
968 buf.error = 0; 965 buf.error = 0;
969 966
970 error = vfs_readdir(file, compat_filldir, &buf); 967 error = vfs_readdir(f.file, compat_filldir, &buf);
971 if (error >= 0) 968 if (error >= 0)
972 error = buf.error; 969 error = buf.error;
973 lastdirent = buf.previous; 970 lastdirent = buf.previous;
974 if (lastdirent) { 971 if (lastdirent) {
975 if (put_user(file->f_pos, &lastdirent->d_off)) 972 if (put_user(f.file->f_pos, &lastdirent->d_off))
976 error = -EFAULT; 973 error = -EFAULT;
977 else 974 else
978 error = count - buf.count; 975 error = count - buf.count;
979 } 976 }
980 fput_light(file, fput_needed); 977 fdput(f);
981 return error; 978 return error;
982} 979}
983 980
@@ -1035,17 +1032,16 @@ efault:
1035asmlinkage long compat_sys_getdents64(unsigned int fd, 1032asmlinkage long compat_sys_getdents64(unsigned int fd,
1036 struct linux_dirent64 __user * dirent, unsigned int count) 1033 struct linux_dirent64 __user * dirent, unsigned int count)
1037{ 1034{
1038 struct file * file; 1035 struct fd f;
1039 struct linux_dirent64 __user * lastdirent; 1036 struct linux_dirent64 __user * lastdirent;
1040 struct compat_getdents_callback64 buf; 1037 struct compat_getdents_callback64 buf;
1041 int fput_needed;
1042 int error; 1038 int error;
1043 1039
1044 if (!access_ok(VERIFY_WRITE, dirent, count)) 1040 if (!access_ok(VERIFY_WRITE, dirent, count))
1045 return -EFAULT; 1041 return -EFAULT;
1046 1042
1047 file = fget_light(fd, &fput_needed); 1043 f = fdget(fd);
1048 if (!file) 1044 if (!f.file)
1049 return -EBADF; 1045 return -EBADF;
1050 1046
1051 buf.current_dir = dirent; 1047 buf.current_dir = dirent;
@@ -1053,18 +1049,18 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
1053 buf.count = count; 1049 buf.count = count;
1054 buf.error = 0; 1050 buf.error = 0;
1055 1051
1056 error = vfs_readdir(file, compat_filldir64, &buf); 1052 error = vfs_readdir(f.file, compat_filldir64, &buf);
1057 if (error >= 0) 1053 if (error >= 0)
1058 error = buf.error; 1054 error = buf.error;
1059 lastdirent = buf.previous; 1055 lastdirent = buf.previous;
1060 if (lastdirent) { 1056 if (lastdirent) {
1061 typeof(lastdirent->d_off) d_off = file->f_pos; 1057 typeof(lastdirent->d_off) d_off = f.file->f_pos;
1062 if (__put_user_unaligned(d_off, &lastdirent->d_off)) 1058 if (__put_user_unaligned(d_off, &lastdirent->d_off))
1063 error = -EFAULT; 1059 error = -EFAULT;
1064 else 1060 else
1065 error = count - buf.count; 1061 error = count - buf.count;
1066 } 1062 }
1067 fput_light(file, fput_needed); 1063 fdput(f);
1068 return error; 1064 return error;
1069} 1065}
1070#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ 1066#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */
@@ -1152,18 +1148,16 @@ asmlinkage ssize_t
1152compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, 1148compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
1153 unsigned long vlen) 1149 unsigned long vlen)
1154{ 1150{
1155 struct file *file; 1151 struct fd f = fdget(fd);
1156 int fput_needed;
1157 ssize_t ret; 1152 ssize_t ret;
1158 loff_t pos; 1153 loff_t pos;
1159 1154
1160 file = fget_light(fd, &fput_needed); 1155 if (!f.file)
1161 if (!file)
1162 return -EBADF; 1156 return -EBADF;
1163 pos = file->f_pos; 1157 pos = f.file->f_pos;
1164 ret = compat_readv(file, vec, vlen, &pos); 1158 ret = compat_readv(f.file, vec, vlen, &pos);
1165 file->f_pos = pos; 1159 f.file->f_pos = pos;
1166 fput_light(file, fput_needed); 1160 fdput(f);
1167 return ret; 1161 return ret;
1168} 1162}
1169 1163
@@ -1171,19 +1165,18 @@ asmlinkage ssize_t
1171compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, 1165compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec,
1172 unsigned long vlen, loff_t pos) 1166 unsigned long vlen, loff_t pos)
1173{ 1167{
1174 struct file *file; 1168 struct fd f;
1175 int fput_needed;
1176 ssize_t ret; 1169 ssize_t ret;
1177 1170
1178 if (pos < 0) 1171 if (pos < 0)
1179 return -EINVAL; 1172 return -EINVAL;
1180 file = fget_light(fd, &fput_needed); 1173 f = fdget(fd);
1181 if (!file) 1174 if (!f.file)
1182 return -EBADF; 1175 return -EBADF;
1183 ret = -ESPIPE; 1176 ret = -ESPIPE;
1184 if (file->f_mode & FMODE_PREAD) 1177 if (f.file->f_mode & FMODE_PREAD)
1185 ret = compat_readv(file, vec, vlen, &pos); 1178 ret = compat_readv(f.file, vec, vlen, &pos);
1186 fput_light(file, fput_needed); 1179 fdput(f);
1187 return ret; 1180 return ret;
1188} 1181}
1189 1182
@@ -1221,18 +1214,16 @@ asmlinkage ssize_t
1221compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, 1214compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
1222 unsigned long vlen) 1215 unsigned long vlen)
1223{ 1216{
1224 struct file *file; 1217 struct fd f = fdget(fd);
1225 int fput_needed;
1226 ssize_t ret; 1218 ssize_t ret;
1227 loff_t pos; 1219 loff_t pos;
1228 1220
1229 file = fget_light(fd, &fput_needed); 1221 if (!f.file)
1230 if (!file)
1231 return -EBADF; 1222 return -EBADF;
1232 pos = file->f_pos; 1223 pos = f.file->f_pos;
1233 ret = compat_writev(file, vec, vlen, &pos); 1224 ret = compat_writev(f.file, vec, vlen, &pos);
1234 file->f_pos = pos; 1225 f.file->f_pos = pos;
1235 fput_light(file, fput_needed); 1226 fdput(f);
1236 return ret; 1227 return ret;
1237} 1228}
1238 1229
@@ -1240,19 +1231,18 @@ asmlinkage ssize_t
1240compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, 1231compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec,
1241 unsigned long vlen, loff_t pos) 1232 unsigned long vlen, loff_t pos)
1242{ 1233{
1243 struct file *file; 1234 struct fd f;
1244 int fput_needed;
1245 ssize_t ret; 1235 ssize_t ret;
1246 1236
1247 if (pos < 0) 1237 if (pos < 0)
1248 return -EINVAL; 1238 return -EINVAL;
1249 file = fget_light(fd, &fput_needed); 1239 f = fdget(fd);
1250 if (!file) 1240 if (!f.file)
1251 return -EBADF; 1241 return -EBADF;
1252 ret = -ESPIPE; 1242 ret = -ESPIPE;
1253 if (file->f_mode & FMODE_PWRITE) 1243 if (f.file->f_mode & FMODE_PWRITE)
1254 ret = compat_writev(file, vec, vlen, &pos); 1244 ret = compat_writev(f.file, vec, vlen, &pos);
1255 fput_light(file, fput_needed); 1245 fdput(f);
1256 return ret; 1246 return ret;
1257} 1247}
1258 1248
@@ -1802,3 +1792,25 @@ compat_sys_open_by_handle_at(int mountdirfd,
1802 return do_handle_open(mountdirfd, handle, flags); 1792 return do_handle_open(mountdirfd, handle, flags);
1803} 1793}
1804#endif 1794#endif
1795
1796#ifdef __ARCH_WANT_COMPAT_SYS_SENDFILE
1797asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
1798 compat_off_t __user *offset, compat_size_t count)
1799{
1800 loff_t pos;
1801 off_t off;
1802 ssize_t ret;
1803
1804 if (offset) {
1805 if (unlikely(get_user(off, offset)))
1806 return -EFAULT;
1807 pos = off;
1808 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1809 if (unlikely(put_user(pos, offset)))
1810 return -EFAULT;
1811 return ret;
1812 }
1813
1814 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1815}
1816#endif /* __ARCH_WANT_COMPAT_SYS_SENDFILE */
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 9c03a3ae898f..f5054025f9da 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1539,16 +1539,13 @@ static int compat_ioctl_check_table(unsigned int xcmd)
1539asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, 1539asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1540 unsigned long arg) 1540 unsigned long arg)
1541{ 1541{
1542 struct file *filp; 1542 struct fd f = fdget(fd);
1543 int error = -EBADF; 1543 int error = -EBADF;
1544 int fput_needed; 1544 if (!f.file)
1545
1546 filp = fget_light(fd, &fput_needed);
1547 if (!filp)
1548 goto out; 1545 goto out;
1549 1546
1550 /* RED-PEN how should LSM module know it's handling 32bit? */ 1547 /* RED-PEN how should LSM module know it's handling 32bit? */
1551 error = security_file_ioctl(filp, cmd, arg); 1548 error = security_file_ioctl(f.file, cmd, arg);
1552 if (error) 1549 if (error)
1553 goto out_fput; 1550 goto out_fput;
1554 1551
@@ -1568,30 +1565,30 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1568#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) 1565#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
1569 case FS_IOC_RESVSP_32: 1566 case FS_IOC_RESVSP_32:
1570 case FS_IOC_RESVSP64_32: 1567 case FS_IOC_RESVSP64_32:
1571 error = compat_ioctl_preallocate(filp, compat_ptr(arg)); 1568 error = compat_ioctl_preallocate(f.file, compat_ptr(arg));
1572 goto out_fput; 1569 goto out_fput;
1573#else 1570#else
1574 case FS_IOC_RESVSP: 1571 case FS_IOC_RESVSP:
1575 case FS_IOC_RESVSP64: 1572 case FS_IOC_RESVSP64:
1576 error = ioctl_preallocate(filp, compat_ptr(arg)); 1573 error = ioctl_preallocate(f.file, compat_ptr(arg));
1577 goto out_fput; 1574 goto out_fput;
1578#endif 1575#endif
1579 1576
1580 case FIBMAP: 1577 case FIBMAP:
1581 case FIGETBSZ: 1578 case FIGETBSZ:
1582 case FIONREAD: 1579 case FIONREAD:
1583 if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) 1580 if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode))
1584 break; 1581 break;
1585 /*FALL THROUGH*/ 1582 /*FALL THROUGH*/
1586 1583
1587 default: 1584 default:
1588 if (filp->f_op && filp->f_op->compat_ioctl) { 1585 if (f.file->f_op && f.file->f_op->compat_ioctl) {
1589 error = filp->f_op->compat_ioctl(filp, cmd, arg); 1586 error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
1590 if (error != -ENOIOCTLCMD) 1587 if (error != -ENOIOCTLCMD)
1591 goto out_fput; 1588 goto out_fput;
1592 } 1589 }
1593 1590
1594 if (!filp->f_op || !filp->f_op->unlocked_ioctl) 1591 if (!f.file->f_op || !f.file->f_op->unlocked_ioctl)
1595 goto do_ioctl; 1592 goto do_ioctl;
1596 break; 1593 break;
1597 } 1594 }
@@ -1599,7 +1596,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1599 if (compat_ioctl_check_table(XFORM(cmd))) 1596 if (compat_ioctl_check_table(XFORM(cmd)))
1600 goto found_handler; 1597 goto found_handler;
1601 1598
1602 error = do_ioctl_trans(fd, cmd, arg, filp); 1599 error = do_ioctl_trans(fd, cmd, arg, f.file);
1603 if (error == -ENOIOCTLCMD) 1600 if (error == -ENOIOCTLCMD)
1604 error = -ENOTTY; 1601 error = -ENOTTY;
1605 1602
@@ -1608,9 +1605,9 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1608 found_handler: 1605 found_handler:
1609 arg = (unsigned long)compat_ptr(arg); 1606 arg = (unsigned long)compat_ptr(arg);
1610 do_ioctl: 1607 do_ioctl:
1611 error = do_vfs_ioctl(filp, fd, cmd, arg); 1608 error = do_vfs_ioctl(f.file, fd, cmd, arg);
1612 out_fput: 1609 out_fput:
1613 fput_light(filp, fput_needed); 1610 fdput(f);
1614 out: 1611 out:
1615 return error; 1612 return error;
1616} 1613}
diff --git a/fs/coredump.c b/fs/coredump.c
new file mode 100644
index 000000000000..f045bbad6822
--- /dev/null
+++ b/fs/coredump.c
@@ -0,0 +1,686 @@
1#include <linux/slab.h>
2#include <linux/file.h>
3#include <linux/fdtable.h>
4#include <linux/mm.h>
5#include <linux/stat.h>
6#include <linux/fcntl.h>
7#include <linux/swap.h>
8#include <linux/string.h>
9#include <linux/init.h>
10#include <linux/pagemap.h>
11#include <linux/perf_event.h>
12#include <linux/highmem.h>
13#include <linux/spinlock.h>
14#include <linux/key.h>
15#include <linux/personality.h>
16#include <linux/binfmts.h>
17#include <linux/utsname.h>
18#include <linux/pid_namespace.h>
19#include <linux/module.h>
20#include <linux/namei.h>
21#include <linux/mount.h>
22#include <linux/security.h>
23#include <linux/syscalls.h>
24#include <linux/tsacct_kern.h>
25#include <linux/cn_proc.h>
26#include <linux/audit.h>
27#include <linux/tracehook.h>
28#include <linux/kmod.h>
29#include <linux/fsnotify.h>
30#include <linux/fs_struct.h>
31#include <linux/pipe_fs_i.h>
32#include <linux/oom.h>
33#include <linux/compat.h>
34
35#include <asm/uaccess.h>
36#include <asm/mmu_context.h>
37#include <asm/tlb.h>
38#include <asm/exec.h>
39
40#include <trace/events/task.h>
41#include "internal.h"
42
43#include <trace/events/sched.h>
44
/* Non-zero: format_corename() appends ".<pid>" when the pattern has no %p. */
int core_uses_pid;
/* Core name template; a leading '|' selects the pipe-to-helper mode. */
char core_pattern[CORENAME_MAX_SIZE] = "core";
/* Upper bound on concurrent pipe dumps checked in do_coredump(); 0 = no limit. */
unsigned int core_pipe_limit;

/* Growable buffer in which the expanded core name is assembled. */
struct core_name {
	char *corename;		/* kmalloc()ed buffer, grown by expand_corename() */
	int used, size;		/* bytes written so far, bytes allocated */
};
/* Multiplier of CORENAME_MAX_SIZE used when sizing core_name buffers. */
static atomic_t call_count = ATOMIC_INIT(1);
54
55/* The maximal length of core_pattern is also specified in sysctl.c */
56
57static int expand_corename(struct core_name *cn)
58{
59 char *old_corename = cn->corename;
60
61 cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
62 cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
63
64 if (!cn->corename) {
65 kfree(old_corename);
66 return -ENOMEM;
67 }
68
69 return 0;
70}
71
72static int cn_printf(struct core_name *cn, const char *fmt, ...)
73{
74 char *cur;
75 int need;
76 int ret;
77 va_list arg;
78
79 va_start(arg, fmt);
80 need = vsnprintf(NULL, 0, fmt, arg);
81 va_end(arg);
82
83 if (likely(need < cn->size - cn->used - 1))
84 goto out_printf;
85
86 ret = expand_corename(cn);
87 if (ret)
88 goto expand_fail;
89
90out_printf:
91 cur = cn->corename + cn->used;
92 va_start(arg, fmt);
93 vsnprintf(cur, need + 1, fmt, arg);
94 va_end(arg);
95 cn->used += need;
96 return 0;
97
98expand_fail:
99 return ret;
100}
101
/* Replace every '/' in the NUL-terminated string @str with '!', in place. */
static void cn_escape(char *str)
{
	char *p = str;

	while (*p != '\0') {
		if (*p == '/')
			*p = '!';
		p++;
	}
}
108
109static int cn_print_exe_file(struct core_name *cn)
110{
111 struct file *exe_file;
112 char *pathbuf, *path;
113 int ret;
114
115 exe_file = get_mm_exe_file(current->mm);
116 if (!exe_file) {
117 char *commstart = cn->corename + cn->used;
118 ret = cn_printf(cn, "%s (path unknown)", current->comm);
119 cn_escape(commstart);
120 return ret;
121 }
122
123 pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
124 if (!pathbuf) {
125 ret = -ENOMEM;
126 goto put_exe_file;
127 }
128
129 path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
130 if (IS_ERR(path)) {
131 ret = PTR_ERR(path);
132 goto free_buf;
133 }
134
135 cn_escape(path);
136
137 ret = cn_printf(cn, "%s", path);
138
139free_buf:
140 kfree(pathbuf);
141put_exe_file:
142 fput(exe_file);
143 return ret;
144}
145
146/* format_corename will inspect the pattern parameter, and output a
147 * name into corename, which must have space for at least
148 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
149 */
150static int format_corename(struct core_name *cn, long signr)
151{
152 const struct cred *cred = current_cred();
153 const char *pat_ptr = core_pattern;
154 int ispipe = (*pat_ptr == '|');
155 int pid_in_pattern = 0;
156 int err = 0;
157
158 cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
159 cn->corename = kmalloc(cn->size, GFP_KERNEL);
160 cn->used = 0;
161
162 if (!cn->corename)
163 return -ENOMEM;
164
165 /* Repeat as long as we have more pattern to process and more output
166 space */
167 while (*pat_ptr) {
168 if (*pat_ptr != '%') {
169 if (*pat_ptr == 0)
170 goto out;
171 err = cn_printf(cn, "%c", *pat_ptr++);
172 } else {
173 switch (*++pat_ptr) {
174 /* single % at the end, drop that */
175 case 0:
176 goto out;
177 /* Double percent, output one percent */
178 case '%':
179 err = cn_printf(cn, "%c", '%');
180 break;
181 /* pid */
182 case 'p':
183 pid_in_pattern = 1;
184 err = cn_printf(cn, "%d",
185 task_tgid_vnr(current));
186 break;
187 /* uid */
188 case 'u':
189 err = cn_printf(cn, "%d", cred->uid);
190 break;
191 /* gid */
192 case 'g':
193 err = cn_printf(cn, "%d", cred->gid);
194 break;
195 /* signal that caused the coredump */
196 case 's':
197 err = cn_printf(cn, "%ld", signr);
198 break;
199 /* UNIX time of coredump */
200 case 't': {
201 struct timeval tv;
202 do_gettimeofday(&tv);
203 err = cn_printf(cn, "%lu", tv.tv_sec);
204 break;
205 }
206 /* hostname */
207 case 'h': {
208 char *namestart = cn->corename + cn->used;
209 down_read(&uts_sem);
210 err = cn_printf(cn, "%s",
211 utsname()->nodename);
212 up_read(&uts_sem);
213 cn_escape(namestart);
214 break;
215 }
216 /* executable */
217 case 'e': {
218 char *commstart = cn->corename + cn->used;
219 err = cn_printf(cn, "%s", current->comm);
220 cn_escape(commstart);
221 break;
222 }
223 case 'E':
224 err = cn_print_exe_file(cn);
225 break;
226 /* core limit size */
227 case 'c':
228 err = cn_printf(cn, "%lu",
229 rlimit(RLIMIT_CORE));
230 break;
231 default:
232 break;
233 }
234 ++pat_ptr;
235 }
236
237 if (err)
238 return err;
239 }
240
241 /* Backward compatibility with core_uses_pid:
242 *
243 * If core_pattern does not include a %p (as is the default)
244 * and core_uses_pid is set, then .%pid will be appended to
245 * the filename. Do not do this for piped commands. */
246 if (!ispipe && !pid_in_pattern && core_uses_pid) {
247 err = cn_printf(cn, ".%d", task_tgid_vnr(current));
248 if (err)
249 return err;
250 }
251out:
252 return ispipe;
253}
254
255static int zap_process(struct task_struct *start, int exit_code)
256{
257 struct task_struct *t;
258 int nr = 0;
259
260 start->signal->flags = SIGNAL_GROUP_EXIT;
261 start->signal->group_exit_code = exit_code;
262 start->signal->group_stop_count = 0;
263
264 t = start;
265 do {
266 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
267 if (t != current && t->mm) {
268 sigaddset(&t->pending.signal, SIGKILL);
269 signal_wake_up(t, 1);
270 nr++;
271 }
272 } while_each_thread(start, t);
273
274 return nr;
275}
276
/*
 * Kill every other task that uses @mm so that the caller can dump it.
 *
 * Under tsk's siglock: unless a group exit is already in progress, publish
 * @core_state in mm->core_state and zap tsk's own thread group.  If the
 * group exit was already under way, -EAGAIN is returned and nothing is
 * published.  When mm_users shows users beyond the zapped threads and the
 * caller, all processes are scanned for further tasks sharing @mm.
 *
 * The number of zapped tasks is stored in core_state->nr_threads and
 * returned.
 */
static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
				struct core_state *core_state, int exit_code)
{
	struct task_struct *g, *p;
	unsigned long flags;
	int nr = -EAGAIN;

	spin_lock_irq(&tsk->sighand->siglock);
	if (!signal_group_exit(tsk->signal)) {
		mm->core_state = core_state;
		nr = zap_process(tsk, exit_code);
	}
	spin_unlock_irq(&tsk->sighand->siglock);
	if (unlikely(nr < 0))
		return nr;

	/* Only the zapped threads plus the caller use the mm: nothing to scan. */
	if (atomic_read(&mm->mm_users) == nr + 1)
		goto done;
	/*
	 * We should find and kill all tasks which use this mm, and we should
	 * count them correctly into ->nr_threads. We don't take tasklist
	 * lock, but this is safe wrt:
	 *
	 * fork:
	 *	None of sub-threads can fork after zap_process(leader). All
	 *	processes which were created before this point should be
	 *	visible to zap_threads() because copy_process() adds the new
	 *	process to the tail of init_task.tasks list, and lock/unlock
	 *	of ->siglock provides a memory barrier.
	 *
	 * do_exit:
	 *	The caller holds mm->mmap_sem. This means that the task which
	 *	uses this mm can't pass exit_mm(), so it can't exit or clear
	 *	its ->mm.
	 *
	 * de_thread:
	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
	 *	we must see either old or new leader, this does not matter.
	 *	However, it can change p->sighand, so lock_task_sighand(p)
	 *	must be used. Since p->mm != NULL and we hold ->mmap_sem
	 *	it can't fail.
	 *
	 *	Note also that "g" can be the old leader with ->mm == NULL
	 *	and already unhashed and thus removed from ->thread_group.
	 *	This is OK, __unhash_process()->list_del_rcu() does not
	 *	clear the ->next pointer, we will find the new leader via
	 *	next_thread().
	 */
	rcu_read_lock();
	for_each_process(g) {
		/* Our own group was handled above. */
		if (g == tsk->group_leader)
			continue;
		if (g->flags & PF_KTHREAD)
			continue;
		p = g;
		do {
			/*
			 * The first thread that still has an mm decides for
			 * the whole group: zap the group if it is our mm,
			 * and stop looking at this group either way.
			 */
			if (p->mm) {
				if (unlikely(p->mm == mm)) {
					lock_task_sighand(p, &flags);
					nr += zap_process(p, exit_code);
					unlock_task_sighand(p, &flags);
				}
				break;
			}
		} while_each_thread(g, p);
	}
	rcu_read_unlock();
done:
	atomic_set(&core_state->nr_threads, nr);
	return nr;
}
348
349static int coredump_wait(int exit_code, struct core_state *core_state)
350{
351 struct task_struct *tsk = current;
352 struct mm_struct *mm = tsk->mm;
353 int core_waiters = -EBUSY;
354
355 init_completion(&core_state->startup);
356 core_state->dumper.task = tsk;
357 core_state->dumper.next = NULL;
358
359 down_write(&mm->mmap_sem);
360 if (!mm->core_state)
361 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
362 up_write(&mm->mmap_sem);
363
364 if (core_waiters > 0) {
365 struct core_thread *ptr;
366
367 wait_for_completion(&core_state->startup);
368 /*
369 * Wait for all the threads to become inactive, so that
370 * all the thread context (extended register state, like
371 * fpu etc) gets copied to the memory.
372 */
373 ptr = core_state->dumper.next;
374 while (ptr != NULL) {
375 wait_task_inactive(ptr->task, 0);
376 ptr = ptr->next;
377 }
378 }
379
380 return core_waiters;
381}
382
383static void coredump_finish(struct mm_struct *mm)
384{
385 struct core_thread *curr, *next;
386 struct task_struct *task;
387
388 next = mm->core_state->dumper.next;
389 while ((curr = next) != NULL) {
390 next = curr->next;
391 task = curr->task;
392 /*
393 * see exit_mm(), curr->task must not see
394 * ->task == NULL before we read ->next.
395 */
396 smp_mb();
397 curr->task = NULL;
398 wake_up_process(task);
399 }
400
401 mm->core_state = NULL;
402}
403
404static void wait_for_dump_helpers(struct file *file)
405{
406 struct pipe_inode_info *pipe;
407
408 pipe = file->f_path.dentry->d_inode->i_pipe;
409
410 pipe_lock(pipe);
411 pipe->readers++;
412 pipe->writers--;
413
414 while ((pipe->readers > 1) && (!signal_pending(current))) {
415 wake_up_interruptible_sync(&pipe->wait);
416 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
417 pipe_wait(pipe);
418 }
419
420 pipe->readers--;
421 pipe->writers++;
422 pipe_unlock(pipe);
423
424}
425
426/*
427 * umh_pipe_setup
428 * helper function to customize the process used
429 * to collect the core in userspace. Specifically
430 * it sets up a pipe and installs it as fd 0 (stdin)
431 * for the process. Returns 0 on success, or
432 * PTR_ERR on failure.
433 * Note that it also sets the core limit to 1. This
434 * is a special value that we use to trap recursive
435 * core dumps
436 */
437static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
438{
439 struct file *files[2];
440 struct coredump_params *cp = (struct coredump_params *)info->data;
441 int err = create_pipe_files(files, 0);
442 if (err)
443 return err;
444
445 cp->file = files[1];
446
447 replace_fd(0, files[0], 0);
448 /* and disallow core files too */
449 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
450
451 return 0;
452}
453
/*
 * do_coredump - write a core dump for the current task.
 * @signr:     signal number (used for auditing and the %s specifier)
 * @exit_code: exit code given to the threads that get zapped
 * @regs:      register state handed to the binfmt dumper
 *
 * Outline: check that the binfmt can dump and the mm is dumpable, stop all
 * other users of the mm (coredump_wait), expand core_pattern, then either
 * start a usermode helper fed through a pipe or open a regular file, and
 * let binfmt->core_dump() write the image.  Cleanup is a goto ladder that
 * unwinds in reverse order of setup; there is no return value.
 */
void do_coredump(long signr, int exit_code, struct pt_regs *regs)
{
	struct core_state core_state;
	struct core_name cn;
	struct mm_struct *mm = current->mm;
	struct linux_binfmt * binfmt;
	const struct cred *old_cred;
	struct cred *cred;
	int retval = 0;
	int flag = 0;
	int ispipe;
	struct files_struct *displaced;
	bool need_nonrelative = false;
	/* Number of pipe dumps currently in flight, see core_pipe_limit. */
	static atomic_t core_dump_count = ATOMIC_INIT(0);
	struct coredump_params cprm = {
		.signr = signr,
		.regs = regs,
		.limit = rlimit(RLIMIT_CORE),
		/*
		 * We must use the same mm->flags while dumping core to avoid
		 * inconsistency of bit flags, since this flag is not protected
		 * by any locks.
		 */
		.mm_flags = mm->flags,
	};

	audit_core_dumps(signr);

	binfmt = mm->binfmt;
	if (!binfmt || !binfmt->core_dump)
		goto fail;
	if (!__get_dumpable(cprm.mm_flags))
		goto fail;

	cred = prepare_creds();
	if (!cred)
		goto fail;
	/*
	 * We cannot trust fsuid as being the "true" uid of the process
	 * nor do we know its entire history. We only know it was tainted
	 * so we dump it as root in mode 2, and only into a controlled
	 * environment (pipe handler or fully qualified path).
	 */
	if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
		/* Setuid core dump mode */
		flag = O_EXCL;		/* Stop rewrite attacks */
		cred->fsuid = GLOBAL_ROOT_UID;	/* Dump root private */
		need_nonrelative = true;
	}

	retval = coredump_wait(exit_code, &core_state);
	if (retval < 0)
		goto fail_creds;

	old_cred = override_creds(cred);

	/*
	 * Clear any false indication of pending signals that might
	 * be seen by the filesystem code called to write the core file.
	 */
	clear_thread_flag(TIF_SIGPENDING);

	/* Negative: error; positive: pipe mode; zero: dump to a file. */
	ispipe = format_corename(&cn, signr);

	if (ispipe) {
		int dump_count;
		char **helper_argv;

		if (ispipe < 0) {
			printk(KERN_WARNING "format_corename failed\n");
			printk(KERN_WARNING "Aborting core\n");
			goto fail_corename;
		}

		if (cprm.limit == 1) {
			/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
			 *
			 * Normally core limits are irrelevant to pipes, since
			 * we're not writing to the file system, but we use
			 * cprm.limit of 1 here as a special value, this is a
			 * consistent way to catch recursive crashes.
			 * We can still crash if the core_pattern binary sets
			 * RLIM_CORE = !1, but it runs as root, and can do
			 * lots of stupid things.
			 *
			 * Note that we use task_tgid_vnr here to grab the pid
			 * of the process group leader. That way we get the
			 * right pid if a thread in a multi-threaded
			 * core_pattern process dies.
			 */
			printk(KERN_WARNING
				"Process %d(%s) has RLIMIT_CORE set to 1\n",
				task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Aborting core\n");
			goto fail_unlock;
		}
		cprm.limit = RLIM_INFINITY;

		dump_count = atomic_inc_return(&core_dump_count);
		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
			       task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Skipping core dump\n");
			goto fail_dropcount;
		}

		/* Skip the leading '|' and split the rest into argv[]. */
		helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
		if (!helper_argv) {
			printk(KERN_WARNING "%s failed to allocate memory\n",
			       __func__);
			goto fail_dropcount;
		}

		/* umh_pipe_setup() stores the pipe's write end in cprm.file. */
		retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
					NULL, UMH_WAIT_EXEC, umh_pipe_setup,
					NULL, &cprm);
		argv_free(helper_argv);
		if (retval) {
			printk(KERN_INFO "Core dump to %s pipe failed\n",
			       cn.corename);
			goto close_fail;
		}
	} else {
		struct inode *inode;

		if (cprm.limit < binfmt->min_coredump)
			goto fail_unlock;

		if (need_nonrelative && cn.corename[0] != '/') {
			printk(KERN_WARNING "Pid %d(%s) can only dump core "\
				"to fully qualified path!\n",
				task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Skipping core dump\n");
			goto fail_unlock;
		}

		/* NOTE(review): the literal 2 is presumably O_RDWR - confirm. */
		cprm.file = filp_open(cn.corename,
				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
				 0600);
		if (IS_ERR(cprm.file))
			goto fail_unlock;

		inode = cprm.file->f_path.dentry->d_inode;
		if (inode->i_nlink > 1)
			goto close_fail;
		if (d_unhashed(cprm.file->f_path.dentry))
			goto close_fail;
		/*
		 * AK: actually i see no reason to not allow this for named
		 * pipes etc, but keep the previous behaviour for now.
		 */
		if (!S_ISREG(inode->i_mode))
			goto close_fail;
		/*
		 * Don't allow local users get cute and trick others to coredump
		 * into their pre-created files.
		 */
		if (!uid_eq(inode->i_uid, current_fsuid()))
			goto close_fail;
		if (!cprm.file->f_op || !cprm.file->f_op->write)
			goto close_fail;
		if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
			goto close_fail;
	}

	/* get us an unshared descriptor table; almost always a no-op */
	retval = unshare_files(&displaced);
	if (retval)
		goto close_fail;
	if (displaced)
		put_files_struct(displaced);
	retval = binfmt->core_dump(&cprm);
	/* A non-zero return from the dumper sets the 0x80 bit of the exit code. */
	if (retval)
		current->signal->group_exit_code |= 0x80;

	if (ispipe && core_pipe_limit)
		wait_for_dump_helpers(cprm.file);
close_fail:
	if (cprm.file)
		filp_close(cprm.file, NULL);
fail_dropcount:
	if (ispipe)
		atomic_dec(&core_dump_count);
fail_unlock:
	kfree(cn.corename);
fail_corename:
	coredump_finish(mm);
	revert_creds(old_cred);
fail_creds:
	put_cred(cred);
fail:
	return;
}
647
648/*
649 * Core dumping helper functions. These are the only things you should
650 * do on a core-file: use only these functions to write out all the
651 * necessary info.
652 */
653int dump_write(struct file *file, const void *addr, int nr)
654{
655 return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
656}
657EXPORT_SYMBOL(dump_write);
658
659int dump_seek(struct file *file, loff_t off)
660{
661 int ret = 1;
662
663 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
664 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
665 return 0;
666 } else {
667 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
668
669 if (!buf)
670 return 0;
671 while (off > 0) {
672 unsigned long n = off;
673
674 if (n > PAGE_SIZE)
675 n = PAGE_SIZE;
676 if (!dump_write(file, buf, n)) {
677 ret = 0;
678 break;
679 }
680 off -= n;
681 }
682 free_page((unsigned long)buf);
683 }
684 return ret;
685}
686EXPORT_SYMBOL(dump_seek);
diff --git a/fs/dcache.c b/fs/dcache.c
index 693f95bf1cae..3a463d0c4fe8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2113,7 +2113,7 @@ again:
2113 inode = dentry->d_inode; 2113 inode = dentry->d_inode;
2114 isdir = S_ISDIR(inode->i_mode); 2114 isdir = S_ISDIR(inode->i_mode);
2115 if (dentry->d_count == 1) { 2115 if (dentry->d_count == 1) {
2116 if (inode && !spin_trylock(&inode->i_lock)) { 2116 if (!spin_trylock(&inode->i_lock)) {
2117 spin_unlock(&dentry->d_lock); 2117 spin_unlock(&dentry->d_lock);
2118 cpu_relax(); 2118 cpu_relax();
2119 goto again; 2119 goto again;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 24bb043e50d9..4e0886c9e5c4 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -711,6 +711,12 @@ static void ecryptfs_free_kmem_caches(void)
711{ 711{
712 int i; 712 int i;
713 713
714 /*
715 * Make sure all delayed rcu free inodes are flushed before we
716 * destroy cache.
717 */
718 rcu_barrier();
719
714 for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { 720 for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) {
715 struct ecryptfs_cache_info *info; 721 struct ecryptfs_cache_info *info;
716 722
diff --git a/fs/efs/super.c b/fs/efs/super.c
index e755ec746c69..2002431ef9a0 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -96,6 +96,11 @@ static int init_inodecache(void)
96 96
97static void destroy_inodecache(void) 97static void destroy_inodecache(void)
98{ 98{
99 /*
100 * Make sure all delayed rcu free inodes are flushed before we
101 * destroy cache.
102 */
103 rcu_barrier();
99 kmem_cache_destroy(efs_inode_cachep); 104 kmem_cache_destroy(efs_inode_cachep);
100} 105}
101 106
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index eedec84c1809..cd96649bfe62 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1810,7 +1810,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
1810 int, maxevents, int, timeout) 1810 int, maxevents, int, timeout)
1811{ 1811{
1812 int error; 1812 int error;
1813 struct file *file; 1813 struct fd f;
1814 struct eventpoll *ep; 1814 struct eventpoll *ep;
1815 1815
1816 /* The maximum number of event must be greater than zero */ 1816 /* The maximum number of event must be greater than zero */
@@ -1818,38 +1818,33 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
1818 return -EINVAL; 1818 return -EINVAL;
1819 1819
1820 /* Verify that the area passed by the user is writeable */ 1820 /* Verify that the area passed by the user is writeable */
1821 if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) { 1821 if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))
1822 error = -EFAULT; 1822 return -EFAULT;
1823 goto error_return;
1824 }
1825 1823
1826 /* Get the "struct file *" for the eventpoll file */ 1824 /* Get the "struct file *" for the eventpoll file */
1827 error = -EBADF; 1825 f = fdget(epfd);
1828 file = fget(epfd); 1826 if (!f.file)
1829 if (!file) 1827 return -EBADF;
1830 goto error_return;
1831 1828
1832 /* 1829 /*
1833 * We have to check that the file structure underneath the fd 1830 * We have to check that the file structure underneath the fd
1834 * the user passed to us _is_ an eventpoll file. 1831 * the user passed to us _is_ an eventpoll file.
1835 */ 1832 */
1836 error = -EINVAL; 1833 error = -EINVAL;
1837 if (!is_file_epoll(file)) 1834 if (!is_file_epoll(f.file))
1838 goto error_fput; 1835 goto error_fput;
1839 1836
1840 /* 1837 /*
1841 * At this point it is safe to assume that the "private_data" contains 1838 * At this point it is safe to assume that the "private_data" contains
1842 * our own data structure. 1839 * our own data structure.
1843 */ 1840 */
1844 ep = file->private_data; 1841 ep = f.file->private_data;
1845 1842
1846 /* Time to fish for events ... */ 1843 /* Time to fish for events ... */
1847 error = ep_poll(ep, events, maxevents, timeout); 1844 error = ep_poll(ep, events, maxevents, timeout);
1848 1845
1849error_fput: 1846error_fput:
1850 fput(file); 1847 fdput(f);
1851error_return:
1852
1853 return error; 1848 return error;
1854} 1849}
1855 1850
diff --git a/fs/exec.c b/fs/exec.c
index 574cf4de4ec3..48fb26ef8a1b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -66,19 +66,8 @@
66 66
67#include <trace/events/sched.h> 67#include <trace/events/sched.h>
68 68
69int core_uses_pid;
70char core_pattern[CORENAME_MAX_SIZE] = "core";
71unsigned int core_pipe_limit;
72int suid_dumpable = 0; 69int suid_dumpable = 0;
73 70
74struct core_name {
75 char *corename;
76 int used, size;
77};
78static atomic_t call_count = ATOMIC_INIT(1);
79
80/* The maximal length of core_pattern is also specified in sysctl.c */
81
82static LIST_HEAD(formats); 71static LIST_HEAD(formats);
83static DEFINE_RWLOCK(binfmt_lock); 72static DEFINE_RWLOCK(binfmt_lock);
84 73
@@ -1006,40 +995,6 @@ no_thread_group:
1006 return 0; 995 return 0;
1007} 996}
1008 997
1009/*
1010 * These functions flushes out all traces of the currently running executable
1011 * so that a new one can be started
1012 */
1013static void flush_old_files(struct files_struct * files)
1014{
1015 long j = -1;
1016 struct fdtable *fdt;
1017
1018 spin_lock(&files->file_lock);
1019 for (;;) {
1020 unsigned long set, i;
1021
1022 j++;
1023 i = j * BITS_PER_LONG;
1024 fdt = files_fdtable(files);
1025 if (i >= fdt->max_fds)
1026 break;
1027 set = fdt->close_on_exec[j];
1028 if (!set)
1029 continue;
1030 fdt->close_on_exec[j] = 0;
1031 spin_unlock(&files->file_lock);
1032 for ( ; set ; i++,set >>= 1) {
1033 if (set & 1) {
1034 sys_close(i);
1035 }
1036 }
1037 spin_lock(&files->file_lock);
1038
1039 }
1040 spin_unlock(&files->file_lock);
1041}
1042
1043char *get_task_comm(char *buf, struct task_struct *tsk) 998char *get_task_comm(char *buf, struct task_struct *tsk)
1044{ 999{
1045 /* buf must be at least sizeof(tsk->comm) in size */ 1000 /* buf must be at least sizeof(tsk->comm) in size */
@@ -1050,6 +1005,11 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
1050} 1005}
1051EXPORT_SYMBOL_GPL(get_task_comm); 1006EXPORT_SYMBOL_GPL(get_task_comm);
1052 1007
1008/*
1009 * These functions flushes out all traces of the currently running executable
1010 * so that a new one can be started
1011 */
1012
1053void set_task_comm(struct task_struct *tsk, char *buf) 1013void set_task_comm(struct task_struct *tsk, char *buf)
1054{ 1014{
1055 task_lock(tsk); 1015 task_lock(tsk);
@@ -1171,7 +1131,7 @@ void setup_new_exec(struct linux_binprm * bprm)
1171 current->self_exec_id++; 1131 current->self_exec_id++;
1172 1132
1173 flush_signal_handlers(current, 0); 1133 flush_signal_handlers(current, 0);
1174 flush_old_files(current->files); 1134 do_close_on_exec(current->files);
1175} 1135}
1176EXPORT_SYMBOL(setup_new_exec); 1136EXPORT_SYMBOL(setup_new_exec);
1177 1137
@@ -1632,353 +1592,6 @@ void set_binfmt(struct linux_binfmt *new)
1632 1592
1633EXPORT_SYMBOL(set_binfmt); 1593EXPORT_SYMBOL(set_binfmt);
1634 1594
1635static int expand_corename(struct core_name *cn)
1636{
1637 char *old_corename = cn->corename;
1638
1639 cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
1640 cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
1641
1642 if (!cn->corename) {
1643 kfree(old_corename);
1644 return -ENOMEM;
1645 }
1646
1647 return 0;
1648}
1649
1650static int cn_printf(struct core_name *cn, const char *fmt, ...)
1651{
1652 char *cur;
1653 int need;
1654 int ret;
1655 va_list arg;
1656
1657 va_start(arg, fmt);
1658 need = vsnprintf(NULL, 0, fmt, arg);
1659 va_end(arg);
1660
1661 if (likely(need < cn->size - cn->used - 1))
1662 goto out_printf;
1663
1664 ret = expand_corename(cn);
1665 if (ret)
1666 goto expand_fail;
1667
1668out_printf:
1669 cur = cn->corename + cn->used;
1670 va_start(arg, fmt);
1671 vsnprintf(cur, need + 1, fmt, arg);
1672 va_end(arg);
1673 cn->used += need;
1674 return 0;
1675
1676expand_fail:
1677 return ret;
1678}
1679
1680static void cn_escape(char *str)
1681{
1682 for (; *str; str++)
1683 if (*str == '/')
1684 *str = '!';
1685}
1686
1687static int cn_print_exe_file(struct core_name *cn)
1688{
1689 struct file *exe_file;
1690 char *pathbuf, *path;
1691 int ret;
1692
1693 exe_file = get_mm_exe_file(current->mm);
1694 if (!exe_file) {
1695 char *commstart = cn->corename + cn->used;
1696 ret = cn_printf(cn, "%s (path unknown)", current->comm);
1697 cn_escape(commstart);
1698 return ret;
1699 }
1700
1701 pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
1702 if (!pathbuf) {
1703 ret = -ENOMEM;
1704 goto put_exe_file;
1705 }
1706
1707 path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
1708 if (IS_ERR(path)) {
1709 ret = PTR_ERR(path);
1710 goto free_buf;
1711 }
1712
1713 cn_escape(path);
1714
1715 ret = cn_printf(cn, "%s", path);
1716
1717free_buf:
1718 kfree(pathbuf);
1719put_exe_file:
1720 fput(exe_file);
1721 return ret;
1722}
1723
1724/* format_corename will inspect the pattern parameter, and output a
1725 * name into corename, which must have space for at least
1726 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1727 */
1728static int format_corename(struct core_name *cn, long signr)
1729{
1730 const struct cred *cred = current_cred();
1731 const char *pat_ptr = core_pattern;
1732 int ispipe = (*pat_ptr == '|');
1733 int pid_in_pattern = 0;
1734 int err = 0;
1735
1736 cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
1737 cn->corename = kmalloc(cn->size, GFP_KERNEL);
1738 cn->used = 0;
1739
1740 if (!cn->corename)
1741 return -ENOMEM;
1742
1743 /* Repeat as long as we have more pattern to process and more output
1744 space */
1745 while (*pat_ptr) {
1746 if (*pat_ptr != '%') {
1747 if (*pat_ptr == 0)
1748 goto out;
1749 err = cn_printf(cn, "%c", *pat_ptr++);
1750 } else {
1751 switch (*++pat_ptr) {
1752 /* single % at the end, drop that */
1753 case 0:
1754 goto out;
1755 /* Double percent, output one percent */
1756 case '%':
1757 err = cn_printf(cn, "%c", '%');
1758 break;
1759 /* pid */
1760 case 'p':
1761 pid_in_pattern = 1;
1762 err = cn_printf(cn, "%d",
1763 task_tgid_vnr(current));
1764 break;
1765 /* uid */
1766 case 'u':
1767 err = cn_printf(cn, "%d", cred->uid);
1768 break;
1769 /* gid */
1770 case 'g':
1771 err = cn_printf(cn, "%d", cred->gid);
1772 break;
1773 /* signal that caused the coredump */
1774 case 's':
1775 err = cn_printf(cn, "%ld", signr);
1776 break;
1777 /* UNIX time of coredump */
1778 case 't': {
1779 struct timeval tv;
1780 do_gettimeofday(&tv);
1781 err = cn_printf(cn, "%lu", tv.tv_sec);
1782 break;
1783 }
1784 /* hostname */
1785 case 'h': {
1786 char *namestart = cn->corename + cn->used;
1787 down_read(&uts_sem);
1788 err = cn_printf(cn, "%s",
1789 utsname()->nodename);
1790 up_read(&uts_sem);
1791 cn_escape(namestart);
1792 break;
1793 }
1794 /* executable */
1795 case 'e': {
1796 char *commstart = cn->corename + cn->used;
1797 err = cn_printf(cn, "%s", current->comm);
1798 cn_escape(commstart);
1799 break;
1800 }
1801 case 'E':
1802 err = cn_print_exe_file(cn);
1803 break;
1804 /* core limit size */
1805 case 'c':
1806 err = cn_printf(cn, "%lu",
1807 rlimit(RLIMIT_CORE));
1808 break;
1809 default:
1810 break;
1811 }
1812 ++pat_ptr;
1813 }
1814
1815 if (err)
1816 return err;
1817 }
1818
1819 /* Backward compatibility with core_uses_pid:
1820 *
1821 * If core_pattern does not include a %p (as is the default)
1822 * and core_uses_pid is set, then .%pid will be appended to
1823 * the filename. Do not do this for piped commands. */
1824 if (!ispipe && !pid_in_pattern && core_uses_pid) {
1825 err = cn_printf(cn, ".%d", task_tgid_vnr(current));
1826 if (err)
1827 return err;
1828 }
1829out:
1830 return ispipe;
1831}
1832
1833static int zap_process(struct task_struct *start, int exit_code)
1834{
1835 struct task_struct *t;
1836 int nr = 0;
1837
1838 start->signal->flags = SIGNAL_GROUP_EXIT;
1839 start->signal->group_exit_code = exit_code;
1840 start->signal->group_stop_count = 0;
1841
1842 t = start;
1843 do {
1844 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
1845 if (t != current && t->mm) {
1846 sigaddset(&t->pending.signal, SIGKILL);
1847 signal_wake_up(t, 1);
1848 nr++;
1849 }
1850 } while_each_thread(start, t);
1851
1852 return nr;
1853}
1854
1855static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
1856 struct core_state *core_state, int exit_code)
1857{
1858 struct task_struct *g, *p;
1859 unsigned long flags;
1860 int nr = -EAGAIN;
1861
1862 spin_lock_irq(&tsk->sighand->siglock);
1863 if (!signal_group_exit(tsk->signal)) {
1864 mm->core_state = core_state;
1865 nr = zap_process(tsk, exit_code);
1866 }
1867 spin_unlock_irq(&tsk->sighand->siglock);
1868 if (unlikely(nr < 0))
1869 return nr;
1870
1871 if (atomic_read(&mm->mm_users) == nr + 1)
1872 goto done;
1873 /*
1874 * We should find and kill all tasks which use this mm, and we should
1875 * count them correctly into ->nr_threads. We don't take tasklist
1876 * lock, but this is safe wrt:
1877 *
1878 * fork:
1879 * None of sub-threads can fork after zap_process(leader). All
1880 * processes which were created before this point should be
1881 * visible to zap_threads() because copy_process() adds the new
1882 * process to the tail of init_task.tasks list, and lock/unlock
1883 * of ->siglock provides a memory barrier.
1884 *
1885 * do_exit:
1886 * The caller holds mm->mmap_sem. This means that the task which
1887 * uses this mm can't pass exit_mm(), so it can't exit or clear
1888 * its ->mm.
1889 *
1890 * de_thread:
1891 * It does list_replace_rcu(&leader->tasks, &current->tasks),
1892 * we must see either old or new leader, this does not matter.
1893 * However, it can change p->sighand, so lock_task_sighand(p)
1894 * must be used. Since p->mm != NULL and we hold ->mmap_sem
1895 * it can't fail.
1896 *
1897 * Note also that "g" can be the old leader with ->mm == NULL
1898 * and already unhashed and thus removed from ->thread_group.
1899 * This is OK, __unhash_process()->list_del_rcu() does not
1900 * clear the ->next pointer, we will find the new leader via
1901 * next_thread().
1902 */
1903 rcu_read_lock();
1904 for_each_process(g) {
1905 if (g == tsk->group_leader)
1906 continue;
1907 if (g->flags & PF_KTHREAD)
1908 continue;
1909 p = g;
1910 do {
1911 if (p->mm) {
1912 if (unlikely(p->mm == mm)) {
1913 lock_task_sighand(p, &flags);
1914 nr += zap_process(p, exit_code);
1915 unlock_task_sighand(p, &flags);
1916 }
1917 break;
1918 }
1919 } while_each_thread(g, p);
1920 }
1921 rcu_read_unlock();
1922done:
1923 atomic_set(&core_state->nr_threads, nr);
1924 return nr;
1925}
1926
1927static int coredump_wait(int exit_code, struct core_state *core_state)
1928{
1929 struct task_struct *tsk = current;
1930 struct mm_struct *mm = tsk->mm;
1931 int core_waiters = -EBUSY;
1932
1933 init_completion(&core_state->startup);
1934 core_state->dumper.task = tsk;
1935 core_state->dumper.next = NULL;
1936
1937 down_write(&mm->mmap_sem);
1938 if (!mm->core_state)
1939 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
1940 up_write(&mm->mmap_sem);
1941
1942 if (core_waiters > 0) {
1943 struct core_thread *ptr;
1944
1945 wait_for_completion(&core_state->startup);
1946 /*
1947 * Wait for all the threads to become inactive, so that
1948 * all the thread context (extended register state, like
1949 * fpu etc) gets copied to the memory.
1950 */
1951 ptr = core_state->dumper.next;
1952 while (ptr != NULL) {
1953 wait_task_inactive(ptr->task, 0);
1954 ptr = ptr->next;
1955 }
1956 }
1957
1958 return core_waiters;
1959}
1960
1961static void coredump_finish(struct mm_struct *mm)
1962{
1963 struct core_thread *curr, *next;
1964 struct task_struct *task;
1965
1966 next = mm->core_state->dumper.next;
1967 while ((curr = next) != NULL) {
1968 next = curr->next;
1969 task = curr->task;
1970 /*
1971 * see exit_mm(), curr->task must not see
1972 * ->task == NULL before we read ->next.
1973 */
1974 smp_mb();
1975 curr->task = NULL;
1976 wake_up_process(task);
1977 }
1978
1979 mm->core_state = NULL;
1980}
1981
1982/* 1595/*
1983 * set_dumpable converts traditional three-value dumpable to two flags and 1596 * set_dumpable converts traditional three-value dumpable to two flags and
1984 * stores them into mm->flags. It modifies lower two bits of mm->flags, but 1597 * stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -2020,7 +1633,7 @@ void set_dumpable(struct mm_struct *mm, int value)
2020 } 1633 }
2021} 1634}
2022 1635
2023static int __get_dumpable(unsigned long mm_flags) 1636int __get_dumpable(unsigned long mm_flags)
2024{ 1637{
2025 int ret; 1638 int ret;
2026 1639
@@ -2032,290 +1645,3 @@ int get_dumpable(struct mm_struct *mm)
2032{ 1645{
2033 return __get_dumpable(mm->flags); 1646 return __get_dumpable(mm->flags);
2034} 1647}
2035
2036static void wait_for_dump_helpers(struct file *file)
2037{
2038 struct pipe_inode_info *pipe;
2039
2040 pipe = file->f_path.dentry->d_inode->i_pipe;
2041
2042 pipe_lock(pipe);
2043 pipe->readers++;
2044 pipe->writers--;
2045
2046 while ((pipe->readers > 1) && (!signal_pending(current))) {
2047 wake_up_interruptible_sync(&pipe->wait);
2048 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
2049 pipe_wait(pipe);
2050 }
2051
2052 pipe->readers--;
2053 pipe->writers++;
2054 pipe_unlock(pipe);
2055
2056}
2057
2058
2059/*
2060 * umh_pipe_setup
2061 * helper function to customize the process used
2062 * to collect the core in userspace. Specifically
2063 * it sets up a pipe and installs it as fd 0 (stdin)
2064 * for the process. Returns 0 on success, or
2065 * PTR_ERR on failure.
2066 * Note that it also sets the core limit to 1. This
2067 * is a special value that we use to trap recursive
2068 * core dumps
2069 */
2070static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
2071{
2072 struct file *files[2];
2073 struct fdtable *fdt;
2074 struct coredump_params *cp = (struct coredump_params *)info->data;
2075 struct files_struct *cf = current->files;
2076 int err = create_pipe_files(files, 0);
2077 if (err)
2078 return err;
2079
2080 cp->file = files[1];
2081
2082 sys_close(0);
2083 fd_install(0, files[0]);
2084 spin_lock(&cf->file_lock);
2085 fdt = files_fdtable(cf);
2086 __set_open_fd(0, fdt);
2087 __clear_close_on_exec(0, fdt);
2088 spin_unlock(&cf->file_lock);
2089
2090 /* and disallow core files too */
2091 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
2092
2093 return 0;
2094}
2095
2096void do_coredump(long signr, int exit_code, struct pt_regs *regs)
2097{
2098 struct core_state core_state;
2099 struct core_name cn;
2100 struct mm_struct *mm = current->mm;
2101 struct linux_binfmt * binfmt;
2102 const struct cred *old_cred;
2103 struct cred *cred;
2104 int retval = 0;
2105 int flag = 0;
2106 int ispipe;
2107 bool need_nonrelative = false;
2108 static atomic_t core_dump_count = ATOMIC_INIT(0);
2109 struct coredump_params cprm = {
2110 .signr = signr,
2111 .regs = regs,
2112 .limit = rlimit(RLIMIT_CORE),
2113 /*
2114 * We must use the same mm->flags while dumping core to avoid
2115 * inconsistency of bit flags, since this flag is not protected
2116 * by any locks.
2117 */
2118 .mm_flags = mm->flags,
2119 };
2120
2121 audit_core_dumps(signr);
2122
2123 binfmt = mm->binfmt;
2124 if (!binfmt || !binfmt->core_dump)
2125 goto fail;
2126 if (!__get_dumpable(cprm.mm_flags))
2127 goto fail;
2128
2129 cred = prepare_creds();
2130 if (!cred)
2131 goto fail;
2132 /*
2133 * We cannot trust fsuid as being the "true" uid of the process
2134 * nor do we know its entire history. We only know it was tainted
2135 * so we dump it as root in mode 2, and only into a controlled
2136 * environment (pipe handler or fully qualified path).
2137 */
2138 if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
2139 /* Setuid core dump mode */
2140 flag = O_EXCL; /* Stop rewrite attacks */
2141 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
2142 need_nonrelative = true;
2143 }
2144
2145 retval = coredump_wait(exit_code, &core_state);
2146 if (retval < 0)
2147 goto fail_creds;
2148
2149 old_cred = override_creds(cred);
2150
2151 /*
2152 * Clear any false indication of pending signals that might
2153 * be seen by the filesystem code called to write the core file.
2154 */
2155 clear_thread_flag(TIF_SIGPENDING);
2156
2157 ispipe = format_corename(&cn, signr);
2158
2159 if (ispipe) {
2160 int dump_count;
2161 char **helper_argv;
2162
2163 if (ispipe < 0) {
2164 printk(KERN_WARNING "format_corename failed\n");
2165 printk(KERN_WARNING "Aborting core\n");
2166 goto fail_corename;
2167 }
2168
2169 if (cprm.limit == 1) {
2170 /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
2171 *
2172 * Normally core limits are irrelevant to pipes, since
2173 * we're not writing to the file system, but we use
2174 * cprm.limit of 1 here as a speacial value, this is a
2175 * consistent way to catch recursive crashes.
2176 * We can still crash if the core_pattern binary sets
2177 * RLIM_CORE = !1, but it runs as root, and can do
2178 * lots of stupid things.
2179 *
2180 * Note that we use task_tgid_vnr here to grab the pid
2181 * of the process group leader. That way we get the
2182 * right pid if a thread in a multi-threaded
2183 * core_pattern process dies.
2184 */
2185 printk(KERN_WARNING
2186 "Process %d(%s) has RLIMIT_CORE set to 1\n",
2187 task_tgid_vnr(current), current->comm);
2188 printk(KERN_WARNING "Aborting core\n");
2189 goto fail_unlock;
2190 }
2191 cprm.limit = RLIM_INFINITY;
2192
2193 dump_count = atomic_inc_return(&core_dump_count);
2194 if (core_pipe_limit && (core_pipe_limit < dump_count)) {
2195 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
2196 task_tgid_vnr(current), current->comm);
2197 printk(KERN_WARNING "Skipping core dump\n");
2198 goto fail_dropcount;
2199 }
2200
2201 helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
2202 if (!helper_argv) {
2203 printk(KERN_WARNING "%s failed to allocate memory\n",
2204 __func__);
2205 goto fail_dropcount;
2206 }
2207
2208 retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
2209 NULL, UMH_WAIT_EXEC, umh_pipe_setup,
2210 NULL, &cprm);
2211 argv_free(helper_argv);
2212 if (retval) {
2213 printk(KERN_INFO "Core dump to %s pipe failed\n",
2214 cn.corename);
2215 goto close_fail;
2216 }
2217 } else {
2218 struct inode *inode;
2219
2220 if (cprm.limit < binfmt->min_coredump)
2221 goto fail_unlock;
2222
2223 if (need_nonrelative && cn.corename[0] != '/') {
2224 printk(KERN_WARNING "Pid %d(%s) can only dump core "\
2225 "to fully qualified path!\n",
2226 task_tgid_vnr(current), current->comm);
2227 printk(KERN_WARNING "Skipping core dump\n");
2228 goto fail_unlock;
2229 }
2230
2231 cprm.file = filp_open(cn.corename,
2232 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
2233 0600);
2234 if (IS_ERR(cprm.file))
2235 goto fail_unlock;
2236
2237 inode = cprm.file->f_path.dentry->d_inode;
2238 if (inode->i_nlink > 1)
2239 goto close_fail;
2240 if (d_unhashed(cprm.file->f_path.dentry))
2241 goto close_fail;
2242 /*
2243 * AK: actually i see no reason to not allow this for named
2244 * pipes etc, but keep the previous behaviour for now.
2245 */
2246 if (!S_ISREG(inode->i_mode))
2247 goto close_fail;
2248 /*
2249 * Dont allow local users get cute and trick others to coredump
2250 * into their pre-created files.
2251 */
2252 if (!uid_eq(inode->i_uid, current_fsuid()))
2253 goto close_fail;
2254 if (!cprm.file->f_op || !cprm.file->f_op->write)
2255 goto close_fail;
2256 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
2257 goto close_fail;
2258 }
2259
2260 retval = binfmt->core_dump(&cprm);
2261 if (retval)
2262 current->signal->group_exit_code |= 0x80;
2263
2264 if (ispipe && core_pipe_limit)
2265 wait_for_dump_helpers(cprm.file);
2266close_fail:
2267 if (cprm.file)
2268 filp_close(cprm.file, NULL);
2269fail_dropcount:
2270 if (ispipe)
2271 atomic_dec(&core_dump_count);
2272fail_unlock:
2273 kfree(cn.corename);
2274fail_corename:
2275 coredump_finish(mm);
2276 revert_creds(old_cred);
2277fail_creds:
2278 put_cred(cred);
2279fail:
2280 return;
2281}
2282
2283/*
2284 * Core dumping helper functions. These are the only things you should
2285 * do on a core-file: use only these functions to write out all the
2286 * necessary info.
2287 */
2288int dump_write(struct file *file, const void *addr, int nr)
2289{
2290 return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
2291}
2292EXPORT_SYMBOL(dump_write);
2293
2294int dump_seek(struct file *file, loff_t off)
2295{
2296 int ret = 1;
2297
2298 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
2299 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
2300 return 0;
2301 } else {
2302 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
2303
2304 if (!buf)
2305 return 0;
2306 while (off > 0) {
2307 unsigned long n = off;
2308
2309 if (n > PAGE_SIZE)
2310 n = PAGE_SIZE;
2311 if (!dump_write(file, buf, n)) {
2312 ret = 0;
2313 break;
2314 }
2315 off -= n;
2316 }
2317 free_page((unsigned long)buf);
2318 }
2319 return ret;
2320}
2321EXPORT_SYMBOL(dump_seek);
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index dde41a75c7c8..59e3bbfac0b1 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -206,6 +206,11 @@ static int init_inodecache(void)
206 */ 206 */
207static void destroy_inodecache(void) 207static void destroy_inodecache(void)
208{ 208{
209 /*
210 * Make sure all delayed rcu free inodes are flushed before we
211 * destroy cache.
212 */
213 rcu_barrier();
209 kmem_cache_destroy(exofs_inode_cachep); 214 kmem_cache_destroy(exofs_inode_cachep);
210} 215}
211 216
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index af74d9e27b71..6c205d0c565b 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -206,6 +206,11 @@ static int init_inodecache(void)
206 206
207static void destroy_inodecache(void) 207static void destroy_inodecache(void)
208{ 208{
209 /*
210 * Make sure all delayed rcu free inodes are flushed before we
211 * destroy cache.
212 */
213 rcu_barrier();
209 kmem_cache_destroy(ext2_inode_cachep); 214 kmem_cache_destroy(ext2_inode_cachep);
210} 215}
211 216
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 09b8455bd7eb..bd29894c8fbc 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -532,6 +532,11 @@ static int init_inodecache(void)
532 532
533static void destroy_inodecache(void) 533static void destroy_inodecache(void)
534{ 534{
535 /*
536 * Make sure all delayed rcu free inodes are flushed before we
537 * destroy cache.
538 */
539 rcu_barrier();
535 kmem_cache_destroy(ext3_inode_cachep); 540 kmem_cache_destroy(ext3_inode_cachep);
536} 541}
537 542
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 7f7dad787603..5439d6a56e99 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -233,7 +233,7 @@ group_extend_out:
233 233
234 case EXT4_IOC_MOVE_EXT: { 234 case EXT4_IOC_MOVE_EXT: {
235 struct move_extent me; 235 struct move_extent me;
236 struct file *donor_filp; 236 struct fd donor;
237 int err; 237 int err;
238 238
239 if (!(filp->f_mode & FMODE_READ) || 239 if (!(filp->f_mode & FMODE_READ) ||
@@ -245,11 +245,11 @@ group_extend_out:
245 return -EFAULT; 245 return -EFAULT;
246 me.moved_len = 0; 246 me.moved_len = 0;
247 247
248 donor_filp = fget(me.donor_fd); 248 donor = fdget(me.donor_fd);
249 if (!donor_filp) 249 if (!donor.file)
250 return -EBADF; 250 return -EBADF;
251 251
252 if (!(donor_filp->f_mode & FMODE_WRITE)) { 252 if (!(donor.file->f_mode & FMODE_WRITE)) {
253 err = -EBADF; 253 err = -EBADF;
254 goto mext_out; 254 goto mext_out;
255 } 255 }
@@ -258,14 +258,15 @@ group_extend_out:
258 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 258 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
259 ext4_msg(sb, KERN_ERR, 259 ext4_msg(sb, KERN_ERR,
260 "Online defrag not supported with bigalloc"); 260 "Online defrag not supported with bigalloc");
261 return -EOPNOTSUPP; 261 err = -EOPNOTSUPP;
262 goto mext_out;
262 } 263 }
263 264
264 err = mnt_want_write_file(filp); 265 err = mnt_want_write_file(filp);
265 if (err) 266 if (err)
266 goto mext_out; 267 goto mext_out;
267 268
268 err = ext4_move_extents(filp, donor_filp, me.orig_start, 269 err = ext4_move_extents(filp, donor.file, me.orig_start,
269 me.donor_start, me.len, &me.moved_len); 270 me.donor_start, me.len, &me.moved_len);
270 mnt_drop_write_file(filp); 271 mnt_drop_write_file(filp);
271 272
@@ -273,7 +274,7 @@ group_extend_out:
273 &me, sizeof(me))) 274 &me, sizeof(me)))
274 err = -EFAULT; 275 err = -EFAULT;
275mext_out: 276mext_out:
276 fput(donor_filp); 277 fdput(donor);
277 return err; 278 return err;
278 } 279 }
279 280
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1f15cc836fbd..69c55d4e4626 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1019,6 +1019,11 @@ static int init_inodecache(void)
1019 1019
1020static void destroy_inodecache(void) 1020static void destroy_inodecache(void)
1021{ 1021{
1022 /*
1023 * Make sure all delayed rcu free inodes are flushed before we
1024 * destroy cache.
1025 */
1026 rcu_barrier();
1022 kmem_cache_destroy(ext4_inode_cachep); 1027 kmem_cache_destroy(ext4_inode_cachep);
1023} 1028}
1024 1029
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 47d9eb0be886..4e5a6ac54ebd 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -521,6 +521,11 @@ static int __init fat_init_inodecache(void)
521 521
522static void __exit fat_destroy_inodecache(void) 522static void __exit fat_destroy_inodecache(void)
523{ 523{
524 /*
525 * Make sure all delayed rcu free inodes are flushed before we
526 * destroy cache.
527 */
528 rcu_barrier();
524 kmem_cache_destroy(fat_inode_cachep); 529 kmem_cache_destroy(fat_inode_cachep);
525} 530}
526 531
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 887b5ba8c9b5..8f704291d4ed 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -26,124 +26,6 @@
26#include <asm/siginfo.h> 26#include <asm/siginfo.h>
27#include <asm/uaccess.h> 27#include <asm/uaccess.h>
28 28
29void set_close_on_exec(unsigned int fd, int flag)
30{
31 struct files_struct *files = current->files;
32 struct fdtable *fdt;
33 spin_lock(&files->file_lock);
34 fdt = files_fdtable(files);
35 if (flag)
36 __set_close_on_exec(fd, fdt);
37 else
38 __clear_close_on_exec(fd, fdt);
39 spin_unlock(&files->file_lock);
40}
41
42static bool get_close_on_exec(unsigned int fd)
43{
44 struct files_struct *files = current->files;
45 struct fdtable *fdt;
46 bool res;
47 rcu_read_lock();
48 fdt = files_fdtable(files);
49 res = close_on_exec(fd, fdt);
50 rcu_read_unlock();
51 return res;
52}
53
54SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
55{
56 int err = -EBADF;
57 struct file * file, *tofree;
58 struct files_struct * files = current->files;
59 struct fdtable *fdt;
60
61 if ((flags & ~O_CLOEXEC) != 0)
62 return -EINVAL;
63
64 if (unlikely(oldfd == newfd))
65 return -EINVAL;
66
67 spin_lock(&files->file_lock);
68 err = expand_files(files, newfd);
69 file = fcheck(oldfd);
70 if (unlikely(!file))
71 goto Ebadf;
72 if (unlikely(err < 0)) {
73 if (err == -EMFILE)
74 goto Ebadf;
75 goto out_unlock;
76 }
77 /*
78 * We need to detect attempts to do dup2() over allocated but still
79 * not finished descriptor. NB: OpenBSD avoids that at the price of
80 * extra work in their equivalent of fget() - they insert struct
81 * file immediately after grabbing descriptor, mark it larval if
82 * more work (e.g. actual opening) is needed and make sure that
83 * fget() treats larval files as absent. Potentially interesting,
84 * but while extra work in fget() is trivial, locking implications
85 * and amount of surgery on open()-related paths in VFS are not.
86 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
87 * deadlocks in rather amusing ways, AFAICS. All of that is out of
88 * scope of POSIX or SUS, since neither considers shared descriptor
89 * tables and this condition does not arise without those.
90 */
91 err = -EBUSY;
92 fdt = files_fdtable(files);
93 tofree = fdt->fd[newfd];
94 if (!tofree && fd_is_open(newfd, fdt))
95 goto out_unlock;
96 get_file(file);
97 rcu_assign_pointer(fdt->fd[newfd], file);
98 __set_open_fd(newfd, fdt);
99 if (flags & O_CLOEXEC)
100 __set_close_on_exec(newfd, fdt);
101 else
102 __clear_close_on_exec(newfd, fdt);
103 spin_unlock(&files->file_lock);
104
105 if (tofree)
106 filp_close(tofree, files);
107
108 return newfd;
109
110Ebadf:
111 err = -EBADF;
112out_unlock:
113 spin_unlock(&files->file_lock);
114 return err;
115}
116
117SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
118{
119 if (unlikely(newfd == oldfd)) { /* corner case */
120 struct files_struct *files = current->files;
121 int retval = oldfd;
122
123 rcu_read_lock();
124 if (!fcheck_files(files, oldfd))
125 retval = -EBADF;
126 rcu_read_unlock();
127 return retval;
128 }
129 return sys_dup3(oldfd, newfd, 0);
130}
131
132SYSCALL_DEFINE1(dup, unsigned int, fildes)
133{
134 int ret = -EBADF;
135 struct file *file = fget_raw(fildes);
136
137 if (file) {
138 ret = get_unused_fd();
139 if (ret >= 0)
140 fd_install(ret, file);
141 else
142 fput(file);
143 }
144 return ret;
145}
146
147#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) 29#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
148 30
149static int setfl(int fd, struct file * filp, unsigned long arg) 31static int setfl(int fd, struct file * filp, unsigned long arg)
@@ -267,7 +149,7 @@ pid_t f_getown(struct file *filp)
267 149
268static int f_setown_ex(struct file *filp, unsigned long arg) 150static int f_setown_ex(struct file *filp, unsigned long arg)
269{ 151{
270 struct f_owner_ex * __user owner_p = (void * __user)arg; 152 struct f_owner_ex __user *owner_p = (void __user *)arg;
271 struct f_owner_ex owner; 153 struct f_owner_ex owner;
272 struct pid *pid; 154 struct pid *pid;
273 int type; 155 int type;
@@ -307,7 +189,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
307 189
308static int f_getown_ex(struct file *filp, unsigned long arg) 190static int f_getown_ex(struct file *filp, unsigned long arg)
309{ 191{
310 struct f_owner_ex * __user owner_p = (void * __user)arg; 192 struct f_owner_ex __user *owner_p = (void __user *)arg;
311 struct f_owner_ex owner; 193 struct f_owner_ex owner;
312 int ret = 0; 194 int ret = 0;
313 195
@@ -345,7 +227,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
345static int f_getowner_uids(struct file *filp, unsigned long arg) 227static int f_getowner_uids(struct file *filp, unsigned long arg)
346{ 228{
347 struct user_namespace *user_ns = current_user_ns(); 229 struct user_namespace *user_ns = current_user_ns();
348 uid_t * __user dst = (void * __user)arg; 230 uid_t __user *dst = (void __user *)arg;
349 uid_t src[2]; 231 uid_t src[2];
350 int err; 232 int err;
351 233
@@ -373,14 +255,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
373 255
374 switch (cmd) { 256 switch (cmd) {
375 case F_DUPFD: 257 case F_DUPFD:
258 err = f_dupfd(arg, filp, 0);
259 break;
376 case F_DUPFD_CLOEXEC: 260 case F_DUPFD_CLOEXEC:
377 if (arg >= rlimit(RLIMIT_NOFILE)) 261 err = f_dupfd(arg, filp, FD_CLOEXEC);
378 break;
379 err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
380 if (err >= 0) {
381 get_file(filp);
382 fd_install(err, filp);
383 }
384 break; 262 break;
385 case F_GETFD: 263 case F_GETFD:
386 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; 264 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
@@ -470,25 +348,23 @@ static int check_fcntl_cmd(unsigned cmd)
470 348
471SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 349SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
472{ 350{
473 struct file *filp; 351 struct fd f = fdget_raw(fd);
474 int fput_needed;
475 long err = -EBADF; 352 long err = -EBADF;
476 353
477 filp = fget_raw_light(fd, &fput_needed); 354 if (!f.file)
478 if (!filp)
479 goto out; 355 goto out;
480 356
481 if (unlikely(filp->f_mode & FMODE_PATH)) { 357 if (unlikely(f.file->f_mode & FMODE_PATH)) {
482 if (!check_fcntl_cmd(cmd)) 358 if (!check_fcntl_cmd(cmd))
483 goto out1; 359 goto out1;
484 } 360 }
485 361
486 err = security_file_fcntl(filp, cmd, arg); 362 err = security_file_fcntl(f.file, cmd, arg);
487 if (!err) 363 if (!err)
488 err = do_fcntl(fd, cmd, arg, filp); 364 err = do_fcntl(fd, cmd, arg, f.file);
489 365
490out1: 366out1:
491 fput_light(filp, fput_needed); 367 fdput(f);
492out: 368out:
493 return err; 369 return err;
494} 370}
@@ -497,38 +373,36 @@ out:
497SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, 373SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
498 unsigned long, arg) 374 unsigned long, arg)
499{ 375{
500 struct file * filp; 376 struct fd f = fdget_raw(fd);
501 long err = -EBADF; 377 long err = -EBADF;
502 int fput_needed;
503 378
504 filp = fget_raw_light(fd, &fput_needed); 379 if (!f.file)
505 if (!filp)
506 goto out; 380 goto out;
507 381
508 if (unlikely(filp->f_mode & FMODE_PATH)) { 382 if (unlikely(f.file->f_mode & FMODE_PATH)) {
509 if (!check_fcntl_cmd(cmd)) 383 if (!check_fcntl_cmd(cmd))
510 goto out1; 384 goto out1;
511 } 385 }
512 386
513 err = security_file_fcntl(filp, cmd, arg); 387 err = security_file_fcntl(f.file, cmd, arg);
514 if (err) 388 if (err)
515 goto out1; 389 goto out1;
516 390
517 switch (cmd) { 391 switch (cmd) {
518 case F_GETLK64: 392 case F_GETLK64:
519 err = fcntl_getlk64(filp, (struct flock64 __user *) arg); 393 err = fcntl_getlk64(f.file, (struct flock64 __user *) arg);
520 break; 394 break;
521 case F_SETLK64: 395 case F_SETLK64:
522 case F_SETLKW64: 396 case F_SETLKW64:
523 err = fcntl_setlk64(fd, filp, cmd, 397 err = fcntl_setlk64(fd, f.file, cmd,
524 (struct flock64 __user *) arg); 398 (struct flock64 __user *) arg);
525 break; 399 break;
526 default: 400 default:
527 err = do_fcntl(fd, cmd, arg, filp); 401 err = do_fcntl(fd, cmd, arg, f.file);
528 break; 402 break;
529 } 403 }
530out1: 404out1:
531 fput_light(filp, fput_needed); 405 fdput(f);
532out: 406out:
533 return err; 407 return err;
534} 408}
diff --git a/fs/fhandle.c b/fs/fhandle.c
index a48e4a139be1..f775bfdd6e4a 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -113,24 +113,21 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
113 113
114static struct vfsmount *get_vfsmount_from_fd(int fd) 114static struct vfsmount *get_vfsmount_from_fd(int fd)
115{ 115{
116 struct path path; 116 struct vfsmount *mnt;
117 117
118 if (fd == AT_FDCWD) { 118 if (fd == AT_FDCWD) {
119 struct fs_struct *fs = current->fs; 119 struct fs_struct *fs = current->fs;
120 spin_lock(&fs->lock); 120 spin_lock(&fs->lock);
121 path = fs->pwd; 121 mnt = mntget(fs->pwd.mnt);
122 mntget(path.mnt);
123 spin_unlock(&fs->lock); 122 spin_unlock(&fs->lock);
124 } else { 123 } else {
125 int fput_needed; 124 struct fd f = fdget(fd);
126 struct file *file = fget_light(fd, &fput_needed); 125 if (!f.file)
127 if (!file)
128 return ERR_PTR(-EBADF); 126 return ERR_PTR(-EBADF);
129 path = file->f_path; 127 mnt = mntget(f.file->f_path.mnt);
130 mntget(path.mnt); 128 fdput(f);
131 fput_light(file, fput_needed);
132 } 129 }
133 return path.mnt; 130 return mnt;
134} 131}
135 132
136static int vfs_dentry_acceptable(void *context, struct dentry *dentry) 133static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
diff --git a/fs/file.c b/fs/file.c
index ba3f6053025c..0f1bda4bebfa 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
6 * Manage the dynamic fd arrays in the process files_struct. 6 * Manage the dynamic fd arrays in the process files_struct.
7 */ 7 */
8 8
9#include <linux/syscalls.h>
9#include <linux/export.h> 10#include <linux/export.h>
10#include <linux/fs.h> 11#include <linux/fs.h>
11#include <linux/mm.h> 12#include <linux/mm.h>
@@ -84,22 +85,14 @@ static void free_fdtable_work(struct work_struct *work)
84 } 85 }
85} 86}
86 87
87void free_fdtable_rcu(struct rcu_head *rcu) 88static void free_fdtable_rcu(struct rcu_head *rcu)
88{ 89{
89 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); 90 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
90 struct fdtable_defer *fddef; 91 struct fdtable_defer *fddef;
91 92
92 BUG_ON(!fdt); 93 BUG_ON(!fdt);
94 BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT);
93 95
94 if (fdt->max_fds <= NR_OPEN_DEFAULT) {
95 /*
96 * This fdtable is embedded in the files structure and that
97 * structure itself is getting destroyed.
98 */
99 kmem_cache_free(files_cachep,
100 container_of(fdt, struct files_struct, fdtab));
101 return;
102 }
103 if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { 96 if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) {
104 kfree(fdt->fd); 97 kfree(fdt->fd);
105 kfree(fdt->open_fds); 98 kfree(fdt->open_fds);
@@ -229,7 +222,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
229 copy_fdtable(new_fdt, cur_fdt); 222 copy_fdtable(new_fdt, cur_fdt);
230 rcu_assign_pointer(files->fdt, new_fdt); 223 rcu_assign_pointer(files->fdt, new_fdt);
231 if (cur_fdt->max_fds > NR_OPEN_DEFAULT) 224 if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
232 free_fdtable(cur_fdt); 225 call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
233 } else { 226 } else {
234 /* Somebody else expanded, so undo our attempt */ 227 /* Somebody else expanded, so undo our attempt */
235 __free_fdtable(new_fdt); 228 __free_fdtable(new_fdt);
@@ -245,19 +238,12 @@ static int expand_fdtable(struct files_struct *files, int nr)
245 * expanded and execution may have blocked. 238 * expanded and execution may have blocked.
246 * The files->file_lock should be held on entry, and will be held on exit. 239 * The files->file_lock should be held on entry, and will be held on exit.
247 */ 240 */
248int expand_files(struct files_struct *files, int nr) 241static int expand_files(struct files_struct *files, int nr)
249{ 242{
250 struct fdtable *fdt; 243 struct fdtable *fdt;
251 244
252 fdt = files_fdtable(files); 245 fdt = files_fdtable(files);
253 246
254 /*
255 * N.B. For clone tasks sharing a files structure, this test
256 * will limit the total number of files that can be opened.
257 */
258 if (nr >= rlimit(RLIMIT_NOFILE))
259 return -EMFILE;
260
261 /* Do we need to expand? */ 247 /* Do we need to expand? */
262 if (nr < fdt->max_fds) 248 if (nr < fdt->max_fds)
263 return 0; 249 return 0;
@@ -270,6 +256,26 @@ int expand_files(struct files_struct *files, int nr)
270 return expand_fdtable(files, nr); 256 return expand_fdtable(files, nr);
271} 257}
272 258
259static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
260{
261 __set_bit(fd, fdt->close_on_exec);
262}
263
264static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
265{
266 __clear_bit(fd, fdt->close_on_exec);
267}
268
269static inline void __set_open_fd(int fd, struct fdtable *fdt)
270{
271 __set_bit(fd, fdt->open_fds);
272}
273
274static inline void __clear_open_fd(int fd, struct fdtable *fdt)
275{
276 __clear_bit(fd, fdt->open_fds);
277}
278
273static int count_open_files(struct fdtable *fdt) 279static int count_open_files(struct fdtable *fdt)
274{ 280{
275 int size = fdt->max_fds; 281 int size = fdt->max_fds;
@@ -395,6 +401,95 @@ out:
395 return NULL; 401 return NULL;
396} 402}
397 403
404static void close_files(struct files_struct * files)
405{
406 int i, j;
407 struct fdtable *fdt;
408
409 j = 0;
410
411 /*
412 * It is safe to dereference the fd table without RCU or
413 * ->file_lock because this is the last reference to the
414 * files structure. But use RCU to shut RCU-lockdep up.
415 */
416 rcu_read_lock();
417 fdt = files_fdtable(files);
418 rcu_read_unlock();
419 for (;;) {
420 unsigned long set;
421 i = j * BITS_PER_LONG;
422 if (i >= fdt->max_fds)
423 break;
424 set = fdt->open_fds[j++];
425 while (set) {
426 if (set & 1) {
427 struct file * file = xchg(&fdt->fd[i], NULL);
428 if (file) {
429 filp_close(file, files);
430 cond_resched();
431 }
432 }
433 i++;
434 set >>= 1;
435 }
436 }
437}
438
439struct files_struct *get_files_struct(struct task_struct *task)
440{
441 struct files_struct *files;
442
443 task_lock(task);
444 files = task->files;
445 if (files)
446 atomic_inc(&files->count);
447 task_unlock(task);
448
449 return files;
450}
451
452void put_files_struct(struct files_struct *files)
453{
454 struct fdtable *fdt;
455
456 if (atomic_dec_and_test(&files->count)) {
457 close_files(files);
458 /* not really needed, since nobody can see us */
459 rcu_read_lock();
460 fdt = files_fdtable(files);
461 rcu_read_unlock();
462 /* free the arrays if they are not embedded */
463 if (fdt != &files->fdtab)
464 __free_fdtable(fdt);
465 kmem_cache_free(files_cachep, files);
466 }
467}
468
469void reset_files_struct(struct files_struct *files)
470{
471 struct task_struct *tsk = current;
472 struct files_struct *old;
473
474 old = tsk->files;
475 task_lock(tsk);
476 tsk->files = files;
477 task_unlock(tsk);
478 put_files_struct(old);
479}
480
481void exit_files(struct task_struct *tsk)
482{
483 struct files_struct * files = tsk->files;
484
485 if (files) {
486 task_lock(tsk);
487 tsk->files = NULL;
488 task_unlock(tsk);
489 put_files_struct(files);
490 }
491}
492
398static void __devinit fdtable_defer_list_init(int cpu) 493static void __devinit fdtable_defer_list_init(int cpu)
399{ 494{
400 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); 495 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
@@ -424,12 +519,18 @@ struct files_struct init_files = {
424 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 519 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
425}; 520};
426 521
522void daemonize_descriptors(void)
523{
524 atomic_inc(&init_files.count);
525 reset_files_struct(&init_files);
526}
527
427/* 528/*
428 * allocate a file descriptor, mark it busy. 529 * allocate a file descriptor, mark it busy.
429 */ 530 */
430int alloc_fd(unsigned start, unsigned flags) 531int __alloc_fd(struct files_struct *files,
532 unsigned start, unsigned end, unsigned flags)
431{ 533{
432 struct files_struct *files = current->files;
433 unsigned int fd; 534 unsigned int fd;
434 int error; 535 int error;
435 struct fdtable *fdt; 536 struct fdtable *fdt;
@@ -444,6 +545,14 @@ repeat:
444 if (fd < fdt->max_fds) 545 if (fd < fdt->max_fds)
445 fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); 546 fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
446 547
548 /*
549 * N.B. For clone tasks sharing a files structure, this test
550 * will limit the total number of files that can be opened.
551 */
552 error = -EMFILE;
553 if (fd >= end)
554 goto out;
555
447 error = expand_files(files, fd); 556 error = expand_files(files, fd);
448 if (error < 0) 557 if (error < 0)
449 goto out; 558 goto out;
@@ -477,8 +586,424 @@ out:
477 return error; 586 return error;
478} 587}
479 588
480int get_unused_fd(void) 589static int alloc_fd(unsigned start, unsigned flags)
590{
591 return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
592}
593
594int get_unused_fd_flags(unsigned flags)
595{
596 return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
597}
598EXPORT_SYMBOL(get_unused_fd_flags);
599
600static void __put_unused_fd(struct files_struct *files, unsigned int fd)
601{
602 struct fdtable *fdt = files_fdtable(files);
603 __clear_open_fd(fd, fdt);
604 if (fd < files->next_fd)
605 files->next_fd = fd;
606}
607
608void put_unused_fd(unsigned int fd)
609{
610 struct files_struct *files = current->files;
611 spin_lock(&files->file_lock);
612 __put_unused_fd(files, fd);
613 spin_unlock(&files->file_lock);
614}
615
616EXPORT_SYMBOL(put_unused_fd);
617
618/*
619 * Install a file pointer in the fd array.
620 *
621 * The VFS is full of places where we drop the files lock between
622 * setting the open_fds bitmap and installing the file in the file
623 * array. At any such point, we are vulnerable to a dup2() race
624 * installing a file in the array before us. We need to detect this and
625 * fput() the struct file we are about to overwrite in this case.
626 *
627 * It should never happen - if we allow dup2() do it, _really_ bad things
628 * will follow.
629 *
630 * NOTE: __fd_install() variant is really, really low-level; don't
631 * use it unless you are forced to by truly lousy API shoved down
632 * your throat. 'files' *MUST* be either current->files or obtained
633 * by get_files_struct(current) done by whoever had given it to you,
634 * or really bad things will happen. Normally you want to use
635 * fd_install() instead.
636 */
637
638void __fd_install(struct files_struct *files, unsigned int fd,
639 struct file *file)
640{
641 struct fdtable *fdt;
642 spin_lock(&files->file_lock);
643 fdt = files_fdtable(files);
644 BUG_ON(fdt->fd[fd] != NULL);
645 rcu_assign_pointer(fdt->fd[fd], file);
646 spin_unlock(&files->file_lock);
647}
648
649void fd_install(unsigned int fd, struct file *file)
481{ 650{
482 return alloc_fd(0, 0); 651 __fd_install(current->files, fd, file);
652}
653
654EXPORT_SYMBOL(fd_install);
655
656/*
657 * The same warnings as for __alloc_fd()/__fd_install() apply here...
658 */
659int __close_fd(struct files_struct *files, unsigned fd)
660{
661 struct file *file;
662 struct fdtable *fdt;
663
664 spin_lock(&files->file_lock);
665 fdt = files_fdtable(files);
666 if (fd >= fdt->max_fds)
667 goto out_unlock;
668 file = fdt->fd[fd];
669 if (!file)
670 goto out_unlock;
671 rcu_assign_pointer(fdt->fd[fd], NULL);
672 __clear_close_on_exec(fd, fdt);
673 __put_unused_fd(files, fd);
674 spin_unlock(&files->file_lock);
675 return filp_close(file, files);
676
677out_unlock:
678 spin_unlock(&files->file_lock);
679 return -EBADF;
680}
681
682void do_close_on_exec(struct files_struct *files)
683{
684 unsigned i;
685 struct fdtable *fdt;
686
687 /* exec unshares first */
688 BUG_ON(atomic_read(&files->count) != 1);
689 spin_lock(&files->file_lock);
690 for (i = 0; ; i++) {
691 unsigned long set;
692 unsigned fd = i * BITS_PER_LONG;
693 fdt = files_fdtable(files);
694 if (fd >= fdt->max_fds)
695 break;
696 set = fdt->close_on_exec[i];
697 if (!set)
698 continue;
699 fdt->close_on_exec[i] = 0;
700 for ( ; set ; fd++, set >>= 1) {
701 struct file *file;
702 if (!(set & 1))
703 continue;
704 file = fdt->fd[fd];
705 if (!file)
706 continue;
707 rcu_assign_pointer(fdt->fd[fd], NULL);
708 __put_unused_fd(files, fd);
709 spin_unlock(&files->file_lock);
710 filp_close(file, files);
711 cond_resched();
712 spin_lock(&files->file_lock);
713 }
714
715 }
716 spin_unlock(&files->file_lock);
717}
718
719struct file *fget(unsigned int fd)
720{
721 struct file *file;
722 struct files_struct *files = current->files;
723
724 rcu_read_lock();
725 file = fcheck_files(files, fd);
726 if (file) {
727 /* File object ref couldn't be taken */
728 if (file->f_mode & FMODE_PATH ||
729 !atomic_long_inc_not_zero(&file->f_count))
730 file = NULL;
731 }
732 rcu_read_unlock();
733
734 return file;
735}
736
737EXPORT_SYMBOL(fget);
738
739struct file *fget_raw(unsigned int fd)
740{
741 struct file *file;
742 struct files_struct *files = current->files;
743
744 rcu_read_lock();
745 file = fcheck_files(files, fd);
746 if (file) {
747 /* File object ref couldn't be taken */
748 if (!atomic_long_inc_not_zero(&file->f_count))
749 file = NULL;
750 }
751 rcu_read_unlock();
752
753 return file;
754}
755
756EXPORT_SYMBOL(fget_raw);
757
758/*
759 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
760 *
761 * You can use this instead of fget if you satisfy all of the following
762 * conditions:
763 * 1) You must call fput_light before exiting the syscall and returning control
764 * to userspace (i.e. you cannot remember the returned struct file * after
765 * returning to userspace).
766 * 2) You must not call filp_close on the returned struct file * in between
767 * calls to fget_light and fput_light.
768 * 3) You must not clone the current task in between the calls to fget_light
769 * and fput_light.
770 *
771 * The fput_needed flag returned by fget_light should be passed to the
772 * corresponding fput_light.
773 */
774struct file *fget_light(unsigned int fd, int *fput_needed)
775{
776 struct file *file;
777 struct files_struct *files = current->files;
778
779 *fput_needed = 0;
780 if (atomic_read(&files->count) == 1) {
781 file = fcheck_files(files, fd);
782 if (file && (file->f_mode & FMODE_PATH))
783 file = NULL;
784 } else {
785 rcu_read_lock();
786 file = fcheck_files(files, fd);
787 if (file) {
788 if (!(file->f_mode & FMODE_PATH) &&
789 atomic_long_inc_not_zero(&file->f_count))
790 *fput_needed = 1;
791 else
792 /* Didn't get the reference, someone's freed */
793 file = NULL;
794 }
795 rcu_read_unlock();
796 }
797
798 return file;
799}
800EXPORT_SYMBOL(fget_light);
801
802struct file *fget_raw_light(unsigned int fd, int *fput_needed)
803{
804 struct file *file;
805 struct files_struct *files = current->files;
806
807 *fput_needed = 0;
808 if (atomic_read(&files->count) == 1) {
809 file = fcheck_files(files, fd);
810 } else {
811 rcu_read_lock();
812 file = fcheck_files(files, fd);
813 if (file) {
814 if (atomic_long_inc_not_zero(&file->f_count))
815 *fput_needed = 1;
816 else
817 /* Didn't get the reference, someone's freed */
818 file = NULL;
819 }
820 rcu_read_unlock();
821 }
822
823 return file;
824}
825
826void set_close_on_exec(unsigned int fd, int flag)
827{
828 struct files_struct *files = current->files;
829 struct fdtable *fdt;
830 spin_lock(&files->file_lock);
831 fdt = files_fdtable(files);
832 if (flag)
833 __set_close_on_exec(fd, fdt);
834 else
835 __clear_close_on_exec(fd, fdt);
836 spin_unlock(&files->file_lock);
837}
838
839bool get_close_on_exec(unsigned int fd)
840{
841 struct files_struct *files = current->files;
842 struct fdtable *fdt;
843 bool res;
844 rcu_read_lock();
845 fdt = files_fdtable(files);
846 res = close_on_exec(fd, fdt);
847 rcu_read_unlock();
848 return res;
849}
850
851static int do_dup2(struct files_struct *files,
852 struct file *file, unsigned fd, unsigned flags)
853{
854 struct file *tofree;
855 struct fdtable *fdt;
856
857 /*
858 * We need to detect attempts to do dup2() over allocated but still
859 * not finished descriptor. NB: OpenBSD avoids that at the price of
860 * extra work in their equivalent of fget() - they insert struct
861 * file immediately after grabbing descriptor, mark it larval if
862 * more work (e.g. actual opening) is needed and make sure that
863 * fget() treats larval files as absent. Potentially interesting,
864 * but while extra work in fget() is trivial, locking implications
865 * and amount of surgery on open()-related paths in VFS are not.
866 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
867 * deadlocks in rather amusing ways, AFAICS. All of that is out of
868 * scope of POSIX or SUS, since neither considers shared descriptor
869 * tables and this condition does not arise without those.
870 */
871 fdt = files_fdtable(files);
872 tofree = fdt->fd[fd];
873 if (!tofree && fd_is_open(fd, fdt))
874 goto Ebusy;
875 get_file(file);
876 rcu_assign_pointer(fdt->fd[fd], file);
877 __set_open_fd(fd, fdt);
878 if (flags & O_CLOEXEC)
879 __set_close_on_exec(fd, fdt);
880 else
881 __clear_close_on_exec(fd, fdt);
882 spin_unlock(&files->file_lock);
883
884 if (tofree)
885 filp_close(tofree, files);
886
887 return fd;
888
889Ebusy:
890 spin_unlock(&files->file_lock);
891 return -EBUSY;
892}
893
894int replace_fd(unsigned fd, struct file *file, unsigned flags)
895{
896 int err;
897 struct files_struct *files = current->files;
898
899 if (!file)
900 return __close_fd(files, fd);
901
902 if (fd >= rlimit(RLIMIT_NOFILE))
903 return -EMFILE;
904
905 spin_lock(&files->file_lock);
906 err = expand_files(files, fd);
907 if (unlikely(err < 0))
908 goto out_unlock;
909 return do_dup2(files, file, fd, flags);
910
911out_unlock:
912 spin_unlock(&files->file_lock);
913 return err;
914}
915
916SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
917{
918 int err = -EBADF;
919 struct file *file;
920 struct files_struct *files = current->files;
921
922 if ((flags & ~O_CLOEXEC) != 0)
923 return -EINVAL;
924
925 if (newfd >= rlimit(RLIMIT_NOFILE))
926 return -EMFILE;
927
928 spin_lock(&files->file_lock);
929 err = expand_files(files, newfd);
930 file = fcheck(oldfd);
931 if (unlikely(!file))
932 goto Ebadf;
933 if (unlikely(err < 0)) {
934 if (err == -EMFILE)
935 goto Ebadf;
936 goto out_unlock;
937 }
938 return do_dup2(files, file, newfd, flags);
939
940Ebadf:
941 err = -EBADF;
942out_unlock:
943 spin_unlock(&files->file_lock);
944 return err;
945}
946
947SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
948{
949 if (unlikely(newfd == oldfd)) { /* corner case */
950 struct files_struct *files = current->files;
951 int retval = oldfd;
952
953 rcu_read_lock();
954 if (!fcheck_files(files, oldfd))
955 retval = -EBADF;
956 rcu_read_unlock();
957 return retval;
958 }
959 return sys_dup3(oldfd, newfd, 0);
960}
961
962SYSCALL_DEFINE1(dup, unsigned int, fildes)
963{
964 int ret = -EBADF;
965 struct file *file = fget_raw(fildes);
966
967 if (file) {
968 ret = get_unused_fd();
969 if (ret >= 0)
970 fd_install(ret, file);
971 else
972 fput(file);
973 }
974 return ret;
975}
976
977int f_dupfd(unsigned int from, struct file *file, unsigned flags)
978{
979 int err;
980 if (from >= rlimit(RLIMIT_NOFILE))
981 return -EINVAL;
982 err = alloc_fd(from, flags);
983 if (err >= 0) {
984 get_file(file);
985 fd_install(err, file);
986 }
987 return err;
988}
989
990int iterate_fd(struct files_struct *files, unsigned n,
991 int (*f)(const void *, struct file *, unsigned),
992 const void *p)
993{
994 struct fdtable *fdt;
995 struct file *file;
996 int res = 0;
997 if (!files)
998 return 0;
999 spin_lock(&files->file_lock);
1000 fdt = files_fdtable(files);
1001 while (!res && n < fdt->max_fds) {
1002 file = rcu_dereference_check_fdtable(files, fdt->fd[n++]);
1003 if (file)
1004 res = f(p, file, n);
1005 }
1006 spin_unlock(&files->file_lock);
1007 return res;
483} 1008}
484EXPORT_SYMBOL(get_unused_fd); 1009EXPORT_SYMBOL(iterate_fd);
diff --git a/fs/file_table.c b/fs/file_table.c
index 701985e4ccda..c6780163bf3e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -339,112 +339,6 @@ void __fput_sync(struct file *file)
339 339
340EXPORT_SYMBOL(fput); 340EXPORT_SYMBOL(fput);
341 341
342struct file *fget(unsigned int fd)
343{
344 struct file *file;
345 struct files_struct *files = current->files;
346
347 rcu_read_lock();
348 file = fcheck_files(files, fd);
349 if (file) {
350 /* File object ref couldn't be taken */
351 if (file->f_mode & FMODE_PATH ||
352 !atomic_long_inc_not_zero(&file->f_count))
353 file = NULL;
354 }
355 rcu_read_unlock();
356
357 return file;
358}
359
360EXPORT_SYMBOL(fget);
361
362struct file *fget_raw(unsigned int fd)
363{
364 struct file *file;
365 struct files_struct *files = current->files;
366
367 rcu_read_lock();
368 file = fcheck_files(files, fd);
369 if (file) {
370 /* File object ref couldn't be taken */
371 if (!atomic_long_inc_not_zero(&file->f_count))
372 file = NULL;
373 }
374 rcu_read_unlock();
375
376 return file;
377}
378
379EXPORT_SYMBOL(fget_raw);
380
381/*
382 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
383 *
384 * You can use this instead of fget if you satisfy all of the following
385 * conditions:
386 * 1) You must call fput_light before exiting the syscall and returning control
387 * to userspace (i.e. you cannot remember the returned struct file * after
388 * returning to userspace).
389 * 2) You must not call filp_close on the returned struct file * in between
390 * calls to fget_light and fput_light.
391 * 3) You must not clone the current task in between the calls to fget_light
392 * and fput_light.
393 *
394 * The fput_needed flag returned by fget_light should be passed to the
395 * corresponding fput_light.
396 */
397struct file *fget_light(unsigned int fd, int *fput_needed)
398{
399 struct file *file;
400 struct files_struct *files = current->files;
401
402 *fput_needed = 0;
403 if (atomic_read(&files->count) == 1) {
404 file = fcheck_files(files, fd);
405 if (file && (file->f_mode & FMODE_PATH))
406 file = NULL;
407 } else {
408 rcu_read_lock();
409 file = fcheck_files(files, fd);
410 if (file) {
411 if (!(file->f_mode & FMODE_PATH) &&
412 atomic_long_inc_not_zero(&file->f_count))
413 *fput_needed = 1;
414 else
415 /* Didn't get the reference, someone's freed */
416 file = NULL;
417 }
418 rcu_read_unlock();
419 }
420
421 return file;
422}
423
424struct file *fget_raw_light(unsigned int fd, int *fput_needed)
425{
426 struct file *file;
427 struct files_struct *files = current->files;
428
429 *fput_needed = 0;
430 if (atomic_read(&files->count) == 1) {
431 file = fcheck_files(files, fd);
432 } else {
433 rcu_read_lock();
434 file = fcheck_files(files, fd);
435 if (file) {
436 if (atomic_long_inc_not_zero(&file->f_count))
437 *fput_needed = 1;
438 else
439 /* Didn't get the reference, someone's freed */
440 file = NULL;
441 }
442 rcu_read_unlock();
443 }
444
445 return file;
446}
447
448void put_filp(struct file *file) 342void put_filp(struct file *file)
449{ 343{
450 if (atomic_long_dec_and_test(&file->f_count)) { 344 if (atomic_long_dec_and_test(&file->f_count)) {
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index d4fabd26084e..fed2c8afb3a9 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -279,6 +279,11 @@ static void __exit
279vxfs_cleanup(void) 279vxfs_cleanup(void)
280{ 280{
281 unregister_filesystem(&vxfs_fs_type); 281 unregister_filesystem(&vxfs_fs_type);
282 /*
283 * Make sure all delayed rcu free inodes are flushed before we
284 * destroy cache.
285 */
286 rcu_barrier();
282 kmem_cache_destroy(vxfs_inode_cachep); 287 kmem_cache_destroy(vxfs_inode_cachep);
283} 288}
284 289
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index f4246cfc8d87..8c23fa7a91e6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -148,8 +148,7 @@ static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
148 if (ff->reserved_req) { 148 if (ff->reserved_req) {
149 req = ff->reserved_req; 149 req = ff->reserved_req;
150 ff->reserved_req = NULL; 150 ff->reserved_req = NULL;
151 get_file(file); 151 req->stolen_file = get_file(file);
152 req->stolen_file = file;
153 } 152 }
154 spin_unlock(&fc->lock); 153 spin_unlock(&fc->lock);
155 } while (!req); 154 } while (!req);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fca222dabe3c..f0eda124cffb 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1197,6 +1197,12 @@ static void fuse_fs_cleanup(void)
1197{ 1197{
1198 unregister_filesystem(&fuse_fs_type); 1198 unregister_filesystem(&fuse_fs_type);
1199 unregister_fuseblk(); 1199 unregister_fuseblk();
1200
1201 /*
1202 * Make sure all delayed rcu free inodes are flushed before we
1203 * destroy cache.
1204 */
1205 rcu_barrier();
1200 kmem_cache_destroy(fuse_inode_cachep); 1206 kmem_cache_destroy(fuse_inode_cachep);
1201} 1207}
1202 1208
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 0b63d135a092..e93ddaadfd1e 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -492,6 +492,12 @@ static int __init init_hfs_fs(void)
492static void __exit exit_hfs_fs(void) 492static void __exit exit_hfs_fs(void)
493{ 493{
494 unregister_filesystem(&hfs_fs_type); 494 unregister_filesystem(&hfs_fs_type);
495
496 /*
497 * Make sure all delayed rcu free inodes are flushed before we
498 * destroy cache.
499 */
500 rcu_barrier();
495 kmem_cache_destroy(hfs_inode_cachep); 501 kmem_cache_destroy(hfs_inode_cachep);
496} 502}
497 503
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index fdafb2d71654..811a84d2d964 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -635,6 +635,12 @@ static int __init init_hfsplus_fs(void)
635static void __exit exit_hfsplus_fs(void) 635static void __exit exit_hfsplus_fs(void)
636{ 636{
637 unregister_filesystem(&hfsplus_fs_type); 637 unregister_filesystem(&hfsplus_fs_type);
638
639 /*
640 * Make sure all delayed rcu free inodes are flushed before we
641 * destroy cache.
642 */
643 rcu_barrier();
638 kmem_cache_destroy(hfsplus_inode_cachep); 644 kmem_cache_destroy(hfsplus_inode_cachep);
639} 645}
640 646
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index a152783602d9..bc28bf077a6a 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -210,6 +210,11 @@ static int init_inodecache(void)
210 210
211static void destroy_inodecache(void) 211static void destroy_inodecache(void)
212{ 212{
213 /*
214 * Make sure all delayed rcu free inodes are flushed before we
215 * destroy cache.
216 */
217 rcu_barrier();
213 kmem_cache_destroy(hpfs_inode_cachep); 218 kmem_cache_destroy(hpfs_inode_cachep);
214} 219}
215 220
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6e572c4fbf68..9460120a5170 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1048,6 +1048,11 @@ static int __init init_hugetlbfs_fs(void)
1048 1048
1049static void __exit exit_hugetlbfs_fs(void) 1049static void __exit exit_hugetlbfs_fs(void)
1050{ 1050{
1051 /*
1052 * Make sure all delayed rcu free inodes are flushed before we
1053 * destroy cache.
1054 */
1055 rcu_barrier();
1051 kmem_cache_destroy(hugetlbfs_inode_cachep); 1056 kmem_cache_destroy(hugetlbfs_inode_cachep);
1052 kern_unmount(hugetlbfs_vfsmount); 1057 kern_unmount(hugetlbfs_vfsmount);
1053 unregister_filesystem(&hugetlbfs_fs_type); 1058 unregister_filesystem(&hugetlbfs_fs_type);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 29167bebe874..3bdad6d1f268 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -603,21 +603,14 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
603 603
604SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 604SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
605{ 605{
606 struct file *filp; 606 int error;
607 int error = -EBADF; 607 struct fd f = fdget(fd);
608 int fput_needed; 608
609 609 if (!f.file)
610 filp = fget_light(fd, &fput_needed); 610 return -EBADF;
611 if (!filp) 611 error = security_file_ioctl(f.file, cmd, arg);
612 goto out; 612 if (!error)
613 613 error = do_vfs_ioctl(f.file, fd, cmd, arg);
614 error = security_file_ioctl(filp, cmd, arg); 614 fdput(f);
615 if (error)
616 goto out_fput;
617
618 error = do_vfs_ioctl(filp, fd, cmd, arg);
619 out_fput:
620 fput_light(filp, fput_needed);
621 out:
622 return error; 615 return error;
623} 616}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index a7d8e6cc5e0c..67ce52507d7d 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -115,6 +115,11 @@ static int init_inodecache(void)
115 115
116static void destroy_inodecache(void) 116static void destroy_inodecache(void)
117{ 117{
118 /*
119 * Make sure all delayed rcu free inodes are flushed before we
120 * destroy cache.
121 */
122 rcu_barrier();
118 kmem_cache_destroy(isofs_inode_cachep); 123 kmem_cache_destroy(isofs_inode_cachep);
119} 124}
120 125
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 61ea41389f90..ff487954cd96 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -418,6 +418,12 @@ static void __exit exit_jffs2_fs(void)
418 unregister_filesystem(&jffs2_fs_type); 418 unregister_filesystem(&jffs2_fs_type);
419 jffs2_destroy_slab_caches(); 419 jffs2_destroy_slab_caches();
420 jffs2_compressors_exit(); 420 jffs2_compressors_exit();
421
422 /*
423 * Make sure all delayed rcu free inodes are flushed before we
424 * destroy cache.
425 */
426 rcu_barrier();
421 kmem_cache_destroy(jffs2_inode_cachep); 427 kmem_cache_destroy(jffs2_inode_cachep);
422} 428}
423 429
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 706692f24033..efdf8835dfca 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -911,6 +911,12 @@ static void __exit exit_jfs_fs(void)
911 jfs_proc_clean(); 911 jfs_proc_clean();
912#endif 912#endif
913 unregister_filesystem(&jfs_fs_type); 913 unregister_filesystem(&jfs_fs_type);
914
915 /*
916 * Make sure all delayed rcu free inodes are flushed before we
917 * destroy cache.
918 */
919 rcu_barrier();
914 kmem_cache_destroy(jfs_inode_cachep); 920 kmem_cache_destroy(jfs_inode_cachep);
915} 921}
916 922
diff --git a/fs/locks.c b/fs/locks.c
index 7e81bfc75164..abc7dc6c490b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1625,15 +1625,13 @@ EXPORT_SYMBOL(flock_lock_file_wait);
1625 */ 1625 */
1626SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) 1626SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
1627{ 1627{
1628 struct file *filp; 1628 struct fd f = fdget(fd);
1629 int fput_needed;
1630 struct file_lock *lock; 1629 struct file_lock *lock;
1631 int can_sleep, unlock; 1630 int can_sleep, unlock;
1632 int error; 1631 int error;
1633 1632
1634 error = -EBADF; 1633 error = -EBADF;
1635 filp = fget_light(fd, &fput_needed); 1634 if (!f.file)
1636 if (!filp)
1637 goto out; 1635 goto out;
1638 1636
1639 can_sleep = !(cmd & LOCK_NB); 1637 can_sleep = !(cmd & LOCK_NB);
@@ -1641,31 +1639,31 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
1641 unlock = (cmd == LOCK_UN); 1639 unlock = (cmd == LOCK_UN);
1642 1640
1643 if (!unlock && !(cmd & LOCK_MAND) && 1641 if (!unlock && !(cmd & LOCK_MAND) &&
1644 !(filp->f_mode & (FMODE_READ|FMODE_WRITE))) 1642 !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
1645 goto out_putf; 1643 goto out_putf;
1646 1644
1647 error = flock_make_lock(filp, &lock, cmd); 1645 error = flock_make_lock(f.file, &lock, cmd);
1648 if (error) 1646 if (error)
1649 goto out_putf; 1647 goto out_putf;
1650 if (can_sleep) 1648 if (can_sleep)
1651 lock->fl_flags |= FL_SLEEP; 1649 lock->fl_flags |= FL_SLEEP;
1652 1650
1653 error = security_file_lock(filp, lock->fl_type); 1651 error = security_file_lock(f.file, lock->fl_type);
1654 if (error) 1652 if (error)
1655 goto out_free; 1653 goto out_free;
1656 1654
1657 if (filp->f_op && filp->f_op->flock) 1655 if (f.file->f_op && f.file->f_op->flock)
1658 error = filp->f_op->flock(filp, 1656 error = f.file->f_op->flock(f.file,
1659 (can_sleep) ? F_SETLKW : F_SETLK, 1657 (can_sleep) ? F_SETLKW : F_SETLK,
1660 lock); 1658 lock);
1661 else 1659 else
1662 error = flock_lock_file_wait(filp, lock); 1660 error = flock_lock_file_wait(f.file, lock);
1663 1661
1664 out_free: 1662 out_free:
1665 locks_free_lock(lock); 1663 locks_free_lock(lock);
1666 1664
1667 out_putf: 1665 out_putf:
1668 fput_light(filp, fput_needed); 1666 fdput(f);
1669 out: 1667 out:
1670 return error; 1668 return error;
1671} 1669}
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index bda39085309f..adb90116d36b 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -417,5 +417,10 @@ int logfs_init_inode_cache(void)
417 417
418void logfs_destroy_inode_cache(void) 418void logfs_destroy_inode_cache(void)
419{ 419{
420 /*
421 * Make sure all delayed rcu free inodes are flushed before we
422 * destroy cache.
423 */
424 rcu_barrier();
420 kmem_cache_destroy(logfs_inode_cache); 425 kmem_cache_destroy(logfs_inode_cache);
421} 426}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index d0e42c678923..4fc5f8ab1c44 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -100,6 +100,11 @@ static int init_inodecache(void)
100 100
101static void destroy_inodecache(void) 101static void destroy_inodecache(void)
102{ 102{
103 /*
104 * Make sure all delayed rcu free inodes are flushed before we
105 * destroy cache.
106 */
107 rcu_barrier();
103 kmem_cache_destroy(minix_inode_cachep); 108 kmem_cache_destroy(minix_inode_cachep);
104} 109}
105 110
diff --git a/fs/namei.c b/fs/namei.c
index a856e7f7b6e3..aa30d19e9edd 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1797,8 +1797,6 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1797 struct nameidata *nd, struct file **fp) 1797 struct nameidata *nd, struct file **fp)
1798{ 1798{
1799 int retval = 0; 1799 int retval = 0;
1800 int fput_needed;
1801 struct file *file;
1802 1800
1803 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1801 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1804 nd->flags = flags | LOOKUP_JUMPED; 1802 nd->flags = flags | LOOKUP_JUMPED;
@@ -1850,44 +1848,41 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1850 get_fs_pwd(current->fs, &nd->path); 1848 get_fs_pwd(current->fs, &nd->path);
1851 } 1849 }
1852 } else { 1850 } else {
1851 struct fd f = fdget_raw(dfd);
1853 struct dentry *dentry; 1852 struct dentry *dentry;
1854 1853
1855 file = fget_raw_light(dfd, &fput_needed); 1854 if (!f.file)
1856 retval = -EBADF; 1855 return -EBADF;
1857 if (!file)
1858 goto out_fail;
1859 1856
1860 dentry = file->f_path.dentry; 1857 dentry = f.file->f_path.dentry;
1861 1858
1862 if (*name) { 1859 if (*name) {
1863 retval = -ENOTDIR; 1860 if (!S_ISDIR(dentry->d_inode->i_mode)) {
1864 if (!S_ISDIR(dentry->d_inode->i_mode)) 1861 fdput(f);
1865 goto fput_fail; 1862 return -ENOTDIR;
1863 }
1866 1864
1867 retval = inode_permission(dentry->d_inode, MAY_EXEC); 1865 retval = inode_permission(dentry->d_inode, MAY_EXEC);
1868 if (retval) 1866 if (retval) {
1869 goto fput_fail; 1867 fdput(f);
1868 return retval;
1869 }
1870 } 1870 }
1871 1871
1872 nd->path = file->f_path; 1872 nd->path = f.file->f_path;
1873 if (flags & LOOKUP_RCU) { 1873 if (flags & LOOKUP_RCU) {
1874 if (fput_needed) 1874 if (f.need_put)
1875 *fp = file; 1875 *fp = f.file;
1876 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1876 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1877 lock_rcu_walk(); 1877 lock_rcu_walk();
1878 } else { 1878 } else {
1879 path_get(&file->f_path); 1879 path_get(&nd->path);
1880 fput_light(file, fput_needed); 1880 fdput(f);
1881 } 1881 }
1882 } 1882 }
1883 1883
1884 nd->inode = nd->path.dentry->d_inode; 1884 nd->inode = nd->path.dentry->d_inode;
1885 return 0; 1885 return 0;
1886
1887fput_fail:
1888 fput_light(file, fput_needed);
1889out_fail:
1890 return retval;
1891} 1886}
1892 1887
1893static inline int lookup_last(struct nameidata *nd, struct path *path) 1888static inline int lookup_last(struct nameidata *nd, struct path *path)
@@ -3971,7 +3966,7 @@ EXPORT_SYMBOL(user_path_at);
3971EXPORT_SYMBOL(follow_down_one); 3966EXPORT_SYMBOL(follow_down_one);
3972EXPORT_SYMBOL(follow_down); 3967EXPORT_SYMBOL(follow_down);
3973EXPORT_SYMBOL(follow_up); 3968EXPORT_SYMBOL(follow_up);
3974EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 3969EXPORT_SYMBOL(get_write_access); /* nfsd */
3975EXPORT_SYMBOL(getname); 3970EXPORT_SYMBOL(getname);
3976EXPORT_SYMBOL(lock_rename); 3971EXPORT_SYMBOL(lock_rename);
3977EXPORT_SYMBOL(lookup_one_len); 3972EXPORT_SYMBOL(lookup_one_len);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index eaa74323663a..d7e9fe77188a 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -89,6 +89,11 @@ static int init_inodecache(void)
89 89
90static void destroy_inodecache(void) 90static void destroy_inodecache(void)
91{ 91{
92 /*
93 * Make sure all delayed rcu free inodes are flushed before we
94 * destroy cache.
95 */
96 rcu_barrier();
92 kmem_cache_destroy(ncp_inode_cachep); 97 kmem_cache_destroy(ncp_inode_cachep);
93} 98}
94 99
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9b47610338f5..e4c716d374a8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1571,6 +1571,11 @@ static int __init nfs_init_inodecache(void)
1571 1571
1572static void nfs_destroy_inodecache(void) 1572static void nfs_destroy_inodecache(void)
1573{ 1573{
1574 /*
1575 * Make sure all delayed rcu free inodes are flushed before we
1576 * destroy cache.
1577 */
1578 rcu_barrier();
1574 kmem_cache_destroy(nfs_inode_cachep); 1579 kmem_cache_destroy(nfs_inode_cachep);
1575} 1580}
1576 1581
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index cc894eda385a..48a1bad37334 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2837,8 +2837,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
2837 return -ENOMEM; 2837 return -ENOMEM;
2838 } 2838 }
2839 fp->fi_lease = fl; 2839 fp->fi_lease = fl;
2840 fp->fi_deleg_file = fl->fl_file; 2840 fp->fi_deleg_file = get_file(fl->fl_file);
2841 get_file(fp->fi_deleg_file);
2842 atomic_set(&fp->fi_delegees, 1); 2841 atomic_set(&fp->fi_delegees, 1);
2843 list_add(&dp->dl_perfile, &fp->fi_delegations); 2842 list_add(&dp->dl_perfile, &fp->fi_delegations);
2844 return 0; 2843 return 0;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6a10812711c1..3c991dc84f2f 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1382,6 +1382,12 @@ static void nilfs_segbuf_init_once(void *obj)
1382 1382
1383static void nilfs_destroy_cachep(void) 1383static void nilfs_destroy_cachep(void)
1384{ 1384{
1385 /*
1386 * Make sure all delayed rcu free inodes are flushed before we
1387 * destroy cache.
1388 */
1389 rcu_barrier();
1390
1385 if (nilfs_inode_cachep) 1391 if (nilfs_inode_cachep)
1386 kmem_cache_destroy(nilfs_inode_cachep); 1392 kmem_cache_destroy(nilfs_inode_cachep);
1387 if (nilfs_transaction_cachep) 1393 if (nilfs_transaction_cachep)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index d43803669739..721d692fa8d4 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -58,7 +58,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
58 return fsnotify_remove_notify_event(group); 58 return fsnotify_remove_notify_event(group);
59} 59}
60 60
61static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) 61static int create_fd(struct fsnotify_group *group,
62 struct fsnotify_event *event,
63 struct file **file)
62{ 64{
63 int client_fd; 65 int client_fd;
64 struct file *new_file; 66 struct file *new_file;
@@ -98,7 +100,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
98 put_unused_fd(client_fd); 100 put_unused_fd(client_fd);
99 client_fd = PTR_ERR(new_file); 101 client_fd = PTR_ERR(new_file);
100 } else { 102 } else {
101 fd_install(client_fd, new_file); 103 *file = new_file;
102 } 104 }
103 105
104 return client_fd; 106 return client_fd;
@@ -106,13 +108,15 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
106 108
107static int fill_event_metadata(struct fsnotify_group *group, 109static int fill_event_metadata(struct fsnotify_group *group,
108 struct fanotify_event_metadata *metadata, 110 struct fanotify_event_metadata *metadata,
109 struct fsnotify_event *event) 111 struct fsnotify_event *event,
112 struct file **file)
110{ 113{
111 int ret = 0; 114 int ret = 0;
112 115
113 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, 116 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
114 group, metadata, event); 117 group, metadata, event);
115 118
119 *file = NULL;
116 metadata->event_len = FAN_EVENT_METADATA_LEN; 120 metadata->event_len = FAN_EVENT_METADATA_LEN;
117 metadata->metadata_len = FAN_EVENT_METADATA_LEN; 121 metadata->metadata_len = FAN_EVENT_METADATA_LEN;
118 metadata->vers = FANOTIFY_METADATA_VERSION; 122 metadata->vers = FANOTIFY_METADATA_VERSION;
@@ -121,7 +125,7 @@ static int fill_event_metadata(struct fsnotify_group *group,
121 if (unlikely(event->mask & FAN_Q_OVERFLOW)) 125 if (unlikely(event->mask & FAN_Q_OVERFLOW))
122 metadata->fd = FAN_NOFD; 126 metadata->fd = FAN_NOFD;
123 else { 127 else {
124 metadata->fd = create_fd(group, event); 128 metadata->fd = create_fd(group, event, file);
125 if (metadata->fd < 0) 129 if (metadata->fd < 0)
126 ret = metadata->fd; 130 ret = metadata->fd;
127 } 131 }
@@ -220,25 +224,6 @@ static int prepare_for_access_response(struct fsnotify_group *group,
220 return 0; 224 return 0;
221} 225}
222 226
223static void remove_access_response(struct fsnotify_group *group,
224 struct fsnotify_event *event,
225 __s32 fd)
226{
227 struct fanotify_response_event *re;
228
229 if (!(event->mask & FAN_ALL_PERM_EVENTS))
230 return;
231
232 re = dequeue_re(group, fd);
233 if (!re)
234 return;
235
236 BUG_ON(re->event != event);
237
238 kmem_cache_free(fanotify_response_event_cache, re);
239
240 return;
241}
242#else 227#else
243static int prepare_for_access_response(struct fsnotify_group *group, 228static int prepare_for_access_response(struct fsnotify_group *group,
244 struct fsnotify_event *event, 229 struct fsnotify_event *event,
@@ -247,12 +232,6 @@ static int prepare_for_access_response(struct fsnotify_group *group,
247 return 0; 232 return 0;
248} 233}
249 234
250static void remove_access_response(struct fsnotify_group *group,
251 struct fsnotify_event *event,
252 __s32 fd)
253{
254 return;
255}
256#endif 235#endif
257 236
258static ssize_t copy_event_to_user(struct fsnotify_group *group, 237static ssize_t copy_event_to_user(struct fsnotify_group *group,
@@ -260,31 +239,33 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
260 char __user *buf) 239 char __user *buf)
261{ 240{
262 struct fanotify_event_metadata fanotify_event_metadata; 241 struct fanotify_event_metadata fanotify_event_metadata;
242 struct file *f;
263 int fd, ret; 243 int fd, ret;
264 244
265 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 245 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
266 246
267 ret = fill_event_metadata(group, &fanotify_event_metadata, event); 247 ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f);
268 if (ret < 0) 248 if (ret < 0)
269 goto out; 249 goto out;
270 250
271 fd = fanotify_event_metadata.fd; 251 fd = fanotify_event_metadata.fd;
272 ret = prepare_for_access_response(group, event, fd);
273 if (ret)
274 goto out_close_fd;
275
276 ret = -EFAULT; 252 ret = -EFAULT;
277 if (copy_to_user(buf, &fanotify_event_metadata, 253 if (copy_to_user(buf, &fanotify_event_metadata,
278 fanotify_event_metadata.event_len)) 254 fanotify_event_metadata.event_len))
279 goto out_kill_access_response; 255 goto out_close_fd;
280 256
257 ret = prepare_for_access_response(group, event, fd);
258 if (ret)
259 goto out_close_fd;
260
261 fd_install(fd, f);
281 return fanotify_event_metadata.event_len; 262 return fanotify_event_metadata.event_len;
282 263
283out_kill_access_response:
284 remove_access_response(group, event, fd);
285out_close_fd: 264out_close_fd:
286 if (fd != FAN_NOFD) 265 if (fd != FAN_NOFD) {
287 sys_close(fd); 266 put_unused_fd(fd);
267 fput(f);
268 }
288out: 269out:
289#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 270#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
290 if (event->mask & FAN_ALL_PERM_EVENTS) { 271 if (event->mask & FAN_ALL_PERM_EVENTS) {
@@ -470,24 +451,22 @@ static int fanotify_find_path(int dfd, const char __user *filename,
470 dfd, filename, flags); 451 dfd, filename, flags);
471 452
472 if (filename == NULL) { 453 if (filename == NULL) {
473 struct file *file; 454 struct fd f = fdget(dfd);
474 int fput_needed;
475 455
476 ret = -EBADF; 456 ret = -EBADF;
477 file = fget_light(dfd, &fput_needed); 457 if (!f.file)
478 if (!file)
479 goto out; 458 goto out;
480 459
481 ret = -ENOTDIR; 460 ret = -ENOTDIR;
482 if ((flags & FAN_MARK_ONLYDIR) && 461 if ((flags & FAN_MARK_ONLYDIR) &&
483 !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) { 462 !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) {
484 fput_light(file, fput_needed); 463 fdput(f);
485 goto out; 464 goto out;
486 } 465 }
487 466
488 *path = file->f_path; 467 *path = f.file->f_path;
489 path_get(path); 468 path_get(path);
490 fput_light(file, fput_needed); 469 fdput(f);
491 } else { 470 } else {
492 unsigned int lookup_flags = 0; 471 unsigned int lookup_flags = 0;
493 472
@@ -767,9 +746,9 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
767 struct inode *inode = NULL; 746 struct inode *inode = NULL;
768 struct vfsmount *mnt = NULL; 747 struct vfsmount *mnt = NULL;
769 struct fsnotify_group *group; 748 struct fsnotify_group *group;
770 struct file *filp; 749 struct fd f;
771 struct path path; 750 struct path path;
772 int ret, fput_needed; 751 int ret;
773 752
774 pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", 753 pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
775 __func__, fanotify_fd, flags, dfd, pathname, mask); 754 __func__, fanotify_fd, flags, dfd, pathname, mask);
@@ -803,15 +782,15 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
803#endif 782#endif
804 return -EINVAL; 783 return -EINVAL;
805 784
806 filp = fget_light(fanotify_fd, &fput_needed); 785 f = fdget(fanotify_fd);
807 if (unlikely(!filp)) 786 if (unlikely(!f.file))
808 return -EBADF; 787 return -EBADF;
809 788
810 /* verify that this is indeed an fanotify instance */ 789 /* verify that this is indeed an fanotify instance */
811 ret = -EINVAL; 790 ret = -EINVAL;
812 if (unlikely(filp->f_op != &fanotify_fops)) 791 if (unlikely(f.file->f_op != &fanotify_fops))
813 goto fput_and_out; 792 goto fput_and_out;
814 group = filp->private_data; 793 group = f.file->private_data;
815 794
816 /* 795 /*
817 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not 796 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
@@ -858,7 +837,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
858 837
859 path_put(&path); 838 path_put(&path);
860fput_and_out: 839fput_and_out:
861 fput_light(filp, fput_needed); 840 fdput(f);
862 return ret; 841 return ret;
863} 842}
864 843
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 8445fbc8985c..c311dda054a3 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -757,16 +757,16 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
757 struct fsnotify_group *group; 757 struct fsnotify_group *group;
758 struct inode *inode; 758 struct inode *inode;
759 struct path path; 759 struct path path;
760 struct file *filp; 760 struct fd f;
761 int ret, fput_needed; 761 int ret;
762 unsigned flags = 0; 762 unsigned flags = 0;
763 763
764 filp = fget_light(fd, &fput_needed); 764 f = fdget(fd);
765 if (unlikely(!filp)) 765 if (unlikely(!f.file))
766 return -EBADF; 766 return -EBADF;
767 767
768 /* verify that this is indeed an inotify instance */ 768 /* verify that this is indeed an inotify instance */
769 if (unlikely(filp->f_op != &inotify_fops)) { 769 if (unlikely(f.file->f_op != &inotify_fops)) {
770 ret = -EINVAL; 770 ret = -EINVAL;
771 goto fput_and_out; 771 goto fput_and_out;
772 } 772 }
@@ -782,13 +782,13 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
782 782
783 /* inode held in place by reference to path; group by fget on fd */ 783 /* inode held in place by reference to path; group by fget on fd */
784 inode = path.dentry->d_inode; 784 inode = path.dentry->d_inode;
785 group = filp->private_data; 785 group = f.file->private_data;
786 786
787 /* create/update an inode mark */ 787 /* create/update an inode mark */
788 ret = inotify_update_watch(group, inode, mask); 788 ret = inotify_update_watch(group, inode, mask);
789 path_put(&path); 789 path_put(&path);
790fput_and_out: 790fput_and_out:
791 fput_light(filp, fput_needed); 791 fdput(f);
792 return ret; 792 return ret;
793} 793}
794 794
@@ -796,19 +796,19 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
796{ 796{
797 struct fsnotify_group *group; 797 struct fsnotify_group *group;
798 struct inotify_inode_mark *i_mark; 798 struct inotify_inode_mark *i_mark;
799 struct file *filp; 799 struct fd f;
800 int ret = 0, fput_needed; 800 int ret = 0;
801 801
802 filp = fget_light(fd, &fput_needed); 802 f = fdget(fd);
803 if (unlikely(!filp)) 803 if (unlikely(!f.file))
804 return -EBADF; 804 return -EBADF;
805 805
806 /* verify that this is indeed an inotify instance */ 806 /* verify that this is indeed an inotify instance */
807 ret = -EINVAL; 807 ret = -EINVAL;
808 if (unlikely(filp->f_op != &inotify_fops)) 808 if (unlikely(f.file->f_op != &inotify_fops))
809 goto out; 809 goto out;
810 810
811 group = filp->private_data; 811 group = f.file->private_data;
812 812
813 ret = -EINVAL; 813 ret = -EINVAL;
814 i_mark = inotify_idr_find(group, wd); 814 i_mark = inotify_idr_find(group, wd);
@@ -823,7 +823,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
823 fsnotify_put_mark(&i_mark->fsn_mark); 823 fsnotify_put_mark(&i_mark->fsn_mark);
824 824
825out: 825out:
826 fput_light(filp, fput_needed); 826 fdput(f);
827 return ret; 827 return ret;
828} 828}
829 829
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index da01c165067d..4a8289f8b16c 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3193,6 +3193,12 @@ static void __exit exit_ntfs_fs(void)
3193 ntfs_debug("Unregistering NTFS driver."); 3193 ntfs_debug("Unregistering NTFS driver.");
3194 3194
3195 unregister_filesystem(&ntfs_fs_type); 3195 unregister_filesystem(&ntfs_fs_type);
3196
3197 /*
3198 * Make sure all delayed rcu free inodes are flushed before we
3199 * destroy cache.
3200 */
3201 rcu_barrier();
3196 kmem_cache_destroy(ntfs_big_inode_cache); 3202 kmem_cache_destroy(ntfs_big_inode_cache);
3197 kmem_cache_destroy(ntfs_inode_cache); 3203 kmem_cache_destroy(ntfs_inode_cache);
3198 kmem_cache_destroy(ntfs_name_cache); 3204 kmem_cache_destroy(ntfs_name_cache);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a4e855e3690e..f7c648d7d6bf 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1746,8 +1746,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1746 long fd; 1746 long fd;
1747 int sectsize; 1747 int sectsize;
1748 char *p = (char *)page; 1748 char *p = (char *)page;
1749 struct file *filp = NULL; 1749 struct fd f;
1750 struct inode *inode = NULL; 1750 struct inode *inode;
1751 ssize_t ret = -EINVAL; 1751 ssize_t ret = -EINVAL;
1752 int live_threshold; 1752 int live_threshold;
1753 1753
@@ -1766,26 +1766,26 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1766 if (fd < 0 || fd >= INT_MAX) 1766 if (fd < 0 || fd >= INT_MAX)
1767 goto out; 1767 goto out;
1768 1768
1769 filp = fget(fd); 1769 f = fdget(fd);
1770 if (filp == NULL) 1770 if (f.file == NULL)
1771 goto out; 1771 goto out;
1772 1772
1773 if (reg->hr_blocks == 0 || reg->hr_start_block == 0 || 1773 if (reg->hr_blocks == 0 || reg->hr_start_block == 0 ||
1774 reg->hr_block_bytes == 0) 1774 reg->hr_block_bytes == 0)
1775 goto out; 1775 goto out2;
1776 1776
1777 inode = igrab(filp->f_mapping->host); 1777 inode = igrab(f.file->f_mapping->host);
1778 if (inode == NULL) 1778 if (inode == NULL)
1779 goto out; 1779 goto out2;
1780 1780
1781 if (!S_ISBLK(inode->i_mode)) 1781 if (!S_ISBLK(inode->i_mode))
1782 goto out; 1782 goto out3;
1783 1783
1784 reg->hr_bdev = I_BDEV(filp->f_mapping->host); 1784 reg->hr_bdev = I_BDEV(f.file->f_mapping->host);
1785 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); 1785 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL);
1786 if (ret) { 1786 if (ret) {
1787 reg->hr_bdev = NULL; 1787 reg->hr_bdev = NULL;
1788 goto out; 1788 goto out3;
1789 } 1789 }
1790 inode = NULL; 1790 inode = NULL;
1791 1791
@@ -1797,7 +1797,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1797 "blocksize %u incorrect for device, expected %d", 1797 "blocksize %u incorrect for device, expected %d",
1798 reg->hr_block_bytes, sectsize); 1798 reg->hr_block_bytes, sectsize);
1799 ret = -EINVAL; 1799 ret = -EINVAL;
1800 goto out; 1800 goto out3;
1801 } 1801 }
1802 1802
1803 o2hb_init_region_params(reg); 1803 o2hb_init_region_params(reg);
@@ -1811,13 +1811,13 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1811 ret = o2hb_map_slot_data(reg); 1811 ret = o2hb_map_slot_data(reg);
1812 if (ret) { 1812 if (ret) {
1813 mlog_errno(ret); 1813 mlog_errno(ret);
1814 goto out; 1814 goto out3;
1815 } 1815 }
1816 1816
1817 ret = o2hb_populate_slot_data(reg); 1817 ret = o2hb_populate_slot_data(reg);
1818 if (ret) { 1818 if (ret) {
1819 mlog_errno(ret); 1819 mlog_errno(ret);
1820 goto out; 1820 goto out3;
1821 } 1821 }
1822 1822
1823 INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout); 1823 INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout);
@@ -1847,7 +1847,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1847 if (IS_ERR(hb_task)) { 1847 if (IS_ERR(hb_task)) {
1848 ret = PTR_ERR(hb_task); 1848 ret = PTR_ERR(hb_task);
1849 mlog_errno(ret); 1849 mlog_errno(ret);
1850 goto out; 1850 goto out3;
1851 } 1851 }
1852 1852
1853 spin_lock(&o2hb_live_lock); 1853 spin_lock(&o2hb_live_lock);
@@ -1863,7 +1863,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1863 1863
1864 if (reg->hr_aborted_start) { 1864 if (reg->hr_aborted_start) {
1865 ret = -EIO; 1865 ret = -EIO;
1866 goto out; 1866 goto out3;
1867 } 1867 }
1868 1868
1869 /* Ok, we were woken. Make sure it wasn't by drop_item() */ 1869 /* Ok, we were woken. Make sure it wasn't by drop_item() */
@@ -1882,11 +1882,11 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1882 printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", 1882 printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n",
1883 config_item_name(&reg->hr_item), reg->hr_dev_name); 1883 config_item_name(&reg->hr_item), reg->hr_dev_name);
1884 1884
1885out3:
1886 iput(inode);
1887out2:
1888 fdput(f);
1885out: 1889out:
1886 if (filp)
1887 fput(filp);
1888 if (inode)
1889 iput(inode);
1890 if (ret < 0) { 1890 if (ret < 0) {
1891 if (reg->hr_bdev) { 1891 if (reg->hr_bdev) {
1892 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); 1892 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 83b6f98e0665..16b712d260d4 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -691,6 +691,11 @@ static void __exit exit_dlmfs_fs(void)
691 flush_workqueue(user_dlm_worker); 691 flush_workqueue(user_dlm_worker);
692 destroy_workqueue(user_dlm_worker); 692 destroy_workqueue(user_dlm_worker);
693 693
694 /*
695 * Make sure all delayed rcu free inodes are flushed before we
696 * destroy cache.
697 */
698 rcu_barrier();
694 kmem_cache_destroy(dlmfs_inode_cache); 699 kmem_cache_destroy(dlmfs_inode_cache);
695 700
696 bdi_destroy(&dlmfs_backing_dev_info); 701 bdi_destroy(&dlmfs_backing_dev_info);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 68f4541c2db9..0e91ec22a940 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1818,6 +1818,11 @@ static int ocfs2_initialize_mem_caches(void)
1818 1818
1819static void ocfs2_free_mem_caches(void) 1819static void ocfs2_free_mem_caches(void)
1820{ 1820{
1821 /*
1822 * Make sure all delayed rcu free inodes are flushed before we
1823 * destroy cache.
1824 */
1825 rcu_barrier();
1821 if (ocfs2_inode_cachep) 1826 if (ocfs2_inode_cachep)
1822 kmem_cache_destroy(ocfs2_inode_cachep); 1827 kmem_cache_destroy(ocfs2_inode_cachep);
1823 ocfs2_inode_cachep = NULL; 1828 ocfs2_inode_cachep = NULL;
diff --git a/fs/open.c b/fs/open.c
index b0bae3a41825..44da0feeca2c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -132,27 +132,27 @@ SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
132 132
133static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 133static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
134{ 134{
135 struct inode * inode; 135 struct inode *inode;
136 struct dentry *dentry; 136 struct dentry *dentry;
137 struct file * file; 137 struct fd f;
138 int error; 138 int error;
139 139
140 error = -EINVAL; 140 error = -EINVAL;
141 if (length < 0) 141 if (length < 0)
142 goto out; 142 goto out;
143 error = -EBADF; 143 error = -EBADF;
144 file = fget(fd); 144 f = fdget(fd);
145 if (!file) 145 if (!f.file)
146 goto out; 146 goto out;
147 147
148 /* explicitly opened as large or we are on 64-bit box */ 148 /* explicitly opened as large or we are on 64-bit box */
149 if (file->f_flags & O_LARGEFILE) 149 if (f.file->f_flags & O_LARGEFILE)
150 small = 0; 150 small = 0;
151 151
152 dentry = file->f_path.dentry; 152 dentry = f.file->f_path.dentry;
153 inode = dentry->d_inode; 153 inode = dentry->d_inode;
154 error = -EINVAL; 154 error = -EINVAL;
155 if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) 155 if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
156 goto out_putf; 156 goto out_putf;
157 157
158 error = -EINVAL; 158 error = -EINVAL;
@@ -165,14 +165,14 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
165 goto out_putf; 165 goto out_putf;
166 166
167 sb_start_write(inode->i_sb); 167 sb_start_write(inode->i_sb);
168 error = locks_verify_truncate(inode, file, length); 168 error = locks_verify_truncate(inode, f.file, length);
169 if (!error) 169 if (!error)
170 error = security_path_truncate(&file->f_path); 170 error = security_path_truncate(&f.file->f_path);
171 if (!error) 171 if (!error)
172 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); 172 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
173 sb_end_write(inode->i_sb); 173 sb_end_write(inode->i_sb);
174out_putf: 174out_putf:
175 fput(file); 175 fdput(f);
176out: 176out:
177 return error; 177 return error;
178} 178}
@@ -276,15 +276,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
276 276
277SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) 277SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
278{ 278{
279 struct file *file; 279 struct fd f = fdget(fd);
280 int error = -EBADF; 280 int error = -EBADF;
281 281
282 file = fget(fd); 282 if (f.file) {
283 if (file) { 283 error = do_fallocate(f.file, mode, offset, len);
284 error = do_fallocate(file, mode, offset, len); 284 fdput(f);
285 fput(file);
286 } 285 }
287
288 return error; 286 return error;
289} 287}
290 288
@@ -400,16 +398,15 @@ out:
400 398
401SYSCALL_DEFINE1(fchdir, unsigned int, fd) 399SYSCALL_DEFINE1(fchdir, unsigned int, fd)
402{ 400{
403 struct file *file; 401 struct fd f = fdget_raw(fd);
404 struct inode *inode; 402 struct inode *inode;
405 int error, fput_needed; 403 int error = -EBADF;
406 404
407 error = -EBADF; 405 error = -EBADF;
408 file = fget_raw_light(fd, &fput_needed); 406 if (!f.file)
409 if (!file)
410 goto out; 407 goto out;
411 408
412 inode = file->f_path.dentry->d_inode; 409 inode = f.file->f_path.dentry->d_inode;
413 410
414 error = -ENOTDIR; 411 error = -ENOTDIR;
415 if (!S_ISDIR(inode->i_mode)) 412 if (!S_ISDIR(inode->i_mode))
@@ -417,9 +414,9 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
417 414
418 error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); 415 error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
419 if (!error) 416 if (!error)
420 set_fs_pwd(current->fs, &file->f_path); 417 set_fs_pwd(current->fs, &f.file->f_path);
421out_putf: 418out_putf:
422 fput_light(file, fput_needed); 419 fdput(f);
423out: 420out:
424 return error; 421 return error;
425} 422}
@@ -582,23 +579,20 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group
582 579
583SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) 580SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
584{ 581{
585 struct file * file; 582 struct fd f = fdget(fd);
586 int error = -EBADF; 583 int error = -EBADF;
587 struct dentry * dentry;
588 584
589 file = fget(fd); 585 if (!f.file)
590 if (!file)
591 goto out; 586 goto out;
592 587
593 error = mnt_want_write_file(file); 588 error = mnt_want_write_file(f.file);
594 if (error) 589 if (error)
595 goto out_fput; 590 goto out_fput;
596 dentry = file->f_path.dentry; 591 audit_inode(NULL, f.file->f_path.dentry);
597 audit_inode(NULL, dentry); 592 error = chown_common(&f.file->f_path, user, group);
598 error = chown_common(&file->f_path, user, group); 593 mnt_drop_write_file(f.file);
599 mnt_drop_write_file(file);
600out_fput: 594out_fput:
601 fput(file); 595 fdput(f);
602out: 596out:
603 return error; 597 return error;
604} 598}
@@ -803,50 +797,6 @@ struct file *dentry_open(const struct path *path, int flags,
803} 797}
804EXPORT_SYMBOL(dentry_open); 798EXPORT_SYMBOL(dentry_open);
805 799
806static void __put_unused_fd(struct files_struct *files, unsigned int fd)
807{
808 struct fdtable *fdt = files_fdtable(files);
809 __clear_open_fd(fd, fdt);
810 if (fd < files->next_fd)
811 files->next_fd = fd;
812}
813
814void put_unused_fd(unsigned int fd)
815{
816 struct files_struct *files = current->files;
817 spin_lock(&files->file_lock);
818 __put_unused_fd(files, fd);
819 spin_unlock(&files->file_lock);
820}
821
822EXPORT_SYMBOL(put_unused_fd);
823
824/*
825 * Install a file pointer in the fd array.
826 *
827 * The VFS is full of places where we drop the files lock between
828 * setting the open_fds bitmap and installing the file in the file
829 * array. At any such point, we are vulnerable to a dup2() race
830 * installing a file in the array before us. We need to detect this and
831 * fput() the struct file we are about to overwrite in this case.
832 *
833 * It should never happen - if we allow dup2() do it, _really_ bad things
834 * will follow.
835 */
836
837void fd_install(unsigned int fd, struct file *file)
838{
839 struct files_struct *files = current->files;
840 struct fdtable *fdt;
841 spin_lock(&files->file_lock);
842 fdt = files_fdtable(files);
843 BUG_ON(fdt->fd[fd] != NULL);
844 rcu_assign_pointer(fdt->fd[fd], file);
845 spin_unlock(&files->file_lock);
846}
847
848EXPORT_SYMBOL(fd_install);
849
850static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) 800static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
851{ 801{
852 int lookup_flags = 0; 802 int lookup_flags = 0;
@@ -858,7 +808,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
858 op->mode = 0; 808 op->mode = 0;
859 809
860 /* Must never be set by userspace */ 810 /* Must never be set by userspace */
861 flags &= ~FMODE_NONOTIFY; 811 flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC;
862 812
863 /* 813 /*
864 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 814 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
@@ -1038,23 +988,7 @@ EXPORT_SYMBOL(filp_close);
1038 */ 988 */
1039SYSCALL_DEFINE1(close, unsigned int, fd) 989SYSCALL_DEFINE1(close, unsigned int, fd)
1040{ 990{
1041 struct file * filp; 991 int retval = __close_fd(current->files, fd);
1042 struct files_struct *files = current->files;
1043 struct fdtable *fdt;
1044 int retval;
1045
1046 spin_lock(&files->file_lock);
1047 fdt = files_fdtable(files);
1048 if (fd >= fdt->max_fds)
1049 goto out_unlock;
1050 filp = fdt->fd[fd];
1051 if (!filp)
1052 goto out_unlock;
1053 rcu_assign_pointer(fdt->fd[fd], NULL);
1054 __clear_close_on_exec(fd, fdt);
1055 __put_unused_fd(files, fd);
1056 spin_unlock(&files->file_lock);
1057 retval = filp_close(filp, files);
1058 992
1059 /* can't restart close syscall because file table entry was cleared */ 993 /* can't restart close syscall because file table entry was cleared */
1060 if (unlikely(retval == -ERESTARTSYS || 994 if (unlikely(retval == -ERESTARTSYS ||
@@ -1064,10 +998,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
1064 retval = -EINTR; 998 retval = -EINTR;
1065 999
1066 return retval; 1000 return retval;
1067
1068out_unlock:
1069 spin_unlock(&files->file_lock);
1070 return -EBADF;
1071} 1001}
1072EXPORT_SYMBOL(sys_close); 1002EXPORT_SYMBOL(sys_close);
1073 1003
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 4a3477949bca..2ad080faca34 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -463,6 +463,11 @@ static int __init init_openprom_fs(void)
463static void __exit exit_openprom_fs(void) 463static void __exit exit_openprom_fs(void)
464{ 464{
465 unregister_filesystem(&openprom_fs_type); 465 unregister_filesystem(&openprom_fs_type);
466 /*
467 * Make sure all delayed rcu free inodes are flushed before we
468 * destroy cache.
469 */
470 rcu_barrier();
466 kmem_cache_destroy(op_inode_cachep); 471 kmem_cache_destroy(op_inode_cachep);
467} 472}
468 473
diff --git a/fs/pipe.c b/fs/pipe.c
index 8d85d7068c1e..bd3479db4b62 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1064,9 +1064,8 @@ err_inode:
1064 return err; 1064 return err;
1065} 1065}
1066 1066
1067int do_pipe_flags(int *fd, int flags) 1067static int __do_pipe_flags(int *fd, struct file **files, int flags)
1068{ 1068{
1069 struct file *files[2];
1070 int error; 1069 int error;
1071 int fdw, fdr; 1070 int fdw, fdr;
1072 1071
@@ -1088,11 +1087,8 @@ int do_pipe_flags(int *fd, int flags)
1088 fdw = error; 1087 fdw = error;
1089 1088
1090 audit_fd_pair(fdr, fdw); 1089 audit_fd_pair(fdr, fdw);
1091 fd_install(fdr, files[0]);
1092 fd_install(fdw, files[1]);
1093 fd[0] = fdr; 1090 fd[0] = fdr;
1094 fd[1] = fdw; 1091 fd[1] = fdw;
1095
1096 return 0; 1092 return 0;
1097 1093
1098 err_fdr: 1094 err_fdr:
@@ -1103,21 +1099,38 @@ int do_pipe_flags(int *fd, int flags)
1103 return error; 1099 return error;
1104} 1100}
1105 1101
1102int do_pipe_flags(int *fd, int flags)
1103{
1104 struct file *files[2];
1105 int error = __do_pipe_flags(fd, files, flags);
1106 if (!error) {
1107 fd_install(fd[0], files[0]);
1108 fd_install(fd[1], files[1]);
1109 }
1110 return error;
1111}
1112
1106/* 1113/*
1107 * sys_pipe() is the normal C calling standard for creating 1114 * sys_pipe() is the normal C calling standard for creating
1108 * a pipe. It's not the way Unix traditionally does this, though. 1115 * a pipe. It's not the way Unix traditionally does this, though.
1109 */ 1116 */
1110SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) 1117SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
1111{ 1118{
1119 struct file *files[2];
1112 int fd[2]; 1120 int fd[2];
1113 int error; 1121 int error;
1114 1122
1115 error = do_pipe_flags(fd, flags); 1123 error = __do_pipe_flags(fd, files, flags);
1116 if (!error) { 1124 if (!error) {
1117 if (copy_to_user(fildes, fd, sizeof(fd))) { 1125 if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
1118 sys_close(fd[0]); 1126 fput(files[0]);
1119 sys_close(fd[1]); 1127 fput(files[1]);
1128 put_unused_fd(fd[0]);
1129 put_unused_fd(fd[1]);
1120 error = -EFAULT; 1130 error = -EFAULT;
1131 } else {
1132 fd_install(fd[0], files[0]);
1133 fd_install(fd[1], files[1]);
1121 } 1134 }
1122 } 1135 }
1123 return error; 1136 return error;
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index c1c729335924..99349efbbc2b 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o
8proc-$(CONFIG_MMU) := mmu.o task_mmu.o 8proc-$(CONFIG_MMU) := mmu.o task_mmu.o
9 9
10proc-y += inode.o root.o base.o generic.o array.o \ 10proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o 11 proc_tty.o fd.o
12proc-y += cmdline.o 12proc-y += cmdline.o
13proc-y += consoles.o 13proc-y += consoles.o
14proc-y += cpuinfo.o 14proc-y += cpuinfo.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index acd1960c28a2..d295af993677 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -90,6 +90,7 @@
90#endif 90#endif
91#include <trace/events/oom.h> 91#include <trace/events/oom.h>
92#include "internal.h" 92#include "internal.h"
93#include "fd.h"
93 94
94/* NOTE: 95/* NOTE:
95 * Implementing inode permission operations in /proc is almost 96 * Implementing inode permission operations in /proc is almost
@@ -136,8 +137,6 @@ struct pid_entry {
136 NULL, &proc_single_file_operations, \ 137 NULL, &proc_single_file_operations, \
137 { .proc_show = show } ) 138 { .proc_show = show } )
138 139
139static int proc_fd_permission(struct inode *inode, int mask);
140
141/* 140/*
142 * Count the number of hardlinks for the pid_entry table, excluding the . 141 * Count the number of hardlinks for the pid_entry table, excluding the .
143 * and .. links. 142 * and .. links.
@@ -1500,7 +1499,7 @@ out:
1500 return error; 1499 return error;
1501} 1500}
1502 1501
1503static const struct inode_operations proc_pid_link_inode_operations = { 1502const struct inode_operations proc_pid_link_inode_operations = {
1504 .readlink = proc_pid_readlink, 1503 .readlink = proc_pid_readlink,
1505 .follow_link = proc_pid_follow_link, 1504 .follow_link = proc_pid_follow_link,
1506 .setattr = proc_setattr, 1505 .setattr = proc_setattr,
@@ -1509,21 +1508,6 @@ static const struct inode_operations proc_pid_link_inode_operations = {
1509 1508
1510/* building an inode */ 1509/* building an inode */
1511 1510
1512static int task_dumpable(struct task_struct *task)
1513{
1514 int dumpable = 0;
1515 struct mm_struct *mm;
1516
1517 task_lock(task);
1518 mm = task->mm;
1519 if (mm)
1520 dumpable = get_dumpable(mm);
1521 task_unlock(task);
1522 if(dumpable == 1)
1523 return 1;
1524 return 0;
1525}
1526
1527struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) 1511struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1528{ 1512{
1529 struct inode * inode; 1513 struct inode * inode;
@@ -1649,15 +1633,6 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
1649 return 0; 1633 return 0;
1650} 1634}
1651 1635
1652static int pid_delete_dentry(const struct dentry * dentry)
1653{
1654 /* Is the task we represent dead?
1655 * If so, then don't put the dentry on the lru list,
1656 * kill it immediately.
1657 */
1658 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1659}
1660
1661const struct dentry_operations pid_dentry_operations = 1636const struct dentry_operations pid_dentry_operations =
1662{ 1637{
1663 .d_revalidate = pid_revalidate, 1638 .d_revalidate = pid_revalidate,
@@ -1720,289 +1695,6 @@ end_instantiate:
1720 return filldir(dirent, name, len, filp->f_pos, ino, type); 1695 return filldir(dirent, name, len, filp->f_pos, ino, type);
1721} 1696}
1722 1697
1723static unsigned name_to_int(struct dentry *dentry)
1724{
1725 const char *name = dentry->d_name.name;
1726 int len = dentry->d_name.len;
1727 unsigned n = 0;
1728
1729 if (len > 1 && *name == '0')
1730 goto out;
1731 while (len-- > 0) {
1732 unsigned c = *name++ - '0';
1733 if (c > 9)
1734 goto out;
1735 if (n >= (~0U-9)/10)
1736 goto out;
1737 n *= 10;
1738 n += c;
1739 }
1740 return n;
1741out:
1742 return ~0U;
1743}
1744
1745#define PROC_FDINFO_MAX 64
1746
1747static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1748{
1749 struct task_struct *task = get_proc_task(inode);
1750 struct files_struct *files = NULL;
1751 struct file *file;
1752 int fd = proc_fd(inode);
1753
1754 if (task) {
1755 files = get_files_struct(task);
1756 put_task_struct(task);
1757 }
1758 if (files) {
1759 /*
1760 * We are not taking a ref to the file structure, so we must
1761 * hold ->file_lock.
1762 */
1763 spin_lock(&files->file_lock);
1764 file = fcheck_files(files, fd);
1765 if (file) {
1766 unsigned int f_flags;
1767 struct fdtable *fdt;
1768
1769 fdt = files_fdtable(files);
1770 f_flags = file->f_flags & ~O_CLOEXEC;
1771 if (close_on_exec(fd, fdt))
1772 f_flags |= O_CLOEXEC;
1773
1774 if (path) {
1775 *path = file->f_path;
1776 path_get(&file->f_path);
1777 }
1778 if (info)
1779 snprintf(info, PROC_FDINFO_MAX,
1780 "pos:\t%lli\n"
1781 "flags:\t0%o\n",
1782 (long long) file->f_pos,
1783 f_flags);
1784 spin_unlock(&files->file_lock);
1785 put_files_struct(files);
1786 return 0;
1787 }
1788 spin_unlock(&files->file_lock);
1789 put_files_struct(files);
1790 }
1791 return -ENOENT;
1792}
1793
1794static int proc_fd_link(struct dentry *dentry, struct path *path)
1795{
1796 return proc_fd_info(dentry->d_inode, path, NULL);
1797}
1798
1799static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
1800{
1801 struct inode *inode;
1802 struct task_struct *task;
1803 int fd;
1804 struct files_struct *files;
1805 const struct cred *cred;
1806
1807 if (flags & LOOKUP_RCU)
1808 return -ECHILD;
1809
1810 inode = dentry->d_inode;
1811 task = get_proc_task(inode);
1812 fd = proc_fd(inode);
1813
1814 if (task) {
1815 files = get_files_struct(task);
1816 if (files) {
1817 struct file *file;
1818 rcu_read_lock();
1819 file = fcheck_files(files, fd);
1820 if (file) {
1821 unsigned f_mode = file->f_mode;
1822
1823 rcu_read_unlock();
1824 put_files_struct(files);
1825
1826 if (task_dumpable(task)) {
1827 rcu_read_lock();
1828 cred = __task_cred(task);
1829 inode->i_uid = cred->euid;
1830 inode->i_gid = cred->egid;
1831 rcu_read_unlock();
1832 } else {
1833 inode->i_uid = GLOBAL_ROOT_UID;
1834 inode->i_gid = GLOBAL_ROOT_GID;
1835 }
1836
1837 if (S_ISLNK(inode->i_mode)) {
1838 unsigned i_mode = S_IFLNK;
1839 if (f_mode & FMODE_READ)
1840 i_mode |= S_IRUSR | S_IXUSR;
1841 if (f_mode & FMODE_WRITE)
1842 i_mode |= S_IWUSR | S_IXUSR;
1843 inode->i_mode = i_mode;
1844 }
1845
1846 security_task_to_inode(task, inode);
1847 put_task_struct(task);
1848 return 1;
1849 }
1850 rcu_read_unlock();
1851 put_files_struct(files);
1852 }
1853 put_task_struct(task);
1854 }
1855 d_drop(dentry);
1856 return 0;
1857}
1858
1859static const struct dentry_operations tid_fd_dentry_operations =
1860{
1861 .d_revalidate = tid_fd_revalidate,
1862 .d_delete = pid_delete_dentry,
1863};
1864
1865static struct dentry *proc_fd_instantiate(struct inode *dir,
1866 struct dentry *dentry, struct task_struct *task, const void *ptr)
1867{
1868 unsigned fd = (unsigned long)ptr;
1869 struct inode *inode;
1870 struct proc_inode *ei;
1871 struct dentry *error = ERR_PTR(-ENOENT);
1872
1873 inode = proc_pid_make_inode(dir->i_sb, task);
1874 if (!inode)
1875 goto out;
1876 ei = PROC_I(inode);
1877 ei->fd = fd;
1878
1879 inode->i_mode = S_IFLNK;
1880 inode->i_op = &proc_pid_link_inode_operations;
1881 inode->i_size = 64;
1882 ei->op.proc_get_link = proc_fd_link;
1883 d_set_d_op(dentry, &tid_fd_dentry_operations);
1884 d_add(dentry, inode);
1885 /* Close the race of the process dying before we return the dentry */
1886 if (tid_fd_revalidate(dentry, 0))
1887 error = NULL;
1888
1889 out:
1890 return error;
1891}
1892
1893static struct dentry *proc_lookupfd_common(struct inode *dir,
1894 struct dentry *dentry,
1895 instantiate_t instantiate)
1896{
1897 struct task_struct *task = get_proc_task(dir);
1898 unsigned fd = name_to_int(dentry);
1899 struct dentry *result = ERR_PTR(-ENOENT);
1900
1901 if (!task)
1902 goto out_no_task;
1903 if (fd == ~0U)
1904 goto out;
1905
1906 result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
1907out:
1908 put_task_struct(task);
1909out_no_task:
1910 return result;
1911}
1912
1913static int proc_readfd_common(struct file * filp, void * dirent,
1914 filldir_t filldir, instantiate_t instantiate)
1915{
1916 struct dentry *dentry = filp->f_path.dentry;
1917 struct inode *inode = dentry->d_inode;
1918 struct task_struct *p = get_proc_task(inode);
1919 unsigned int fd, ino;
1920 int retval;
1921 struct files_struct * files;
1922
1923 retval = -ENOENT;
1924 if (!p)
1925 goto out_no_task;
1926 retval = 0;
1927
1928 fd = filp->f_pos;
1929 switch (fd) {
1930 case 0:
1931 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
1932 goto out;
1933 filp->f_pos++;
1934 case 1:
1935 ino = parent_ino(dentry);
1936 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
1937 goto out;
1938 filp->f_pos++;
1939 default:
1940 files = get_files_struct(p);
1941 if (!files)
1942 goto out;
1943 rcu_read_lock();
1944 for (fd = filp->f_pos-2;
1945 fd < files_fdtable(files)->max_fds;
1946 fd++, filp->f_pos++) {
1947 char name[PROC_NUMBUF];
1948 int len;
1949 int rv;
1950
1951 if (!fcheck_files(files, fd))
1952 continue;
1953 rcu_read_unlock();
1954
1955 len = snprintf(name, sizeof(name), "%d", fd);
1956 rv = proc_fill_cache(filp, dirent, filldir,
1957 name, len, instantiate, p,
1958 (void *)(unsigned long)fd);
1959 if (rv < 0)
1960 goto out_fd_loop;
1961 rcu_read_lock();
1962 }
1963 rcu_read_unlock();
1964out_fd_loop:
1965 put_files_struct(files);
1966 }
1967out:
1968 put_task_struct(p);
1969out_no_task:
1970 return retval;
1971}
1972
1973static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
1974 unsigned int flags)
1975{
1976 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
1977}
1978
1979static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
1980{
1981 return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
1982}
1983
1984static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
1985 size_t len, loff_t *ppos)
1986{
1987 char tmp[PROC_FDINFO_MAX];
1988 int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
1989 if (!err)
1990 err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
1991 return err;
1992}
1993
1994static const struct file_operations proc_fdinfo_file_operations = {
1995 .open = nonseekable_open,
1996 .read = proc_fdinfo_read,
1997 .llseek = no_llseek,
1998};
1999
2000static const struct file_operations proc_fd_operations = {
2001 .read = generic_read_dir,
2002 .readdir = proc_readfd,
2003 .llseek = default_llseek,
2004};
2005
2006#ifdef CONFIG_CHECKPOINT_RESTORE 1698#ifdef CONFIG_CHECKPOINT_RESTORE
2007 1699
2008/* 1700/*
@@ -2121,7 +1813,7 @@ out:
2121} 1813}
2122 1814
2123struct map_files_info { 1815struct map_files_info {
2124 struct file *file; 1816 fmode_t mode;
2125 unsigned long len; 1817 unsigned long len;
2126 unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ 1818 unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
2127}; 1819};
@@ -2130,13 +1822,10 @@ static struct dentry *
2130proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, 1822proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
2131 struct task_struct *task, const void *ptr) 1823 struct task_struct *task, const void *ptr)
2132{ 1824{
2133 const struct file *file = ptr; 1825 fmode_t mode = (fmode_t)(unsigned long)ptr;
2134 struct proc_inode *ei; 1826 struct proc_inode *ei;
2135 struct inode *inode; 1827 struct inode *inode;
2136 1828
2137 if (!file)
2138 return ERR_PTR(-ENOENT);
2139
2140 inode = proc_pid_make_inode(dir->i_sb, task); 1829 inode = proc_pid_make_inode(dir->i_sb, task);
2141 if (!inode) 1830 if (!inode)
2142 return ERR_PTR(-ENOENT); 1831 return ERR_PTR(-ENOENT);
@@ -2148,9 +1837,9 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
2148 inode->i_size = 64; 1837 inode->i_size = 64;
2149 inode->i_mode = S_IFLNK; 1838 inode->i_mode = S_IFLNK;
2150 1839
2151 if (file->f_mode & FMODE_READ) 1840 if (mode & FMODE_READ)
2152 inode->i_mode |= S_IRUSR; 1841 inode->i_mode |= S_IRUSR;
2153 if (file->f_mode & FMODE_WRITE) 1842 if (mode & FMODE_WRITE)
2154 inode->i_mode |= S_IWUSR; 1843 inode->i_mode |= S_IWUSR;
2155 1844
2156 d_set_d_op(dentry, &tid_map_files_dentry_operations); 1845 d_set_d_op(dentry, &tid_map_files_dentry_operations);
@@ -2194,7 +1883,8 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
2194 if (!vma) 1883 if (!vma)
2195 goto out_no_vma; 1884 goto out_no_vma;
2196 1885
2197 result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file); 1886 result = proc_map_files_instantiate(dir, dentry, task,
1887 (void *)(unsigned long)vma->vm_file->f_mode);
2198 1888
2199out_no_vma: 1889out_no_vma:
2200 up_read(&mm->mmap_sem); 1890 up_read(&mm->mmap_sem);
@@ -2295,8 +1985,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2295 if (++pos <= filp->f_pos) 1985 if (++pos <= filp->f_pos)
2296 continue; 1986 continue;
2297 1987
2298 get_file(vma->vm_file); 1988 info.mode = vma->vm_file->f_mode;
2299 info.file = vma->vm_file;
2300 info.len = snprintf(info.name, 1989 info.len = snprintf(info.name,
2301 sizeof(info.name), "%lx-%lx", 1990 sizeof(info.name), "%lx-%lx",
2302 vma->vm_start, vma->vm_end); 1991 vma->vm_start, vma->vm_end);
@@ -2311,19 +2000,11 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2311 ret = proc_fill_cache(filp, dirent, filldir, 2000 ret = proc_fill_cache(filp, dirent, filldir,
2312 p->name, p->len, 2001 p->name, p->len,
2313 proc_map_files_instantiate, 2002 proc_map_files_instantiate,
2314 task, p->file); 2003 task,
2004 (void *)(unsigned long)p->mode);
2315 if (ret) 2005 if (ret)
2316 break; 2006 break;
2317 filp->f_pos++; 2007 filp->f_pos++;
2318 fput(p->file);
2319 }
2320 for (; i < nr_files; i++) {
2321 /*
2322 * In case of error don't forget
2323 * to put rest of file refs.
2324 */
2325 p = flex_array_get(fa, i);
2326 fput(p->file);
2327 } 2008 }
2328 if (fa) 2009 if (fa)
2329 flex_array_free(fa); 2010 flex_array_free(fa);
@@ -2345,82 +2026,6 @@ static const struct file_operations proc_map_files_operations = {
2345 2026
2346#endif /* CONFIG_CHECKPOINT_RESTORE */ 2027#endif /* CONFIG_CHECKPOINT_RESTORE */
2347 2028
2348/*
2349 * /proc/pid/fd needs a special permission handler so that a process can still
2350 * access /proc/self/fd after it has executed a setuid().
2351 */
2352static int proc_fd_permission(struct inode *inode, int mask)
2353{
2354 int rv = generic_permission(inode, mask);
2355 if (rv == 0)
2356 return 0;
2357 if (task_pid(current) == proc_pid(inode))
2358 rv = 0;
2359 return rv;
2360}
2361
2362/*
2363 * proc directories can do almost nothing..
2364 */
2365static const struct inode_operations proc_fd_inode_operations = {
2366 .lookup = proc_lookupfd,
2367 .permission = proc_fd_permission,
2368 .setattr = proc_setattr,
2369};
2370
2371static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2372 struct dentry *dentry, struct task_struct *task, const void *ptr)
2373{
2374 unsigned fd = (unsigned long)ptr;
2375 struct inode *inode;
2376 struct proc_inode *ei;
2377 struct dentry *error = ERR_PTR(-ENOENT);
2378
2379 inode = proc_pid_make_inode(dir->i_sb, task);
2380 if (!inode)
2381 goto out;
2382 ei = PROC_I(inode);
2383 ei->fd = fd;
2384 inode->i_mode = S_IFREG | S_IRUSR;
2385 inode->i_fop = &proc_fdinfo_file_operations;
2386 d_set_d_op(dentry, &tid_fd_dentry_operations);
2387 d_add(dentry, inode);
2388 /* Close the race of the process dying before we return the dentry */
2389 if (tid_fd_revalidate(dentry, 0))
2390 error = NULL;
2391
2392 out:
2393 return error;
2394}
2395
2396static struct dentry *proc_lookupfdinfo(struct inode *dir,
2397 struct dentry *dentry,
2398 unsigned int flags)
2399{
2400 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
2401}
2402
2403static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
2404{
2405 return proc_readfd_common(filp, dirent, filldir,
2406 proc_fdinfo_instantiate);
2407}
2408
2409static const struct file_operations proc_fdinfo_operations = {
2410 .read = generic_read_dir,
2411 .readdir = proc_readfdinfo,
2412 .llseek = default_llseek,
2413};
2414
2415/*
2416 * proc directories can do almost nothing..
2417 */
2418static const struct inode_operations proc_fdinfo_inode_operations = {
2419 .lookup = proc_lookupfdinfo,
2420 .setattr = proc_setattr,
2421};
2422
2423
2424static struct dentry *proc_pident_instantiate(struct inode *dir, 2029static struct dentry *proc_pident_instantiate(struct inode *dir,
2425 struct dentry *dentry, struct task_struct *task, const void *ptr) 2030 struct dentry *dentry, struct task_struct *task, const void *ptr)
2426{ 2031{
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
new file mode 100644
index 000000000000..f28a875f8779
--- /dev/null
+++ b/fs/proc/fd.c
@@ -0,0 +1,367 @@
1#include <linux/sched.h>
2#include <linux/errno.h>
3#include <linux/dcache.h>
4#include <linux/path.h>
5#include <linux/fdtable.h>
6#include <linux/namei.h>
7#include <linux/pid.h>
8#include <linux/security.h>
9#include <linux/file.h>
10#include <linux/seq_file.h>
11
12#include <linux/proc_fs.h>
13
14#include "internal.h"
15#include "fd.h"
16
17static int seq_show(struct seq_file *m, void *v)
18{
19 struct files_struct *files = NULL;
20 int f_flags = 0, ret = -ENOENT;
21 struct file *file = NULL;
22 struct task_struct *task;
23
24 task = get_proc_task(m->private);
25 if (!task)
26 return -ENOENT;
27
28 files = get_files_struct(task);
29 put_task_struct(task);
30
31 if (files) {
32 int fd = proc_fd(m->private);
33
34 spin_lock(&files->file_lock);
35 file = fcheck_files(files, fd);
36 if (file) {
37 struct fdtable *fdt = files_fdtable(files);
38
39 f_flags = file->f_flags;
40 if (close_on_exec(fd, fdt))
41 f_flags |= O_CLOEXEC;
42
43 get_file(file);
44 ret = 0;
45 }
46 spin_unlock(&files->file_lock);
47 put_files_struct(files);
48 }
49
50 if (!ret) {
51 seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
52 (long long)file->f_pos, f_flags);
53 fput(file);
54 }
55
56 return ret;
57}
58
59static int seq_fdinfo_open(struct inode *inode, struct file *file)
60{
61 return single_open(file, seq_show, inode);
62}
63
64static const struct file_operations proc_fdinfo_file_operations = {
65 .open = seq_fdinfo_open,
66 .read = seq_read,
67 .llseek = seq_lseek,
68 .release = single_release,
69};
70
71static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
72{
73 struct files_struct *files;
74 struct task_struct *task;
75 const struct cred *cred;
76 struct inode *inode;
77 int fd;
78
79 if (flags & LOOKUP_RCU)
80 return -ECHILD;
81
82 inode = dentry->d_inode;
83 task = get_proc_task(inode);
84 fd = proc_fd(inode);
85
86 if (task) {
87 files = get_files_struct(task);
88 if (files) {
89 struct file *file;
90
91 rcu_read_lock();
92 file = fcheck_files(files, fd);
93 if (file) {
94 unsigned f_mode = file->f_mode;
95
96 rcu_read_unlock();
97 put_files_struct(files);
98
99 if (task_dumpable(task)) {
100 rcu_read_lock();
101 cred = __task_cred(task);
102 inode->i_uid = cred->euid;
103 inode->i_gid = cred->egid;
104 rcu_read_unlock();
105 } else {
106 inode->i_uid = GLOBAL_ROOT_UID;
107 inode->i_gid = GLOBAL_ROOT_GID;
108 }
109
110 if (S_ISLNK(inode->i_mode)) {
111 unsigned i_mode = S_IFLNK;
112 if (f_mode & FMODE_READ)
113 i_mode |= S_IRUSR | S_IXUSR;
114 if (f_mode & FMODE_WRITE)
115 i_mode |= S_IWUSR | S_IXUSR;
116 inode->i_mode = i_mode;
117 }
118
119 security_task_to_inode(task, inode);
120 put_task_struct(task);
121 return 1;
122 }
123 rcu_read_unlock();
124 put_files_struct(files);
125 }
126 put_task_struct(task);
127 }
128
129 d_drop(dentry);
130 return 0;
131}
132
133static const struct dentry_operations tid_fd_dentry_operations = {
134 .d_revalidate = tid_fd_revalidate,
135 .d_delete = pid_delete_dentry,
136};
137
138static int proc_fd_link(struct dentry *dentry, struct path *path)
139{
140 struct files_struct *files = NULL;
141 struct task_struct *task;
142 int ret = -ENOENT;
143
144 task = get_proc_task(dentry->d_inode);
145 if (task) {
146 files = get_files_struct(task);
147 put_task_struct(task);
148 }
149
150 if (files) {
151 int fd = proc_fd(dentry->d_inode);
152 struct file *fd_file;
153
154 spin_lock(&files->file_lock);
155 fd_file = fcheck_files(files, fd);
156 if (fd_file) {
157 *path = fd_file->f_path;
158 path_get(&fd_file->f_path);
159 ret = 0;
160 }
161 spin_unlock(&files->file_lock);
162 put_files_struct(files);
163 }
164
165 return ret;
166}
167
168static struct dentry *
169proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
170 struct task_struct *task, const void *ptr)
171{
172 struct dentry *error = ERR_PTR(-ENOENT);
173 unsigned fd = (unsigned long)ptr;
174 struct proc_inode *ei;
175 struct inode *inode;
176
177 inode = proc_pid_make_inode(dir->i_sb, task);
178 if (!inode)
179 goto out;
180
181 ei = PROC_I(inode);
182 ei->fd = fd;
183
184 inode->i_mode = S_IFLNK;
185 inode->i_op = &proc_pid_link_inode_operations;
186 inode->i_size = 64;
187
188 ei->op.proc_get_link = proc_fd_link;
189
190 d_set_d_op(dentry, &tid_fd_dentry_operations);
191 d_add(dentry, inode);
192
193 /* Close the race of the process dying before we return the dentry */
194 if (tid_fd_revalidate(dentry, 0))
195 error = NULL;
196 out:
197 return error;
198}
199
200static struct dentry *proc_lookupfd_common(struct inode *dir,
201 struct dentry *dentry,
202 instantiate_t instantiate)
203{
204 struct task_struct *task = get_proc_task(dir);
205 struct dentry *result = ERR_PTR(-ENOENT);
206 unsigned fd = name_to_int(dentry);
207
208 if (!task)
209 goto out_no_task;
210 if (fd == ~0U)
211 goto out;
212
213 result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
214out:
215 put_task_struct(task);
216out_no_task:
217 return result;
218}
219
220static int proc_readfd_common(struct file * filp, void * dirent,
221 filldir_t filldir, instantiate_t instantiate)
222{
223 struct dentry *dentry = filp->f_path.dentry;
224 struct inode *inode = dentry->d_inode;
225 struct task_struct *p = get_proc_task(inode);
226 struct files_struct *files;
227 unsigned int fd, ino;
228 int retval;
229
230 retval = -ENOENT;
231 if (!p)
232 goto out_no_task;
233 retval = 0;
234
235 fd = filp->f_pos;
236 switch (fd) {
237 case 0:
238 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
239 goto out;
240 filp->f_pos++;
241 case 1:
242 ino = parent_ino(dentry);
243 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
244 goto out;
245 filp->f_pos++;
246 default:
247 files = get_files_struct(p);
248 if (!files)
249 goto out;
250 rcu_read_lock();
251 for (fd = filp->f_pos - 2;
252 fd < files_fdtable(files)->max_fds;
253 fd++, filp->f_pos++) {
254 char name[PROC_NUMBUF];
255 int len;
256 int rv;
257
258 if (!fcheck_files(files, fd))
259 continue;
260 rcu_read_unlock();
261
262 len = snprintf(name, sizeof(name), "%d", fd);
263 rv = proc_fill_cache(filp, dirent, filldir,
264 name, len, instantiate, p,
265 (void *)(unsigned long)fd);
266 if (rv < 0)
267 goto out_fd_loop;
268 rcu_read_lock();
269 }
270 rcu_read_unlock();
271out_fd_loop:
272 put_files_struct(files);
273 }
274out:
275 put_task_struct(p);
276out_no_task:
277 return retval;
278}
279
280static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
281{
282 return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
283}
284
285const struct file_operations proc_fd_operations = {
286 .read = generic_read_dir,
287 .readdir = proc_readfd,
288 .llseek = default_llseek,
289};
290
291static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
292 unsigned int flags)
293{
294 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
295}
296
297/*
298 * /proc/pid/fd needs a special permission handler so that a process can still
299 * access /proc/self/fd after it has executed a setuid().
300 */
301int proc_fd_permission(struct inode *inode, int mask)
302{
303 int rv = generic_permission(inode, mask);
304 if (rv == 0)
305 return 0;
306 if (task_pid(current) == proc_pid(inode))
307 rv = 0;
308 return rv;
309}
310
311const struct inode_operations proc_fd_inode_operations = {
312 .lookup = proc_lookupfd,
313 .permission = proc_fd_permission,
314 .setattr = proc_setattr,
315};
316
317static struct dentry *
318proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
319 struct task_struct *task, const void *ptr)
320{
321 struct dentry *error = ERR_PTR(-ENOENT);
322 unsigned fd = (unsigned long)ptr;
323 struct proc_inode *ei;
324 struct inode *inode;
325
326 inode = proc_pid_make_inode(dir->i_sb, task);
327 if (!inode)
328 goto out;
329
330 ei = PROC_I(inode);
331 ei->fd = fd;
332
333 inode->i_mode = S_IFREG | S_IRUSR;
334 inode->i_fop = &proc_fdinfo_file_operations;
335
336 d_set_d_op(dentry, &tid_fd_dentry_operations);
337 d_add(dentry, inode);
338
339 /* Close the race of the process dying before we return the dentry */
340 if (tid_fd_revalidate(dentry, 0))
341 error = NULL;
342 out:
343 return error;
344}
345
346static struct dentry *
347proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags)
348{
349 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
350}
351
352static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
353{
354 return proc_readfd_common(filp, dirent, filldir,
355 proc_fdinfo_instantiate);
356}
357
358const struct inode_operations proc_fdinfo_inode_operations = {
359 .lookup = proc_lookupfdinfo,
360 .setattr = proc_setattr,
361};
362
363const struct file_operations proc_fdinfo_operations = {
364 .read = generic_read_dir,
365 .readdir = proc_readfdinfo,
366 .llseek = default_llseek,
367};
diff --git a/fs/proc/fd.h b/fs/proc/fd.h
new file mode 100644
index 000000000000..cbb1d47deda8
--- /dev/null
+++ b/fs/proc/fd.h
@@ -0,0 +1,14 @@
1#ifndef __PROCFS_FD_H__
2#define __PROCFS_FD_H__
3
4#include <linux/fs.h>
5
6extern const struct file_operations proc_fd_operations;
7extern const struct inode_operations proc_fd_inode_operations;
8
9extern const struct file_operations proc_fdinfo_operations;
10extern const struct inode_operations proc_fdinfo_inode_operations;
11
12extern int proc_fd_permission(struct inode *inode, int mask);
13
14#endif /* __PROCFS_FD_H__ */
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index e1167a1c9126..67925a7bd8cb 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -9,6 +9,7 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/sched.h>
12#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
13struct ctl_table_header; 14struct ctl_table_header;
14 15
@@ -65,6 +66,7 @@ extern const struct file_operations proc_clear_refs_operations;
65extern const struct file_operations proc_pagemap_operations; 66extern const struct file_operations proc_pagemap_operations;
66extern const struct file_operations proc_net_operations; 67extern const struct file_operations proc_net_operations;
67extern const struct inode_operations proc_net_inode_operations; 68extern const struct inode_operations proc_net_inode_operations;
69extern const struct inode_operations proc_pid_link_inode_operations;
68 70
69struct proc_maps_private { 71struct proc_maps_private {
70 struct pid *pid; 72 struct pid *pid;
@@ -91,6 +93,52 @@ static inline int proc_fd(struct inode *inode)
91 return PROC_I(inode)->fd; 93 return PROC_I(inode)->fd;
92} 94}
93 95
96static inline int task_dumpable(struct task_struct *task)
97{
98 int dumpable = 0;
99 struct mm_struct *mm;
100
101 task_lock(task);
102 mm = task->mm;
103 if (mm)
104 dumpable = get_dumpable(mm);
105 task_unlock(task);
106 if(dumpable == 1)
107 return 1;
108 return 0;
109}
110
111static inline int pid_delete_dentry(const struct dentry * dentry)
112{
113 /* Is the task we represent dead?
114 * If so, then don't put the dentry on the lru list,
115 * kill it immediately.
116 */
117 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
118}
119
120static inline unsigned name_to_int(struct dentry *dentry)
121{
122 const char *name = dentry->d_name.name;
123 int len = dentry->d_name.len;
124 unsigned n = 0;
125
126 if (len > 1 && *name == '0')
127 goto out;
128 while (len-- > 0) {
129 unsigned c = *name++ - '0';
130 if (c > 9)
131 goto out;
132 if (n >= (~0U-9)/10)
133 goto out;
134 n *= 10;
135 n += c;
136 }
137 return n;
138out:
139 return ~0U;
140}
141
94struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, 142struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
95 struct dentry *dentry); 143 struct dentry *dentry);
96int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 144int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 5c3c7b02e17b..43098bb5723a 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -391,6 +391,11 @@ static int init_inodecache(void)
391 391
392static void destroy_inodecache(void) 392static void destroy_inodecache(void)
393{ 393{
394 /*
395 * Make sure all delayed rcu free inodes are flushed before we
396 * destroy cache.
397 */
398 rcu_barrier();
394 kmem_cache_destroy(qnx4_inode_cachep); 399 kmem_cache_destroy(qnx4_inode_cachep);
395} 400}
396 401
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index f4eef0b5e7b5..b6addf560483 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -651,6 +651,11 @@ static int init_inodecache(void)
651 651
652static void destroy_inodecache(void) 652static void destroy_inodecache(void)
653{ 653{
654 /*
655 * Make sure all delayed rcu free inodes are flushed before we
656 * destroy cache.
657 */
658 rcu_barrier();
654 kmem_cache_destroy(qnx6_inode_cachep); 659 kmem_cache_destroy(qnx6_inode_cachep);
655} 660}
656 661
diff --git a/fs/read_write.c b/fs/read_write.c
index 1adfb691e4f1..d06534857e9e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -232,23 +232,18 @@ EXPORT_SYMBOL(vfs_llseek);
232SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) 232SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
233{ 233{
234 off_t retval; 234 off_t retval;
235 struct file * file; 235 struct fd f = fdget(fd);
236 int fput_needed; 236 if (!f.file)
237 237 return -EBADF;
238 retval = -EBADF;
239 file = fget_light(fd, &fput_needed);
240 if (!file)
241 goto bad;
242 238
243 retval = -EINVAL; 239 retval = -EINVAL;
244 if (origin <= SEEK_MAX) { 240 if (origin <= SEEK_MAX) {
245 loff_t res = vfs_llseek(file, offset, origin); 241 loff_t res = vfs_llseek(f.file, offset, origin);
246 retval = res; 242 retval = res;
247 if (res != (loff_t)retval) 243 if (res != (loff_t)retval)
248 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 244 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
249 } 245 }
250 fput_light(file, fput_needed); 246 fdput(f);
251bad:
252 return retval; 247 return retval;
253} 248}
254 249
@@ -258,20 +253,17 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
258 unsigned int, origin) 253 unsigned int, origin)
259{ 254{
260 int retval; 255 int retval;
261 struct file * file; 256 struct fd f = fdget(fd);
262 loff_t offset; 257 loff_t offset;
263 int fput_needed;
264 258
265 retval = -EBADF; 259 if (!f.file)
266 file = fget_light(fd, &fput_needed); 260 return -EBADF;
267 if (!file)
268 goto bad;
269 261
270 retval = -EINVAL; 262 retval = -EINVAL;
271 if (origin > SEEK_MAX) 263 if (origin > SEEK_MAX)
272 goto out_putf; 264 goto out_putf;
273 265
274 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 266 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
275 origin); 267 origin);
276 268
277 retval = (int)offset; 269 retval = (int)offset;
@@ -281,8 +273,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
281 retval = 0; 273 retval = 0;
282 } 274 }
283out_putf: 275out_putf:
284 fput_light(file, fput_needed); 276 fdput(f);
285bad:
286 return retval; 277 return retval;
287} 278}
288#endif 279#endif
@@ -461,34 +452,29 @@ static inline void file_pos_write(struct file *file, loff_t pos)
461 452
462SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 453SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
463{ 454{
464 struct file *file; 455 struct fd f = fdget(fd);
465 ssize_t ret = -EBADF; 456 ssize_t ret = -EBADF;
466 int fput_needed;
467 457
468 file = fget_light(fd, &fput_needed); 458 if (f.file) {
469 if (file) { 459 loff_t pos = file_pos_read(f.file);
470 loff_t pos = file_pos_read(file); 460 ret = vfs_read(f.file, buf, count, &pos);
471 ret = vfs_read(file, buf, count, &pos); 461 file_pos_write(f.file, pos);
472 file_pos_write(file, pos); 462 fdput(f);
473 fput_light(file, fput_needed);
474 } 463 }
475
476 return ret; 464 return ret;
477} 465}
478 466
479SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 467SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
480 size_t, count) 468 size_t, count)
481{ 469{
482 struct file *file; 470 struct fd f = fdget(fd);
483 ssize_t ret = -EBADF; 471 ssize_t ret = -EBADF;
484 int fput_needed;
485 472
486 file = fget_light(fd, &fput_needed); 473 if (f.file) {
487 if (file) { 474 loff_t pos = file_pos_read(f.file);
488 loff_t pos = file_pos_read(file); 475 ret = vfs_write(f.file, buf, count, &pos);
489 ret = vfs_write(file, buf, count, &pos); 476 file_pos_write(f.file, pos);
490 file_pos_write(file, pos); 477 fdput(f);
491 fput_light(file, fput_needed);
492 } 478 }
493 479
494 return ret; 480 return ret;
@@ -497,19 +483,18 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
497SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, 483SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
498 size_t count, loff_t pos) 484 size_t count, loff_t pos)
499{ 485{
500 struct file *file; 486 struct fd f;
501 ssize_t ret = -EBADF; 487 ssize_t ret = -EBADF;
502 int fput_needed;
503 488
504 if (pos < 0) 489 if (pos < 0)
505 return -EINVAL; 490 return -EINVAL;
506 491
507 file = fget_light(fd, &fput_needed); 492 f = fdget(fd);
508 if (file) { 493 if (f.file) {
509 ret = -ESPIPE; 494 ret = -ESPIPE;
510 if (file->f_mode & FMODE_PREAD) 495 if (f.file->f_mode & FMODE_PREAD)
511 ret = vfs_read(file, buf, count, &pos); 496 ret = vfs_read(f.file, buf, count, &pos);
512 fput_light(file, fput_needed); 497 fdput(f);
513 } 498 }
514 499
515 return ret; 500 return ret;
@@ -526,19 +511,18 @@ SYSCALL_ALIAS(sys_pread64, SyS_pread64);
526SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, 511SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
527 size_t count, loff_t pos) 512 size_t count, loff_t pos)
528{ 513{
529 struct file *file; 514 struct fd f;
530 ssize_t ret = -EBADF; 515 ssize_t ret = -EBADF;
531 int fput_needed;
532 516
533 if (pos < 0) 517 if (pos < 0)
534 return -EINVAL; 518 return -EINVAL;
535 519
536 file = fget_light(fd, &fput_needed); 520 f = fdget(fd);
537 if (file) { 521 if (f.file) {
538 ret = -ESPIPE; 522 ret = -ESPIPE;
539 if (file->f_mode & FMODE_PWRITE) 523 if (f.file->f_mode & FMODE_PWRITE)
540 ret = vfs_write(file, buf, count, &pos); 524 ret = vfs_write(f.file, buf, count, &pos);
541 fput_light(file, fput_needed); 525 fdput(f);
542 } 526 }
543 527
544 return ret; 528 return ret;
@@ -789,16 +773,14 @@ EXPORT_SYMBOL(vfs_writev);
789SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 773SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
790 unsigned long, vlen) 774 unsigned long, vlen)
791{ 775{
792 struct file *file; 776 struct fd f = fdget(fd);
793 ssize_t ret = -EBADF; 777 ssize_t ret = -EBADF;
794 int fput_needed;
795 778
796 file = fget_light(fd, &fput_needed); 779 if (f.file) {
797 if (file) { 780 loff_t pos = file_pos_read(f.file);
798 loff_t pos = file_pos_read(file); 781 ret = vfs_readv(f.file, vec, vlen, &pos);
799 ret = vfs_readv(file, vec, vlen, &pos); 782 file_pos_write(f.file, pos);
800 file_pos_write(file, pos); 783 fdput(f);
801 fput_light(file, fput_needed);
802 } 784 }
803 785
804 if (ret > 0) 786 if (ret > 0)
@@ -810,16 +792,14 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
810SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 792SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
811 unsigned long, vlen) 793 unsigned long, vlen)
812{ 794{
813 struct file *file; 795 struct fd f = fdget(fd);
814 ssize_t ret = -EBADF; 796 ssize_t ret = -EBADF;
815 int fput_needed;
816 797
817 file = fget_light(fd, &fput_needed); 798 if (f.file) {
818 if (file) { 799 loff_t pos = file_pos_read(f.file);
819 loff_t pos = file_pos_read(file); 800 ret = vfs_writev(f.file, vec, vlen, &pos);
820 ret = vfs_writev(file, vec, vlen, &pos); 801 file_pos_write(f.file, pos);
821 file_pos_write(file, pos); 802 fdput(f);
822 fput_light(file, fput_needed);
823 } 803 }
824 804
825 if (ret > 0) 805 if (ret > 0)
@@ -838,19 +818,18 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
838 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 818 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
839{ 819{
840 loff_t pos = pos_from_hilo(pos_h, pos_l); 820 loff_t pos = pos_from_hilo(pos_h, pos_l);
841 struct file *file; 821 struct fd f;
842 ssize_t ret = -EBADF; 822 ssize_t ret = -EBADF;
843 int fput_needed;
844 823
845 if (pos < 0) 824 if (pos < 0)
846 return -EINVAL; 825 return -EINVAL;
847 826
848 file = fget_light(fd, &fput_needed); 827 f = fdget(fd);
849 if (file) { 828 if (f.file) {
850 ret = -ESPIPE; 829 ret = -ESPIPE;
851 if (file->f_mode & FMODE_PREAD) 830 if (f.file->f_mode & FMODE_PREAD)
852 ret = vfs_readv(file, vec, vlen, &pos); 831 ret = vfs_readv(f.file, vec, vlen, &pos);
853 fput_light(file, fput_needed); 832 fdput(f);
854 } 833 }
855 834
856 if (ret > 0) 835 if (ret > 0)
@@ -863,19 +842,18 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
863 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 842 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
864{ 843{
865 loff_t pos = pos_from_hilo(pos_h, pos_l); 844 loff_t pos = pos_from_hilo(pos_h, pos_l);
866 struct file *file; 845 struct fd f;
867 ssize_t ret = -EBADF; 846 ssize_t ret = -EBADF;
868 int fput_needed;
869 847
870 if (pos < 0) 848 if (pos < 0)
871 return -EINVAL; 849 return -EINVAL;
872 850
873 file = fget_light(fd, &fput_needed); 851 f = fdget(fd);
874 if (file) { 852 if (f.file) {
875 ret = -ESPIPE; 853 ret = -ESPIPE;
876 if (file->f_mode & FMODE_PWRITE) 854 if (f.file->f_mode & FMODE_PWRITE)
877 ret = vfs_writev(file, vec, vlen, &pos); 855 ret = vfs_writev(f.file, vec, vlen, &pos);
878 fput_light(file, fput_needed); 856 fdput(f);
879 } 857 }
880 858
881 if (ret > 0) 859 if (ret > 0)
@@ -884,31 +862,31 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
884 return ret; 862 return ret;
885} 863}
886 864
887static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 865ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
888 size_t count, loff_t max) 866 loff_t max)
889{ 867{
890 struct file * in_file, * out_file; 868 struct fd in, out;
891 struct inode * in_inode, * out_inode; 869 struct inode *in_inode, *out_inode;
892 loff_t pos; 870 loff_t pos;
893 ssize_t retval; 871 ssize_t retval;
894 int fput_needed_in, fput_needed_out, fl; 872 int fl;
895 873
896 /* 874 /*
897 * Get input file, and verify that it is ok.. 875 * Get input file, and verify that it is ok..
898 */ 876 */
899 retval = -EBADF; 877 retval = -EBADF;
900 in_file = fget_light(in_fd, &fput_needed_in); 878 in = fdget(in_fd);
901 if (!in_file) 879 if (!in.file)
902 goto out; 880 goto out;
903 if (!(in_file->f_mode & FMODE_READ)) 881 if (!(in.file->f_mode & FMODE_READ))
904 goto fput_in; 882 goto fput_in;
905 retval = -ESPIPE; 883 retval = -ESPIPE;
906 if (!ppos) 884 if (!ppos)
907 ppos = &in_file->f_pos; 885 ppos = &in.file->f_pos;
908 else 886 else
909 if (!(in_file->f_mode & FMODE_PREAD)) 887 if (!(in.file->f_mode & FMODE_PREAD))
910 goto fput_in; 888 goto fput_in;
911 retval = rw_verify_area(READ, in_file, ppos, count); 889 retval = rw_verify_area(READ, in.file, ppos, count);
912 if (retval < 0) 890 if (retval < 0)
913 goto fput_in; 891 goto fput_in;
914 count = retval; 892 count = retval;
@@ -917,15 +895,15 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
917 * Get output file, and verify that it is ok.. 895 * Get output file, and verify that it is ok..
918 */ 896 */
919 retval = -EBADF; 897 retval = -EBADF;
920 out_file = fget_light(out_fd, &fput_needed_out); 898 out = fdget(out_fd);
921 if (!out_file) 899 if (!out.file)
922 goto fput_in; 900 goto fput_in;
923 if (!(out_file->f_mode & FMODE_WRITE)) 901 if (!(out.file->f_mode & FMODE_WRITE))
924 goto fput_out; 902 goto fput_out;
925 retval = -EINVAL; 903 retval = -EINVAL;
926 in_inode = in_file->f_path.dentry->d_inode; 904 in_inode = in.file->f_path.dentry->d_inode;
927 out_inode = out_file->f_path.dentry->d_inode; 905 out_inode = out.file->f_path.dentry->d_inode;
928 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 906 retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count);
929 if (retval < 0) 907 if (retval < 0)
930 goto fput_out; 908 goto fput_out;
931 count = retval; 909 count = retval;
@@ -949,10 +927,10 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
949 * and the application is arguably buggy if it doesn't expect 927 * and the application is arguably buggy if it doesn't expect
950 * EAGAIN on a non-blocking file descriptor. 928 * EAGAIN on a non-blocking file descriptor.
951 */ 929 */
952 if (in_file->f_flags & O_NONBLOCK) 930 if (in.file->f_flags & O_NONBLOCK)
953 fl = SPLICE_F_NONBLOCK; 931 fl = SPLICE_F_NONBLOCK;
954#endif 932#endif
955 retval = do_splice_direct(in_file, ppos, out_file, count, fl); 933 retval = do_splice_direct(in.file, ppos, out.file, count, fl);
956 934
957 if (retval > 0) { 935 if (retval > 0) {
958 add_rchar(current, retval); 936 add_rchar(current, retval);
@@ -965,9 +943,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
965 retval = -EOVERFLOW; 943 retval = -EOVERFLOW;
966 944
967fput_out: 945fput_out:
968 fput_light(out_file, fput_needed_out); 946 fdput(out);
969fput_in: 947fput_in:
970 fput_light(in_file, fput_needed_in); 948 fdput(in);
971out: 949out:
972 return retval; 950 return retval;
973} 951}
diff --git a/fs/read_write.h b/fs/read_write.h
index d07b954c6e0c..d3e00ef67420 100644
--- a/fs/read_write.h
+++ b/fs/read_write.h
@@ -12,3 +12,5 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
12 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); 12 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn);
13ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 13ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
14 unsigned long nr_segs, loff_t *ppos, io_fn_t fn); 14 unsigned long nr_segs, loff_t *ppos, io_fn_t fn);
15ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
16 loff_t max);
diff --git a/fs/readdir.c b/fs/readdir.c
index 39e3370d79cf..5e69ef533b77 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -106,22 +106,20 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
106 struct old_linux_dirent __user *, dirent, unsigned int, count) 106 struct old_linux_dirent __user *, dirent, unsigned int, count)
107{ 107{
108 int error; 108 int error;
109 struct file * file; 109 struct fd f = fdget(fd);
110 struct readdir_callback buf; 110 struct readdir_callback buf;
111 int fput_needed;
112 111
113 file = fget_light(fd, &fput_needed); 112 if (!f.file)
114 if (!file)
115 return -EBADF; 113 return -EBADF;
116 114
117 buf.result = 0; 115 buf.result = 0;
118 buf.dirent = dirent; 116 buf.dirent = dirent;
119 117
120 error = vfs_readdir(file, fillonedir, &buf); 118 error = vfs_readdir(f.file, fillonedir, &buf);
121 if (buf.result) 119 if (buf.result)
122 error = buf.result; 120 error = buf.result;
123 121
124 fput_light(file, fput_needed); 122 fdput(f);
125 return error; 123 return error;
126} 124}
127 125
@@ -191,17 +189,16 @@ efault:
191SYSCALL_DEFINE3(getdents, unsigned int, fd, 189SYSCALL_DEFINE3(getdents, unsigned int, fd,
192 struct linux_dirent __user *, dirent, unsigned int, count) 190 struct linux_dirent __user *, dirent, unsigned int, count)
193{ 191{
194 struct file * file; 192 struct fd f;
195 struct linux_dirent __user * lastdirent; 193 struct linux_dirent __user * lastdirent;
196 struct getdents_callback buf; 194 struct getdents_callback buf;
197 int fput_needed;
198 int error; 195 int error;
199 196
200 if (!access_ok(VERIFY_WRITE, dirent, count)) 197 if (!access_ok(VERIFY_WRITE, dirent, count))
201 return -EFAULT; 198 return -EFAULT;
202 199
203 file = fget_light(fd, &fput_needed); 200 f = fdget(fd);
204 if (!file) 201 if (!f.file)
205 return -EBADF; 202 return -EBADF;
206 203
207 buf.current_dir = dirent; 204 buf.current_dir = dirent;
@@ -209,17 +206,17 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
209 buf.count = count; 206 buf.count = count;
210 buf.error = 0; 207 buf.error = 0;
211 208
212 error = vfs_readdir(file, filldir, &buf); 209 error = vfs_readdir(f.file, filldir, &buf);
213 if (error >= 0) 210 if (error >= 0)
214 error = buf.error; 211 error = buf.error;
215 lastdirent = buf.previous; 212 lastdirent = buf.previous;
216 if (lastdirent) { 213 if (lastdirent) {
217 if (put_user(file->f_pos, &lastdirent->d_off)) 214 if (put_user(f.file->f_pos, &lastdirent->d_off))
218 error = -EFAULT; 215 error = -EFAULT;
219 else 216 else
220 error = count - buf.count; 217 error = count - buf.count;
221 } 218 }
222 fput_light(file, fput_needed); 219 fdput(f);
223 return error; 220 return error;
224} 221}
225 222
@@ -272,17 +269,16 @@ efault:
272SYSCALL_DEFINE3(getdents64, unsigned int, fd, 269SYSCALL_DEFINE3(getdents64, unsigned int, fd,
273 struct linux_dirent64 __user *, dirent, unsigned int, count) 270 struct linux_dirent64 __user *, dirent, unsigned int, count)
274{ 271{
275 struct file * file; 272 struct fd f;
276 struct linux_dirent64 __user * lastdirent; 273 struct linux_dirent64 __user * lastdirent;
277 struct getdents_callback64 buf; 274 struct getdents_callback64 buf;
278 int fput_needed;
279 int error; 275 int error;
280 276
281 if (!access_ok(VERIFY_WRITE, dirent, count)) 277 if (!access_ok(VERIFY_WRITE, dirent, count))
282 return -EFAULT; 278 return -EFAULT;
283 279
284 file = fget_light(fd, &fput_needed); 280 f = fdget(fd);
285 if (!file) 281 if (!f.file)
286 return -EBADF; 282 return -EBADF;
287 283
288 buf.current_dir = dirent; 284 buf.current_dir = dirent;
@@ -290,17 +286,17 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
290 buf.count = count; 286 buf.count = count;
291 buf.error = 0; 287 buf.error = 0;
292 288
293 error = vfs_readdir(file, filldir64, &buf); 289 error = vfs_readdir(f.file, filldir64, &buf);
294 if (error >= 0) 290 if (error >= 0)
295 error = buf.error; 291 error = buf.error;
296 lastdirent = buf.previous; 292 lastdirent = buf.previous;
297 if (lastdirent) { 293 if (lastdirent) {
298 typeof(lastdirent->d_off) d_off = file->f_pos; 294 typeof(lastdirent->d_off) d_off = f.file->f_pos;
299 if (__put_user(d_off, &lastdirent->d_off)) 295 if (__put_user(d_off, &lastdirent->d_off))
300 error = -EFAULT; 296 error = -EFAULT;
301 else 297 else
302 error = count - buf.count; 298 error = count - buf.count;
303 } 299 }
304 fput_light(file, fput_needed); 300 fdput(f);
305 return error; 301 return error;
306} 302}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7a37dabf5a96..1078ae179993 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -608,6 +608,11 @@ static int init_inodecache(void)
608 608
609static void destroy_inodecache(void) 609static void destroy_inodecache(void)
610{ 610{
611 /*
612 * Make sure all delayed rcu free inodes are flushed before we
613 * destroy cache.
614 */
615 rcu_barrier();
611 kmem_cache_destroy(reiserfs_inode_cachep); 616 kmem_cache_destroy(reiserfs_inode_cachep);
612} 617}
613 618
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 77c5f2173983..fd7c5f60b46b 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -648,6 +648,11 @@ error_register:
648static void __exit exit_romfs_fs(void) 648static void __exit exit_romfs_fs(void)
649{ 649{
650 unregister_filesystem(&romfs_fs_type); 650 unregister_filesystem(&romfs_fs_type);
651 /*
652 * Make sure all delayed rcu free inodes are flushed before we
653 * destroy cache.
654 */
655 rcu_barrier();
651 kmem_cache_destroy(romfs_inode_cachep); 656 kmem_cache_destroy(romfs_inode_cachep);
652} 657}
653 658
diff --git a/fs/select.c b/fs/select.c
index db14c781335e..2ef72d965036 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -220,8 +220,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
220 struct poll_table_entry *entry = poll_get_entry(pwq); 220 struct poll_table_entry *entry = poll_get_entry(pwq);
221 if (!entry) 221 if (!entry)
222 return; 222 return;
223 get_file(filp); 223 entry->filp = get_file(filp);
224 entry->filp = filp;
225 entry->wait_address = wait_address; 224 entry->wait_address = wait_address;
226 entry->key = p->_key; 225 entry->key = p->_key;
227 init_waitqueue_func_entry(&entry->wait, pollwake); 226 init_waitqueue_func_entry(&entry->wait, pollwake);
@@ -429,8 +428,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
429 for (i = 0; i < n; ++rinp, ++routp, ++rexp) { 428 for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
430 unsigned long in, out, ex, all_bits, bit = 1, mask, j; 429 unsigned long in, out, ex, all_bits, bit = 1, mask, j;
431 unsigned long res_in = 0, res_out = 0, res_ex = 0; 430 unsigned long res_in = 0, res_out = 0, res_ex = 0;
432 const struct file_operations *f_op = NULL;
433 struct file *file = NULL;
434 431
435 in = *inp++; out = *outp++; ex = *exp++; 432 in = *inp++; out = *outp++; ex = *exp++;
436 all_bits = in | out | ex; 433 all_bits = in | out | ex;
@@ -440,20 +437,21 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
440 } 437 }
441 438
442 for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { 439 for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
443 int fput_needed; 440 struct fd f;
444 if (i >= n) 441 if (i >= n)
445 break; 442 break;
446 if (!(bit & all_bits)) 443 if (!(bit & all_bits))
447 continue; 444 continue;
448 file = fget_light(i, &fput_needed); 445 f = fdget(i);
449 if (file) { 446 if (f.file) {
450 f_op = file->f_op; 447 const struct file_operations *f_op;
448 f_op = f.file->f_op;
451 mask = DEFAULT_POLLMASK; 449 mask = DEFAULT_POLLMASK;
452 if (f_op && f_op->poll) { 450 if (f_op && f_op->poll) {
453 wait_key_set(wait, in, out, bit); 451 wait_key_set(wait, in, out, bit);
454 mask = (*f_op->poll)(file, wait); 452 mask = (*f_op->poll)(f.file, wait);
455 } 453 }
456 fput_light(file, fput_needed); 454 fdput(f);
457 if ((mask & POLLIN_SET) && (in & bit)) { 455 if ((mask & POLLIN_SET) && (in & bit)) {
458 res_in |= bit; 456 res_in |= bit;
459 retval++; 457 retval++;
@@ -726,20 +724,17 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
726 mask = 0; 724 mask = 0;
727 fd = pollfd->fd; 725 fd = pollfd->fd;
728 if (fd >= 0) { 726 if (fd >= 0) {
729 int fput_needed; 727 struct fd f = fdget(fd);
730 struct file * file;
731
732 file = fget_light(fd, &fput_needed);
733 mask = POLLNVAL; 728 mask = POLLNVAL;
734 if (file != NULL) { 729 if (f.file) {
735 mask = DEFAULT_POLLMASK; 730 mask = DEFAULT_POLLMASK;
736 if (file->f_op && file->f_op->poll) { 731 if (f.file->f_op && f.file->f_op->poll) {
737 pwait->_key = pollfd->events|POLLERR|POLLHUP; 732 pwait->_key = pollfd->events|POLLERR|POLLHUP;
738 mask = file->f_op->poll(file, pwait); 733 mask = f.file->f_op->poll(f.file, pwait);
739 } 734 }
740 /* Mask out unneeded events. */ 735 /* Mask out unneeded events. */
741 mask &= pollfd->events | POLLERR | POLLHUP; 736 mask &= pollfd->events | POLLERR | POLLHUP;
742 fput_light(file, fput_needed); 737 fdput(f);
743 } 738 }
744 } 739 }
745 pollfd->revents = mask; 740 pollfd->revents = mask;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 9f35a37173de..8bee4e570911 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -269,13 +269,12 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
269 if (ufd < 0) 269 if (ufd < 0)
270 kfree(ctx); 270 kfree(ctx);
271 } else { 271 } else {
272 int fput_needed; 272 struct fd f = fdget(ufd);
273 struct file *file = fget_light(ufd, &fput_needed); 273 if (!f.file)
274 if (!file)
275 return -EBADF; 274 return -EBADF;
276 ctx = file->private_data; 275 ctx = f.file->private_data;
277 if (file->f_op != &signalfd_fops) { 276 if (f.file->f_op != &signalfd_fops) {
278 fput_light(file, fput_needed); 277 fdput(f);
279 return -EINVAL; 278 return -EINVAL;
280 } 279 }
281 spin_lock_irq(&current->sighand->siglock); 280 spin_lock_irq(&current->sighand->siglock);
@@ -283,7 +282,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
283 spin_unlock_irq(&current->sighand->siglock); 282 spin_unlock_irq(&current->sighand->siglock);
284 283
285 wake_up(&current->sighand->signalfd_wqh); 284 wake_up(&current->sighand->signalfd_wqh);
286 fput_light(file, fput_needed); 285 fdput(f);
287 } 286 }
288 287
289 return ufd; 288 return ufd;
diff --git a/fs/splice.c b/fs/splice.c
index 41514dd89462..13e5b4776e7a 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1666,9 +1666,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1666SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, 1666SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
1667 unsigned long, nr_segs, unsigned int, flags) 1667 unsigned long, nr_segs, unsigned int, flags)
1668{ 1668{
1669 struct file *file; 1669 struct fd f;
1670 long error; 1670 long error;
1671 int fput;
1672 1671
1673 if (unlikely(nr_segs > UIO_MAXIOV)) 1672 if (unlikely(nr_segs > UIO_MAXIOV))
1674 return -EINVAL; 1673 return -EINVAL;
@@ -1676,14 +1675,14 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
1676 return 0; 1675 return 0;
1677 1676
1678 error = -EBADF; 1677 error = -EBADF;
1679 file = fget_light(fd, &fput); 1678 f = fdget(fd);
1680 if (file) { 1679 if (f.file) {
1681 if (file->f_mode & FMODE_WRITE) 1680 if (f.file->f_mode & FMODE_WRITE)
1682 error = vmsplice_to_pipe(file, iov, nr_segs, flags); 1681 error = vmsplice_to_pipe(f.file, iov, nr_segs, flags);
1683 else if (file->f_mode & FMODE_READ) 1682 else if (f.file->f_mode & FMODE_READ)
1684 error = vmsplice_to_user(file, iov, nr_segs, flags); 1683 error = vmsplice_to_user(f.file, iov, nr_segs, flags);
1685 1684
1686 fput_light(file, fput); 1685 fdput(f);
1687 } 1686 }
1688 1687
1689 return error; 1688 return error;
@@ -1693,30 +1692,27 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
1693 int, fd_out, loff_t __user *, off_out, 1692 int, fd_out, loff_t __user *, off_out,
1694 size_t, len, unsigned int, flags) 1693 size_t, len, unsigned int, flags)
1695{ 1694{
1695 struct fd in, out;
1696 long error; 1696 long error;
1697 struct file *in, *out;
1698 int fput_in, fput_out;
1699 1697
1700 if (unlikely(!len)) 1698 if (unlikely(!len))
1701 return 0; 1699 return 0;
1702 1700
1703 error = -EBADF; 1701 error = -EBADF;
1704 in = fget_light(fd_in, &fput_in); 1702 in = fdget(fd_in);
1705 if (in) { 1703 if (in.file) {
1706 if (in->f_mode & FMODE_READ) { 1704 if (in.file->f_mode & FMODE_READ) {
1707 out = fget_light(fd_out, &fput_out); 1705 out = fdget(fd_out);
1708 if (out) { 1706 if (out.file) {
1709 if (out->f_mode & FMODE_WRITE) 1707 if (out.file->f_mode & FMODE_WRITE)
1710 error = do_splice(in, off_in, 1708 error = do_splice(in.file, off_in,
1711 out, off_out, 1709 out.file, off_out,
1712 len, flags); 1710 len, flags);
1713 fput_light(out, fput_out); 1711 fdput(out);
1714 } 1712 }
1715 } 1713 }
1716 1714 fdput(in);
1717 fput_light(in, fput_in);
1718 } 1715 }
1719
1720 return error; 1716 return error;
1721} 1717}
1722 1718
@@ -2027,26 +2023,25 @@ static long do_tee(struct file *in, struct file *out, size_t len,
2027 2023
2028SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) 2024SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
2029{ 2025{
2030 struct file *in; 2026 struct fd in;
2031 int error, fput_in; 2027 int error;
2032 2028
2033 if (unlikely(!len)) 2029 if (unlikely(!len))
2034 return 0; 2030 return 0;
2035 2031
2036 error = -EBADF; 2032 error = -EBADF;
2037 in = fget_light(fdin, &fput_in); 2033 in = fdget(fdin);
2038 if (in) { 2034 if (in.file) {
2039 if (in->f_mode & FMODE_READ) { 2035 if (in.file->f_mode & FMODE_READ) {
2040 int fput_out; 2036 struct fd out = fdget(fdout);
2041 struct file *out = fget_light(fdout, &fput_out); 2037 if (out.file) {
2042 2038 if (out.file->f_mode & FMODE_WRITE)
2043 if (out) { 2039 error = do_tee(in.file, out.file,
2044 if (out->f_mode & FMODE_WRITE) 2040 len, flags);
2045 error = do_tee(in, out, len, flags); 2041 fdput(out);
2046 fput_light(out, fput_out);
2047 } 2042 }
2048 } 2043 }
2049 fput_light(in, fput_in); 2044 fdput(in);
2050 } 2045 }
2051 2046
2052 return error; 2047 return error;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 29cd014ed3a1..260e3928d4f5 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -425,6 +425,11 @@ static int __init init_inodecache(void)
425 425
426static void destroy_inodecache(void) 426static void destroy_inodecache(void)
427{ 427{
428 /*
429 * Make sure all delayed rcu free inodes are flushed before we
430 * destroy cache.
431 */
432 rcu_barrier();
428 kmem_cache_destroy(squashfs_inode_cachep); 433 kmem_cache_destroy(squashfs_inode_cachep);
429} 434}
430 435
diff --git a/fs/stat.c b/fs/stat.c
index 208039eec6c7..eae494630a36 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr);
57 57
58int vfs_fstat(unsigned int fd, struct kstat *stat) 58int vfs_fstat(unsigned int fd, struct kstat *stat)
59{ 59{
60 int fput_needed; 60 struct fd f = fdget_raw(fd);
61 struct file *f = fget_raw_light(fd, &fput_needed);
62 int error = -EBADF; 61 int error = -EBADF;
63 62
64 if (f) { 63 if (f.file) {
65 error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); 64 error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry,
66 fput_light(f, fput_needed); 65 stat);
66 fdput(f);
67 } 67 }
68 return error; 68 return error;
69} 69}
diff --git a/fs/statfs.c b/fs/statfs.c
index 95ad5c0e586c..f8e832e6f0a2 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -87,12 +87,11 @@ int user_statfs(const char __user *pathname, struct kstatfs *st)
87 87
88int fd_statfs(int fd, struct kstatfs *st) 88int fd_statfs(int fd, struct kstatfs *st)
89{ 89{
90 int fput_needed; 90 struct fd f = fdget(fd);
91 struct file *file = fget_light(fd, &fput_needed);
92 int error = -EBADF; 91 int error = -EBADF;
93 if (file) { 92 if (f.file) {
94 error = vfs_statfs(&file->f_path, st); 93 error = vfs_statfs(&f.file->f_path, st);
95 fput_light(file, fput_needed); 94 fdput(f);
96 } 95 }
97 return error; 96 return error;
98} 97}
diff --git a/fs/super.c b/fs/super.c
index 0902cfa6a12e..5fdf7ff32c4e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -307,12 +307,6 @@ void deactivate_locked_super(struct super_block *s)
307 307
308 /* caches are now gone, we can safely kill the shrinker now */ 308 /* caches are now gone, we can safely kill the shrinker now */
309 unregister_shrinker(&s->s_shrink); 309 unregister_shrinker(&s->s_shrink);
310
311 /*
312 * We need to call rcu_barrier so all the delayed rcu free
313 * inodes are flushed before we release the fs module.
314 */
315 rcu_barrier();
316 put_filesystem(fs); 310 put_filesystem(fs);
317 put_super(s); 311 put_super(s);
318 } else { 312 } else {
diff --git a/fs/sync.c b/fs/sync.c
index eb8722dc556f..14eefeb44636 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -148,21 +148,19 @@ void emergency_sync(void)
148 */ 148 */
149SYSCALL_DEFINE1(syncfs, int, fd) 149SYSCALL_DEFINE1(syncfs, int, fd)
150{ 150{
151 struct file *file; 151 struct fd f = fdget(fd);
152 struct super_block *sb; 152 struct super_block *sb;
153 int ret; 153 int ret;
154 int fput_needed;
155 154
156 file = fget_light(fd, &fput_needed); 155 if (!f.file)
157 if (!file)
158 return -EBADF; 156 return -EBADF;
159 sb = file->f_dentry->d_sb; 157 sb = f.file->f_dentry->d_sb;
160 158
161 down_read(&sb->s_umount); 159 down_read(&sb->s_umount);
162 ret = sync_filesystem(sb); 160 ret = sync_filesystem(sb);
163 up_read(&sb->s_umount); 161 up_read(&sb->s_umount);
164 162
165 fput_light(file, fput_needed); 163 fdput(f);
166 return ret; 164 return ret;
167} 165}
168 166
@@ -201,14 +199,12 @@ EXPORT_SYMBOL(vfs_fsync);
201 199
202static int do_fsync(unsigned int fd, int datasync) 200static int do_fsync(unsigned int fd, int datasync)
203{ 201{
204 struct file *file; 202 struct fd f = fdget(fd);
205 int ret = -EBADF; 203 int ret = -EBADF;
206 int fput_needed;
207 204
208 file = fget_light(fd, &fput_needed); 205 if (f.file) {
209 if (file) { 206 ret = vfs_fsync(f.file, datasync);
210 ret = vfs_fsync(file, datasync); 207 fdput(f);
211 fput_light(file, fput_needed);
212 } 208 }
213 return ret; 209 return ret;
214} 210}
@@ -291,10 +287,9 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
291 unsigned int flags) 287 unsigned int flags)
292{ 288{
293 int ret; 289 int ret;
294 struct file *file; 290 struct fd f;
295 struct address_space *mapping; 291 struct address_space *mapping;
296 loff_t endbyte; /* inclusive */ 292 loff_t endbyte; /* inclusive */
297 int fput_needed;
298 umode_t i_mode; 293 umode_t i_mode;
299 294
300 ret = -EINVAL; 295 ret = -EINVAL;
@@ -333,17 +328,17 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
333 endbyte--; /* inclusive */ 328 endbyte--; /* inclusive */
334 329
335 ret = -EBADF; 330 ret = -EBADF;
336 file = fget_light(fd, &fput_needed); 331 f = fdget(fd);
337 if (!file) 332 if (!f.file)
338 goto out; 333 goto out;
339 334
340 i_mode = file->f_path.dentry->d_inode->i_mode; 335 i_mode = f.file->f_path.dentry->d_inode->i_mode;
341 ret = -ESPIPE; 336 ret = -ESPIPE;
342 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && 337 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
343 !S_ISLNK(i_mode)) 338 !S_ISLNK(i_mode))
344 goto out_put; 339 goto out_put;
345 340
346 mapping = file->f_mapping; 341 mapping = f.file->f_mapping;
347 if (!mapping) { 342 if (!mapping) {
348 ret = -EINVAL; 343 ret = -EINVAL;
349 goto out_put; 344 goto out_put;
@@ -366,7 +361,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
366 ret = filemap_fdatawait_range(mapping, offset, endbyte); 361 ret = filemap_fdatawait_range(mapping, offset, endbyte);
367 362
368out_put: 363out_put:
369 fput_light(file, fput_needed); 364 fdput(f);
370out: 365out:
371 return ret; 366 return ret;
372} 367}
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index b23ab736685d..d33e506c1eac 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -360,5 +360,10 @@ int __init sysv_init_icache(void)
360 360
361void sysv_destroy_icache(void) 361void sysv_destroy_icache(void)
362{ 362{
363 /*
364 * Make sure all delayed rcu free inodes are flushed before we
365 * destroy cache.
366 */
367 rcu_barrier();
363 kmem_cache_destroy(sysv_inode_cachep); 368 kmem_cache_destroy(sysv_inode_cachep);
364} 369}
diff --git a/fs/timerfd.c b/fs/timerfd.c
index dffeb3795af1..d03822bbf190 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -234,19 +234,17 @@ static const struct file_operations timerfd_fops = {
234 .llseek = noop_llseek, 234 .llseek = noop_llseek,
235}; 235};
236 236
237static struct file *timerfd_fget(int fd) 237static int timerfd_fget(int fd, struct fd *p)
238{ 238{
239 struct file *file; 239 struct fd f = fdget(fd);
240 240 if (!f.file)
241 file = fget(fd); 241 return -EBADF;
242 if (!file) 242 if (f.file->f_op != &timerfd_fops) {
243 return ERR_PTR(-EBADF); 243 fdput(f);
244 if (file->f_op != &timerfd_fops) { 244 return -EINVAL;
245 fput(file);
246 return ERR_PTR(-EINVAL);
247 } 245 }
248 246 *p = f;
249 return file; 247 return 0;
250} 248}
251 249
252SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) 250SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
@@ -284,7 +282,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
284 const struct itimerspec __user *, utmr, 282 const struct itimerspec __user *, utmr,
285 struct itimerspec __user *, otmr) 283 struct itimerspec __user *, otmr)
286{ 284{
287 struct file *file; 285 struct fd f;
288 struct timerfd_ctx *ctx; 286 struct timerfd_ctx *ctx;
289 struct itimerspec ktmr, kotmr; 287 struct itimerspec ktmr, kotmr;
290 int ret; 288 int ret;
@@ -297,10 +295,10 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
297 !timespec_valid(&ktmr.it_interval)) 295 !timespec_valid(&ktmr.it_interval))
298 return -EINVAL; 296 return -EINVAL;
299 297
300 file = timerfd_fget(ufd); 298 ret = timerfd_fget(ufd, &f);
301 if (IS_ERR(file)) 299 if (ret)
302 return PTR_ERR(file); 300 return ret;
303 ctx = file->private_data; 301 ctx = f.file->private_data;
304 302
305 timerfd_setup_cancel(ctx, flags); 303 timerfd_setup_cancel(ctx, flags);
306 304
@@ -334,7 +332,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
334 ret = timerfd_setup(ctx, flags, &ktmr); 332 ret = timerfd_setup(ctx, flags, &ktmr);
335 333
336 spin_unlock_irq(&ctx->wqh.lock); 334 spin_unlock_irq(&ctx->wqh.lock);
337 fput(file); 335 fdput(f);
338 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) 336 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
339 return -EFAULT; 337 return -EFAULT;
340 338
@@ -343,14 +341,13 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
343 341
344SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) 342SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
345{ 343{
346 struct file *file; 344 struct fd f;
347 struct timerfd_ctx *ctx; 345 struct timerfd_ctx *ctx;
348 struct itimerspec kotmr; 346 struct itimerspec kotmr;
349 347 int ret = timerfd_fget(ufd, &f);
350 file = timerfd_fget(ufd); 348 if (ret)
351 if (IS_ERR(file)) 349 return ret;
352 return PTR_ERR(file); 350 ctx = f.file->private_data;
353 ctx = file->private_data;
354 351
355 spin_lock_irq(&ctx->wqh.lock); 352 spin_lock_irq(&ctx->wqh.lock);
356 if (ctx->expired && ctx->tintv.tv64) { 353 if (ctx->expired && ctx->tintv.tv64) {
@@ -362,7 +359,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
362 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 359 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
363 kotmr.it_interval = ktime_to_timespec(ctx->tintv); 360 kotmr.it_interval = ktime_to_timespec(ctx->tintv);
364 spin_unlock_irq(&ctx->wqh.lock); 361 spin_unlock_irq(&ctx->wqh.lock);
365 fput(file); 362 fdput(f);
366 363
367 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; 364 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
368} 365}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 681f3a942444..49825427a0e8 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2298,6 +2298,12 @@ static void __exit ubifs_exit(void)
2298 dbg_debugfs_exit(); 2298 dbg_debugfs_exit();
2299 ubifs_compressors_exit(); 2299 ubifs_compressors_exit();
2300 unregister_shrinker(&ubifs_shrinker_info); 2300 unregister_shrinker(&ubifs_shrinker_info);
2301
2302 /*
2303 * Make sure all delayed rcu free inodes are flushed before we
2304 * destroy cache.
2305 */
2306 rcu_barrier();
2301 kmem_cache_destroy(ubifs_inode_slab); 2307 kmem_cache_destroy(ubifs_inode_slab);
2302 unregister_filesystem(&ubifs_fs_type); 2308 unregister_filesystem(&ubifs_fs_type);
2303} 2309}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 862741dddf27..d44fb568abe1 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -171,6 +171,11 @@ static int init_inodecache(void)
171 171
172static void destroy_inodecache(void) 172static void destroy_inodecache(void)
173{ 173{
174 /*
175 * Make sure all delayed rcu free inodes are flushed before we
176 * destroy cache.
177 */
178 rcu_barrier();
174 kmem_cache_destroy(udf_inode_cachep); 179 kmem_cache_destroy(udf_inode_cachep);
175} 180}
176 181
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 444927e5706b..f7cfecfe1cab 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1466,6 +1466,11 @@ static int init_inodecache(void)
1466 1466
1467static void destroy_inodecache(void) 1467static void destroy_inodecache(void)
1468{ 1468{
1469 /*
1470 * Make sure all delayed rcu free inodes are flushed before we
1471 * destroy cache.
1472 */
1473 rcu_barrier();
1469 kmem_cache_destroy(ufs_inode_cachep); 1474 kmem_cache_destroy(ufs_inode_cachep);
1470} 1475}
1471 1476
diff --git a/fs/utimes.c b/fs/utimes.c
index fa4dbe451e27..bb0696a41735 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -140,19 +140,18 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times,
140 goto out; 140 goto out;
141 141
142 if (filename == NULL && dfd != AT_FDCWD) { 142 if (filename == NULL && dfd != AT_FDCWD) {
143 int fput_needed; 143 struct fd f;
144 struct file *file;
145 144
146 if (flags & AT_SYMLINK_NOFOLLOW) 145 if (flags & AT_SYMLINK_NOFOLLOW)
147 goto out; 146 goto out;
148 147
149 file = fget_light(dfd, &fput_needed); 148 f = fdget(dfd);
150 error = -EBADF; 149 error = -EBADF;
151 if (!file) 150 if (!f.file)
152 goto out; 151 goto out;
153 152
154 error = utimes_common(&file->f_path, times); 153 error = utimes_common(&f.file->f_path, times);
155 fput_light(file, fput_needed); 154 fdput(f);
156 } else { 155 } else {
157 struct path path; 156 struct path path;
158 int lookup_flags = 0; 157 int lookup_flags = 0;
diff --git a/fs/xattr.c b/fs/xattr.c
index f7f7f09b0b41..ca15fbd391c8 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -403,22 +403,20 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
403SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, 403SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
404 const void __user *,value, size_t, size, int, flags) 404 const void __user *,value, size_t, size, int, flags)
405{ 405{
406 int fput_needed; 406 struct fd f = fdget(fd);
407 struct file *f;
408 struct dentry *dentry; 407 struct dentry *dentry;
409 int error = -EBADF; 408 int error = -EBADF;
410 409
411 f = fget_light(fd, &fput_needed); 410 if (!f.file)
412 if (!f)
413 return error; 411 return error;
414 dentry = f->f_path.dentry; 412 dentry = f.file->f_path.dentry;
415 audit_inode(NULL, dentry); 413 audit_inode(NULL, dentry);
416 error = mnt_want_write_file(f); 414 error = mnt_want_write_file(f.file);
417 if (!error) { 415 if (!error) {
418 error = setxattr(dentry, name, value, size, flags); 416 error = setxattr(dentry, name, value, size, flags);
419 mnt_drop_write_file(f); 417 mnt_drop_write_file(f.file);
420 } 418 }
421 fput_light(f, fput_needed); 419 fdput(f);
422 return error; 420 return error;
423} 421}
424 422
@@ -502,16 +500,14 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,
502SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, 500SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
503 void __user *, value, size_t, size) 501 void __user *, value, size_t, size)
504{ 502{
505 int fput_needed; 503 struct fd f = fdget(fd);
506 struct file *f;
507 ssize_t error = -EBADF; 504 ssize_t error = -EBADF;
508 505
509 f = fget_light(fd, &fput_needed); 506 if (!f.file)
510 if (!f)
511 return error; 507 return error;
512 audit_inode(NULL, f->f_path.dentry); 508 audit_inode(NULL, f.file->f_path.dentry);
513 error = getxattr(f->f_path.dentry, name, value, size); 509 error = getxattr(f.file->f_path.dentry, name, value, size);
514 fput_light(f, fput_needed); 510 fdput(f);
515 return error; 511 return error;
516} 512}
517 513
@@ -583,16 +579,14 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,
583 579
584SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) 580SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
585{ 581{
586 int fput_needed; 582 struct fd f = fdget(fd);
587 struct file *f;
588 ssize_t error = -EBADF; 583 ssize_t error = -EBADF;
589 584
590 f = fget_light(fd, &fput_needed); 585 if (!f.file)
591 if (!f)
592 return error; 586 return error;
593 audit_inode(NULL, f->f_path.dentry); 587 audit_inode(NULL, f.file->f_path.dentry);
594 error = listxattr(f->f_path.dentry, list, size); 588 error = listxattr(f.file->f_path.dentry, list, size);
595 fput_light(f, fput_needed); 589 fdput(f);
596 return error; 590 return error;
597} 591}
598 592
@@ -652,22 +646,20 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
652 646
653SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) 647SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
654{ 648{
655 int fput_needed; 649 struct fd f = fdget(fd);
656 struct file *f;
657 struct dentry *dentry; 650 struct dentry *dentry;
658 int error = -EBADF; 651 int error = -EBADF;
659 652
660 f = fget_light(fd, &fput_needed); 653 if (!f.file)
661 if (!f)
662 return error; 654 return error;
663 dentry = f->f_path.dentry; 655 dentry = f.file->f_path.dentry;
664 audit_inode(NULL, dentry); 656 audit_inode(NULL, dentry);
665 error = mnt_want_write_file(f); 657 error = mnt_want_write_file(f.file);
666 if (!error) { 658 if (!error) {
667 error = removexattr(dentry, name); 659 error = removexattr(dentry, name);
668 mnt_drop_write_file(f); 660 mnt_drop_write_file(f.file);
669 } 661 }
670 fput_light(f, fput_needed); 662 fdput(f);
671 return error; 663 return error;
672} 664}
673 665
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e00de08dc8ac..b9b8646e62db 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -48,44 +48,44 @@ xfs_swapext(
48 xfs_swapext_t *sxp) 48 xfs_swapext_t *sxp)
49{ 49{
50 xfs_inode_t *ip, *tip; 50 xfs_inode_t *ip, *tip;
51 struct file *file, *tmp_file; 51 struct fd f, tmp;
52 int error = 0; 52 int error = 0;
53 53
54 /* Pull information for the target fd */ 54 /* Pull information for the target fd */
55 file = fget((int)sxp->sx_fdtarget); 55 f = fdget((int)sxp->sx_fdtarget);
56 if (!file) { 56 if (!f.file) {
57 error = XFS_ERROR(EINVAL); 57 error = XFS_ERROR(EINVAL);
58 goto out; 58 goto out;
59 } 59 }
60 60
61 if (!(file->f_mode & FMODE_WRITE) || 61 if (!(f.file->f_mode & FMODE_WRITE) ||
62 !(file->f_mode & FMODE_READ) || 62 !(f.file->f_mode & FMODE_READ) ||
63 (file->f_flags & O_APPEND)) { 63 (f.file->f_flags & O_APPEND)) {
64 error = XFS_ERROR(EBADF); 64 error = XFS_ERROR(EBADF);
65 goto out_put_file; 65 goto out_put_file;
66 } 66 }
67 67
68 tmp_file = fget((int)sxp->sx_fdtmp); 68 tmp = fdget((int)sxp->sx_fdtmp);
69 if (!tmp_file) { 69 if (!tmp.file) {
70 error = XFS_ERROR(EINVAL); 70 error = XFS_ERROR(EINVAL);
71 goto out_put_file; 71 goto out_put_file;
72 } 72 }
73 73
74 if (!(tmp_file->f_mode & FMODE_WRITE) || 74 if (!(tmp.file->f_mode & FMODE_WRITE) ||
75 !(tmp_file->f_mode & FMODE_READ) || 75 !(tmp.file->f_mode & FMODE_READ) ||
76 (tmp_file->f_flags & O_APPEND)) { 76 (tmp.file->f_flags & O_APPEND)) {
77 error = XFS_ERROR(EBADF); 77 error = XFS_ERROR(EBADF);
78 goto out_put_tmp_file; 78 goto out_put_tmp_file;
79 } 79 }
80 80
81 if (IS_SWAPFILE(file->f_path.dentry->d_inode) || 81 if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) ||
82 IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) { 82 IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) {
83 error = XFS_ERROR(EINVAL); 83 error = XFS_ERROR(EINVAL);
84 goto out_put_tmp_file; 84 goto out_put_tmp_file;
85 } 85 }
86 86
87 ip = XFS_I(file->f_path.dentry->d_inode); 87 ip = XFS_I(f.file->f_path.dentry->d_inode);
88 tip = XFS_I(tmp_file->f_path.dentry->d_inode); 88 tip = XFS_I(tmp.file->f_path.dentry->d_inode);
89 89
90 if (ip->i_mount != tip->i_mount) { 90 if (ip->i_mount != tip->i_mount) {
91 error = XFS_ERROR(EINVAL); 91 error = XFS_ERROR(EINVAL);
@@ -105,9 +105,9 @@ xfs_swapext(
105 error = xfs_swap_extents(ip, tip, sxp); 105 error = xfs_swap_extents(ip, tip, sxp);
106 106
107 out_put_tmp_file: 107 out_put_tmp_file:
108 fput(tmp_file); 108 fdput(tmp);
109 out_put_file: 109 out_put_file:
110 fput(file); 110 fdput(f);
111 out: 111 out:
112 return error; 112 return error;
113} 113}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0e0232c3b6d9..8305f2ac6773 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -70,16 +70,16 @@ xfs_find_handle(
70 int hsize; 70 int hsize;
71 xfs_handle_t handle; 71 xfs_handle_t handle;
72 struct inode *inode; 72 struct inode *inode;
73 struct file *file = NULL; 73 struct fd f;
74 struct path path; 74 struct path path;
75 int error; 75 int error;
76 struct xfs_inode *ip; 76 struct xfs_inode *ip;
77 77
78 if (cmd == XFS_IOC_FD_TO_HANDLE) { 78 if (cmd == XFS_IOC_FD_TO_HANDLE) {
79 file = fget(hreq->fd); 79 f = fdget(hreq->fd);
80 if (!file) 80 if (!f.file)
81 return -EBADF; 81 return -EBADF;
82 inode = file->f_path.dentry->d_inode; 82 inode = f.file->f_path.dentry->d_inode;
83 } else { 83 } else {
84 error = user_lpath((const char __user *)hreq->path, &path); 84 error = user_lpath((const char __user *)hreq->path, &path);
85 if (error) 85 if (error)
@@ -134,7 +134,7 @@ xfs_find_handle(
134 134
135 out_put: 135 out_put:
136 if (cmd == XFS_IOC_FD_TO_HANDLE) 136 if (cmd == XFS_IOC_FD_TO_HANDLE)
137 fput(file); 137 fdput(f);
138 else 138 else
139 path_put(&path); 139 path_put(&path);
140 return error; 140 return error;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 001537f92caf..e0fd2734189e 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1506,6 +1506,11 @@ xfs_init_zones(void)
1506STATIC void 1506STATIC void
1507xfs_destroy_zones(void) 1507xfs_destroy_zones(void)
1508{ 1508{
1509 /*
1510 * Make sure all delayed rcu free are flushed before we
1511 * destroy caches.
1512 */
1513 rcu_barrier();
1509 kmem_zone_destroy(xfs_ili_zone); 1514 kmem_zone_destroy(xfs_ili_zone);
1510 kmem_zone_destroy(xfs_inode_zone); 1515 kmem_zone_destroy(xfs_inode_zone);
1511 kmem_zone_destroy(xfs_efi_zone); 1516 kmem_zone_destroy(xfs_efi_zone);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 09b28b7369d7..fd4e29956d1c 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -590,6 +590,9 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
590 unsigned long liovcnt, const struct compat_iovec __user *rvec, 590 unsigned long liovcnt, const struct compat_iovec __user *rvec,
591 unsigned long riovcnt, unsigned long flags); 591 unsigned long riovcnt, unsigned long flags);
592 592
593asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
594 compat_off_t __user *offset, compat_size_t count);
595
593#else 596#else
594 597
595#define is_compat_task() (0) 598#define is_compat_task() (0)
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 158a41eed314..45052aa814c8 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -30,31 +30,11 @@ struct fdtable {
30 struct fdtable *next; 30 struct fdtable *next;
31}; 31};
32 32
33static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
34{
35 __set_bit(fd, fdt->close_on_exec);
36}
37
38static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
39{
40 __clear_bit(fd, fdt->close_on_exec);
41}
42
43static inline bool close_on_exec(int fd, const struct fdtable *fdt) 33static inline bool close_on_exec(int fd, const struct fdtable *fdt)
44{ 34{
45 return test_bit(fd, fdt->close_on_exec); 35 return test_bit(fd, fdt->close_on_exec);
46} 36}
47 37
48static inline void __set_open_fd(int fd, struct fdtable *fdt)
49{
50 __set_bit(fd, fdt->open_fds);
51}
52
53static inline void __clear_open_fd(int fd, struct fdtable *fdt)
54{
55 __clear_bit(fd, fdt->open_fds);
56}
57
58static inline bool fd_is_open(int fd, const struct fdtable *fdt) 38static inline bool fd_is_open(int fd, const struct fdtable *fdt)
59{ 39{
60 return test_bit(fd, fdt->open_fds); 40 return test_bit(fd, fdt->open_fds);
@@ -93,15 +73,8 @@ struct file_operations;
93struct vfsmount; 73struct vfsmount;
94struct dentry; 74struct dentry;
95 75
96extern int expand_files(struct files_struct *, int nr);
97extern void free_fdtable_rcu(struct rcu_head *rcu);
98extern void __init files_defer_init(void); 76extern void __init files_defer_init(void);
99 77
100static inline void free_fdtable(struct fdtable *fdt)
101{
102 call_rcu(&fdt->rcu, free_fdtable_rcu);
103}
104
105static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) 78static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
106{ 79{
107 struct file * file = NULL; 80 struct file * file = NULL;
@@ -122,8 +95,20 @@ struct task_struct;
122struct files_struct *get_files_struct(struct task_struct *); 95struct files_struct *get_files_struct(struct task_struct *);
123void put_files_struct(struct files_struct *fs); 96void put_files_struct(struct files_struct *fs);
124void reset_files_struct(struct files_struct *); 97void reset_files_struct(struct files_struct *);
98void daemonize_descriptors(void);
125int unshare_files(struct files_struct **); 99int unshare_files(struct files_struct **);
126struct files_struct *dup_fd(struct files_struct *, int *); 100struct files_struct *dup_fd(struct files_struct *, int *);
101void do_close_on_exec(struct files_struct *);
102int iterate_fd(struct files_struct *, unsigned,
103 int (*)(const void *, struct file *, unsigned),
104 const void *);
105
106extern int __alloc_fd(struct files_struct *files,
107 unsigned start, unsigned end, unsigned flags);
108extern void __fd_install(struct files_struct *files,
109 unsigned int fd, struct file *file);
110extern int __close_fd(struct files_struct *files,
111 unsigned int fd);
127 112
128extern struct kmem_cache *files_cachep; 113extern struct kmem_cache *files_cachep;
129 114
diff --git a/include/linux/file.h b/include/linux/file.h
index a22408bac0d0..cbacf4faf447 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -26,15 +26,44 @@ static inline void fput_light(struct file *file, int fput_needed)
26 fput(file); 26 fput(file);
27} 27}
28 28
29struct fd {
30 struct file *file;
31 int need_put;
32};
33
34static inline void fdput(struct fd fd)
35{
36 if (fd.need_put)
37 fput(fd.file);
38}
39
29extern struct file *fget(unsigned int fd); 40extern struct file *fget(unsigned int fd);
30extern struct file *fget_light(unsigned int fd, int *fput_needed); 41extern struct file *fget_light(unsigned int fd, int *fput_needed);
42
43static inline struct fd fdget(unsigned int fd)
44{
45 int b;
46 struct file *f = fget_light(fd, &b);
47 return (struct fd){f,b};
48}
49
31extern struct file *fget_raw(unsigned int fd); 50extern struct file *fget_raw(unsigned int fd);
32extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); 51extern struct file *fget_raw_light(unsigned int fd, int *fput_needed);
52
53static inline struct fd fdget_raw(unsigned int fd)
54{
55 int b;
56 struct file *f = fget_raw_light(fd, &b);
57 return (struct fd){f,b};
58}
59
60extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
61extern int replace_fd(unsigned fd, struct file *file, unsigned flags);
33extern void set_close_on_exec(unsigned int fd, int flag); 62extern void set_close_on_exec(unsigned int fd, int flag);
63extern bool get_close_on_exec(unsigned int fd);
34extern void put_filp(struct file *); 64extern void put_filp(struct file *);
35extern int alloc_fd(unsigned start, unsigned flags); 65extern int get_unused_fd_flags(unsigned flags);
36extern int get_unused_fd(void); 66#define get_unused_fd() get_unused_fd_flags(0)
37#define get_unused_fd_flags(flags) alloc_fd(0, (flags))
38extern void put_unused_fd(unsigned int fd); 67extern void put_unused_fd(unsigned int fd);
39 68
40extern void fd_install(unsigned int fd, struct file *file); 69extern void fd_install(unsigned int fd, struct file *file);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index aa110476a95b..ca6d8c806f47 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1074,7 +1074,11 @@ struct file_handle {
1074 unsigned char f_handle[0]; 1074 unsigned char f_handle[0];
1075}; 1075};
1076 1076
1077#define get_file(x) atomic_long_inc(&(x)->f_count) 1077static inline struct file *get_file(struct file *f)
1078{
1079 atomic_long_inc(&f->f_count);
1080 return f;
1081}
1078#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) 1082#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
1079#define file_count(x) atomic_long_read(&(x)->f_count) 1083#define file_count(x) atomic_long_read(&(x)->f_count)
1080 1084
@@ -1126,9 +1130,9 @@ static inline int file_check_writeable(struct file *filp)
1126/* Page cache limit. The filesystems should put that into their s_maxbytes 1130/* Page cache limit. The filesystems should put that into their s_maxbytes
1127 limits, otherwise bad things can happen in VM. */ 1131 limits, otherwise bad things can happen in VM. */
1128#if BITS_PER_LONG==32 1132#if BITS_PER_LONG==32
1129#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) 1133#define MAX_LFS_FILESIZE (((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
1130#elif BITS_PER_LONG==64 1134#elif BITS_PER_LONG==64
1131#define MAX_LFS_FILESIZE 0x7fffffffffffffffUL 1135#define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffff)
1132#endif 1136#endif
1133 1137
1134#define FL_POSIX 1 1138#define FL_POSIX 1
diff --git a/include/linux/net.h b/include/linux/net.h
index 99276c3dc89a..6ab31cabef7c 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -65,6 +65,7 @@ typedef enum {
65struct poll_table_struct; 65struct poll_table_struct;
66struct pipe_inode_info; 66struct pipe_inode_info;
67struct inode; 67struct inode;
68struct file;
68struct net; 69struct net;
69 70
70#define SOCK_ASYNC_NOSPACE 0 71#define SOCK_ASYNC_NOSPACE 0
@@ -246,7 +247,7 @@ extern int sock_sendmsg(struct socket *sock, struct msghdr *msg,
246 size_t len); 247 size_t len);
247extern int sock_recvmsg(struct socket *sock, struct msghdr *msg, 248extern int sock_recvmsg(struct socket *sock, struct msghdr *msg,
248 size_t size, int flags); 249 size_t size, int flags);
249extern int sock_map_fd(struct socket *sock, int flags); 250extern struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname);
250extern struct socket *sockfd_lookup(int fd, int *err); 251extern struct socket *sockfd_lookup(int fd, int *err);
251extern struct socket *sock_from_file(struct file *file, int *err); 252extern struct socket *sock_from_file(struct file *file, int *err);
252#define sockfd_put(sock) fput(sock->file) 253#define sockfd_put(sock) fput(sock->file)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c5612f0374b..9d51e260bde0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -405,6 +405,7 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
405 405
406extern void set_dumpable(struct mm_struct *mm, int value); 406extern void set_dumpable(struct mm_struct *mm, int value);
407extern int get_dumpable(struct mm_struct *mm); 407extern int get_dumpable(struct mm_struct *mm);
408extern int __get_dumpable(unsigned long mm_flags);
408 409
409/* get/set_dumpable() values */ 410/* get/set_dumpable() values */
410#define SUID_DUMPABLE_DISABLED 0 411#define SUID_DUMPABLE_DISABLED 0
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 9a08acc9e649..6d255e535d03 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -944,7 +944,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
944 size_t, msg_len, unsigned int, msg_prio, 944 size_t, msg_len, unsigned int, msg_prio,
945 const struct timespec __user *, u_abs_timeout) 945 const struct timespec __user *, u_abs_timeout)
946{ 946{
947 struct file *filp; 947 struct fd f;
948 struct inode *inode; 948 struct inode *inode;
949 struct ext_wait_queue wait; 949 struct ext_wait_queue wait;
950 struct ext_wait_queue *receiver; 950 struct ext_wait_queue *receiver;
@@ -967,21 +967,21 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
967 967
968 audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL); 968 audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL);
969 969
970 filp = fget(mqdes); 970 f = fdget(mqdes);
971 if (unlikely(!filp)) { 971 if (unlikely(!f.file)) {
972 ret = -EBADF; 972 ret = -EBADF;
973 goto out; 973 goto out;
974 } 974 }
975 975
976 inode = filp->f_path.dentry->d_inode; 976 inode = f.file->f_path.dentry->d_inode;
977 if (unlikely(filp->f_op != &mqueue_file_operations)) { 977 if (unlikely(f.file->f_op != &mqueue_file_operations)) {
978 ret = -EBADF; 978 ret = -EBADF;
979 goto out_fput; 979 goto out_fput;
980 } 980 }
981 info = MQUEUE_I(inode); 981 info = MQUEUE_I(inode);
982 audit_inode(NULL, filp->f_path.dentry); 982 audit_inode(NULL, f.file->f_path.dentry);
983 983
984 if (unlikely(!(filp->f_mode & FMODE_WRITE))) { 984 if (unlikely(!(f.file->f_mode & FMODE_WRITE))) {
985 ret = -EBADF; 985 ret = -EBADF;
986 goto out_fput; 986 goto out_fput;
987 } 987 }
@@ -1023,7 +1023,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
1023 } 1023 }
1024 1024
1025 if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) { 1025 if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) {
1026 if (filp->f_flags & O_NONBLOCK) { 1026 if (f.file->f_flags & O_NONBLOCK) {
1027 ret = -EAGAIN; 1027 ret = -EAGAIN;
1028 } else { 1028 } else {
1029 wait.task = current; 1029 wait.task = current;
@@ -1056,7 +1056,7 @@ out_free:
1056 if (ret) 1056 if (ret)
1057 free_msg(msg_ptr); 1057 free_msg(msg_ptr);
1058out_fput: 1058out_fput:
1059 fput(filp); 1059 fdput(f);
1060out: 1060out:
1061 return ret; 1061 return ret;
1062} 1062}
@@ -1067,7 +1067,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
1067{ 1067{
1068 ssize_t ret; 1068 ssize_t ret;
1069 struct msg_msg *msg_ptr; 1069 struct msg_msg *msg_ptr;
1070 struct file *filp; 1070 struct fd f;
1071 struct inode *inode; 1071 struct inode *inode;
1072 struct mqueue_inode_info *info; 1072 struct mqueue_inode_info *info;
1073 struct ext_wait_queue wait; 1073 struct ext_wait_queue wait;
@@ -1084,21 +1084,21 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
1084 1084
1085 audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL); 1085 audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL);
1086 1086
1087 filp = fget(mqdes); 1087 f = fdget(mqdes);
1088 if (unlikely(!filp)) { 1088 if (unlikely(!f.file)) {
1089 ret = -EBADF; 1089 ret = -EBADF;
1090 goto out; 1090 goto out;
1091 } 1091 }
1092 1092
1093 inode = filp->f_path.dentry->d_inode; 1093 inode = f.file->f_path.dentry->d_inode;
1094 if (unlikely(filp->f_op != &mqueue_file_operations)) { 1094 if (unlikely(f.file->f_op != &mqueue_file_operations)) {
1095 ret = -EBADF; 1095 ret = -EBADF;
1096 goto out_fput; 1096 goto out_fput;
1097 } 1097 }
1098 info = MQUEUE_I(inode); 1098 info = MQUEUE_I(inode);
1099 audit_inode(NULL, filp->f_path.dentry); 1099 audit_inode(NULL, f.file->f_path.dentry);
1100 1100
1101 if (unlikely(!(filp->f_mode & FMODE_READ))) { 1101 if (unlikely(!(f.file->f_mode & FMODE_READ))) {
1102 ret = -EBADF; 1102 ret = -EBADF;
1103 goto out_fput; 1103 goto out_fput;
1104 } 1104 }
@@ -1130,7 +1130,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
1130 } 1130 }
1131 1131
1132 if (info->attr.mq_curmsgs == 0) { 1132 if (info->attr.mq_curmsgs == 0) {
1133 if (filp->f_flags & O_NONBLOCK) { 1133 if (f.file->f_flags & O_NONBLOCK) {
1134 spin_unlock(&info->lock); 1134 spin_unlock(&info->lock);
1135 ret = -EAGAIN; 1135 ret = -EAGAIN;
1136 } else { 1136 } else {
@@ -1160,7 +1160,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
1160 free_msg(msg_ptr); 1160 free_msg(msg_ptr);
1161 } 1161 }
1162out_fput: 1162out_fput:
1163 fput(filp); 1163 fdput(f);
1164out: 1164out:
1165 return ret; 1165 return ret;
1166} 1166}
@@ -1174,7 +1174,7 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
1174 const struct sigevent __user *, u_notification) 1174 const struct sigevent __user *, u_notification)
1175{ 1175{
1176 int ret; 1176 int ret;
1177 struct file *filp; 1177 struct fd f;
1178 struct sock *sock; 1178 struct sock *sock;
1179 struct inode *inode; 1179 struct inode *inode;
1180 struct sigevent notification; 1180 struct sigevent notification;
@@ -1220,13 +1220,13 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
1220 skb_put(nc, NOTIFY_COOKIE_LEN); 1220 skb_put(nc, NOTIFY_COOKIE_LEN);
1221 /* and attach it to the socket */ 1221 /* and attach it to the socket */
1222retry: 1222retry:
1223 filp = fget(notification.sigev_signo); 1223 f = fdget(notification.sigev_signo);
1224 if (!filp) { 1224 if (!f.file) {
1225 ret = -EBADF; 1225 ret = -EBADF;
1226 goto out; 1226 goto out;
1227 } 1227 }
1228 sock = netlink_getsockbyfilp(filp); 1228 sock = netlink_getsockbyfilp(f.file);
1229 fput(filp); 1229 fdput(f);
1230 if (IS_ERR(sock)) { 1230 if (IS_ERR(sock)) {
1231 ret = PTR_ERR(sock); 1231 ret = PTR_ERR(sock);
1232 sock = NULL; 1232 sock = NULL;
@@ -1245,14 +1245,14 @@ retry:
1245 } 1245 }
1246 } 1246 }
1247 1247
1248 filp = fget(mqdes); 1248 f = fdget(mqdes);
1249 if (!filp) { 1249 if (!f.file) {
1250 ret = -EBADF; 1250 ret = -EBADF;
1251 goto out; 1251 goto out;
1252 } 1252 }
1253 1253
1254 inode = filp->f_path.dentry->d_inode; 1254 inode = f.file->f_path.dentry->d_inode;
1255 if (unlikely(filp->f_op != &mqueue_file_operations)) { 1255 if (unlikely(f.file->f_op != &mqueue_file_operations)) {
1256 ret = -EBADF; 1256 ret = -EBADF;
1257 goto out_fput; 1257 goto out_fput;
1258 } 1258 }
@@ -1292,7 +1292,7 @@ retry:
1292 } 1292 }
1293 spin_unlock(&info->lock); 1293 spin_unlock(&info->lock);
1294out_fput: 1294out_fput:
1295 fput(filp); 1295 fdput(f);
1296out: 1296out:
1297 if (sock) { 1297 if (sock) {
1298 netlink_detachskb(sock, nc); 1298 netlink_detachskb(sock, nc);
@@ -1308,7 +1308,7 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
1308{ 1308{
1309 int ret; 1309 int ret;
1310 struct mq_attr mqstat, omqstat; 1310 struct mq_attr mqstat, omqstat;
1311 struct file *filp; 1311 struct fd f;
1312 struct inode *inode; 1312 struct inode *inode;
1313 struct mqueue_inode_info *info; 1313 struct mqueue_inode_info *info;
1314 1314
@@ -1319,14 +1319,14 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
1319 return -EINVAL; 1319 return -EINVAL;
1320 } 1320 }
1321 1321
1322 filp = fget(mqdes); 1322 f = fdget(mqdes);
1323 if (!filp) { 1323 if (!f.file) {
1324 ret = -EBADF; 1324 ret = -EBADF;
1325 goto out; 1325 goto out;
1326 } 1326 }
1327 1327
1328 inode = filp->f_path.dentry->d_inode; 1328 inode = f.file->f_path.dentry->d_inode;
1329 if (unlikely(filp->f_op != &mqueue_file_operations)) { 1329 if (unlikely(f.file->f_op != &mqueue_file_operations)) {
1330 ret = -EBADF; 1330 ret = -EBADF;
1331 goto out_fput; 1331 goto out_fput;
1332 } 1332 }
@@ -1335,15 +1335,15 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
1335 spin_lock(&info->lock); 1335 spin_lock(&info->lock);
1336 1336
1337 omqstat = info->attr; 1337 omqstat = info->attr;
1338 omqstat.mq_flags = filp->f_flags & O_NONBLOCK; 1338 omqstat.mq_flags = f.file->f_flags & O_NONBLOCK;
1339 if (u_mqstat) { 1339 if (u_mqstat) {
1340 audit_mq_getsetattr(mqdes, &mqstat); 1340 audit_mq_getsetattr(mqdes, &mqstat);
1341 spin_lock(&filp->f_lock); 1341 spin_lock(&f.file->f_lock);
1342 if (mqstat.mq_flags & O_NONBLOCK) 1342 if (mqstat.mq_flags & O_NONBLOCK)
1343 filp->f_flags |= O_NONBLOCK; 1343 f.file->f_flags |= O_NONBLOCK;
1344 else 1344 else
1345 filp->f_flags &= ~O_NONBLOCK; 1345 f.file->f_flags &= ~O_NONBLOCK;
1346 spin_unlock(&filp->f_lock); 1346 spin_unlock(&f.file->f_lock);
1347 1347
1348 inode->i_atime = inode->i_ctime = CURRENT_TIME; 1348 inode->i_atime = inode->i_ctime = CURRENT_TIME;
1349 } 1349 }
@@ -1356,7 +1356,7 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
1356 ret = -EFAULT; 1356 ret = -EFAULT;
1357 1357
1358out_fput: 1358out_fput:
1359 fput(filp); 1359 fdput(f);
1360out: 1360out:
1361 return ret; 1361 return ret;
1362} 1362}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index deec4e50eb30..f16f3c58f11a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -468,14 +468,13 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
468{ 468{
469 struct perf_cgroup *cgrp; 469 struct perf_cgroup *cgrp;
470 struct cgroup_subsys_state *css; 470 struct cgroup_subsys_state *css;
471 struct file *file; 471 struct fd f = fdget(fd);
472 int ret = 0, fput_needed; 472 int ret = 0;
473 473
474 file = fget_light(fd, &fput_needed); 474 if (!f.file)
475 if (!file)
476 return -EBADF; 475 return -EBADF;
477 476
478 css = cgroup_css_from_dir(file, perf_subsys_id); 477 css = cgroup_css_from_dir(f.file, perf_subsys_id);
479 if (IS_ERR(css)) { 478 if (IS_ERR(css)) {
480 ret = PTR_ERR(css); 479 ret = PTR_ERR(css);
481 goto out; 480 goto out;
@@ -501,7 +500,7 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
501 ret = -EINVAL; 500 ret = -EINVAL;
502 } 501 }
503out: 502out:
504 fput_light(file, fput_needed); 503 fdput(f);
505 return ret; 504 return ret;
506} 505}
507 506
@@ -3234,21 +3233,18 @@ unlock:
3234 3233
3235static const struct file_operations perf_fops; 3234static const struct file_operations perf_fops;
3236 3235
3237static struct file *perf_fget_light(int fd, int *fput_needed) 3236static inline int perf_fget_light(int fd, struct fd *p)
3238{ 3237{
3239 struct file *file; 3238 struct fd f = fdget(fd);
3240 3239 if (!f.file)
3241 file = fget_light(fd, fput_needed); 3240 return -EBADF;
3242 if (!file)
3243 return ERR_PTR(-EBADF);
3244 3241
3245 if (file->f_op != &perf_fops) { 3242 if (f.file->f_op != &perf_fops) {
3246 fput_light(file, *fput_needed); 3243 fdput(f);
3247 *fput_needed = 0; 3244 return -EBADF;
3248 return ERR_PTR(-EBADF);
3249 } 3245 }
3250 3246 *p = f;
3251 return file; 3247 return 0;
3252} 3248}
3253 3249
3254static int perf_event_set_output(struct perf_event *event, 3250static int perf_event_set_output(struct perf_event *event,
@@ -3280,22 +3276,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
3280 3276
3281 case PERF_EVENT_IOC_SET_OUTPUT: 3277 case PERF_EVENT_IOC_SET_OUTPUT:
3282 { 3278 {
3283 struct file *output_file = NULL;
3284 struct perf_event *output_event = NULL;
3285 int fput_needed = 0;
3286 int ret; 3279 int ret;
3287
3288 if (arg != -1) { 3280 if (arg != -1) {
3289 output_file = perf_fget_light(arg, &fput_needed); 3281 struct perf_event *output_event;
3290 if (IS_ERR(output_file)) 3282 struct fd output;
3291 return PTR_ERR(output_file); 3283 ret = perf_fget_light(arg, &output);
3292 output_event = output_file->private_data; 3284 if (ret)
3285 return ret;
3286 output_event = output.file->private_data;
3287 ret = perf_event_set_output(event, output_event);
3288 fdput(output);
3289 } else {
3290 ret = perf_event_set_output(event, NULL);
3293 } 3291 }
3294
3295 ret = perf_event_set_output(event, output_event);
3296 if (output_event)
3297 fput_light(output_file, fput_needed);
3298
3299 return ret; 3292 return ret;
3300 } 3293 }
3301 3294
@@ -6443,12 +6436,11 @@ SYSCALL_DEFINE5(perf_event_open,
6443 struct perf_event_attr attr; 6436 struct perf_event_attr attr;
6444 struct perf_event_context *ctx; 6437 struct perf_event_context *ctx;
6445 struct file *event_file = NULL; 6438 struct file *event_file = NULL;
6446 struct file *group_file = NULL; 6439 struct fd group = {NULL, 0};
6447 struct task_struct *task = NULL; 6440 struct task_struct *task = NULL;
6448 struct pmu *pmu; 6441 struct pmu *pmu;
6449 int event_fd; 6442 int event_fd;
6450 int move_group = 0; 6443 int move_group = 0;
6451 int fput_needed = 0;
6452 int err; 6444 int err;
6453 6445
6454 /* for future expandability... */ 6446 /* for future expandability... */
@@ -6478,17 +6470,15 @@ SYSCALL_DEFINE5(perf_event_open,
6478 if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1)) 6470 if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1))
6479 return -EINVAL; 6471 return -EINVAL;
6480 6472
6481 event_fd = get_unused_fd_flags(O_RDWR); 6473 event_fd = get_unused_fd();
6482 if (event_fd < 0) 6474 if (event_fd < 0)
6483 return event_fd; 6475 return event_fd;
6484 6476
6485 if (group_fd != -1) { 6477 if (group_fd != -1) {
6486 group_file = perf_fget_light(group_fd, &fput_needed); 6478 err = perf_fget_light(group_fd, &group);
6487 if (IS_ERR(group_file)) { 6479 if (err)
6488 err = PTR_ERR(group_file);
6489 goto err_fd; 6480 goto err_fd;
6490 } 6481 group_leader = group.file->private_data;
6491 group_leader = group_file->private_data;
6492 if (flags & PERF_FLAG_FD_OUTPUT) 6482 if (flags & PERF_FLAG_FD_OUTPUT)
6493 output_event = group_leader; 6483 output_event = group_leader;
6494 if (flags & PERF_FLAG_FD_NO_GROUP) 6484 if (flags & PERF_FLAG_FD_NO_GROUP)
@@ -6664,7 +6654,7 @@ SYSCALL_DEFINE5(perf_event_open,
6664 * of the group leader will find the pointer to itself in 6654 * of the group leader will find the pointer to itself in
6665 * perf_group_detach(). 6655 * perf_group_detach().
6666 */ 6656 */
6667 fput_light(group_file, fput_needed); 6657 fdput(group);
6668 fd_install(event_fd, event_file); 6658 fd_install(event_fd, event_file);
6669 return event_fd; 6659 return event_fd;
6670 6660
@@ -6678,7 +6668,7 @@ err_task:
6678 if (task) 6668 if (task)
6679 put_task_struct(task); 6669 put_task_struct(task);
6680err_group_fd: 6670err_group_fd:
6681 fput_light(group_file, fput_needed); 6671 fdput(group);
6682err_fd: 6672err_fd:
6683 put_unused_fd(event_fd); 6673 put_unused_fd(event_fd);
6684 return err; 6674 return err;
diff --git a/kernel/exit.c b/kernel/exit.c
index 42f25952edd9..346616c0092c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -457,108 +457,13 @@ void daemonize(const char *name, ...)
457 /* Become as one with the init task */ 457 /* Become as one with the init task */
458 458
459 daemonize_fs_struct(); 459 daemonize_fs_struct();
460 exit_files(current); 460 daemonize_descriptors();
461 current->files = init_task.files;
462 atomic_inc(&current->files->count);
463 461
464 reparent_to_kthreadd(); 462 reparent_to_kthreadd();
465} 463}
466 464
467EXPORT_SYMBOL(daemonize); 465EXPORT_SYMBOL(daemonize);
468 466
469static void close_files(struct files_struct * files)
470{
471 int i, j;
472 struct fdtable *fdt;
473
474 j = 0;
475
476 /*
477 * It is safe to dereference the fd table without RCU or
478 * ->file_lock because this is the last reference to the
479 * files structure. But use RCU to shut RCU-lockdep up.
480 */
481 rcu_read_lock();
482 fdt = files_fdtable(files);
483 rcu_read_unlock();
484 for (;;) {
485 unsigned long set;
486 i = j * BITS_PER_LONG;
487 if (i >= fdt->max_fds)
488 break;
489 set = fdt->open_fds[j++];
490 while (set) {
491 if (set & 1) {
492 struct file * file = xchg(&fdt->fd[i], NULL);
493 if (file) {
494 filp_close(file, files);
495 cond_resched();
496 }
497 }
498 i++;
499 set >>= 1;
500 }
501 }
502}
503
504struct files_struct *get_files_struct(struct task_struct *task)
505{
506 struct files_struct *files;
507
508 task_lock(task);
509 files = task->files;
510 if (files)
511 atomic_inc(&files->count);
512 task_unlock(task);
513
514 return files;
515}
516
517void put_files_struct(struct files_struct *files)
518{
519 struct fdtable *fdt;
520
521 if (atomic_dec_and_test(&files->count)) {
522 close_files(files);
523 /*
524 * Free the fd and fdset arrays if we expanded them.
525 * If the fdtable was embedded, pass files for freeing
526 * at the end of the RCU grace period. Otherwise,
527 * you can free files immediately.
528 */
529 rcu_read_lock();
530 fdt = files_fdtable(files);
531 if (fdt != &files->fdtab)
532 kmem_cache_free(files_cachep, files);
533 free_fdtable(fdt);
534 rcu_read_unlock();
535 }
536}
537
538void reset_files_struct(struct files_struct *files)
539{
540 struct task_struct *tsk = current;
541 struct files_struct *old;
542
543 old = tsk->files;
544 task_lock(tsk);
545 tsk->files = files;
546 task_unlock(tsk);
547 put_files_struct(old);
548}
549
550void exit_files(struct task_struct *tsk)
551{
552 struct files_struct * files = tsk->files;
553
554 if (files) {
555 task_lock(tsk);
556 tsk->files = NULL;
557 task_unlock(tsk);
558 put_files_struct(files);
559 }
560}
561
562#ifdef CONFIG_MM_OWNER 467#ifdef CONFIG_MM_OWNER
563/* 468/*
564 * A task is exiting. If it owned this mm, find a new owner for the mm. 469 * A task is exiting. If it owned this mm, find a new owner for the mm.
diff --git a/kernel/sys.c b/kernel/sys.c
index 241507f23eca..f9492284e5d2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1788,15 +1788,15 @@ SYSCALL_DEFINE1(umask, int, mask)
1788#ifdef CONFIG_CHECKPOINT_RESTORE 1788#ifdef CONFIG_CHECKPOINT_RESTORE
1789static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) 1789static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1790{ 1790{
1791 struct file *exe_file; 1791 struct fd exe;
1792 struct dentry *dentry; 1792 struct dentry *dentry;
1793 int err; 1793 int err;
1794 1794
1795 exe_file = fget(fd); 1795 exe = fdget(fd);
1796 if (!exe_file) 1796 if (!exe.file)
1797 return -EBADF; 1797 return -EBADF;
1798 1798
1799 dentry = exe_file->f_path.dentry; 1799 dentry = exe.file->f_path.dentry;
1800 1800
1801 /* 1801 /*
1802 * Because the original mm->exe_file points to executable file, make 1802 * Because the original mm->exe_file points to executable file, make
@@ -1805,7 +1805,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1805 */ 1805 */
1806 err = -EACCES; 1806 err = -EACCES;
1807 if (!S_ISREG(dentry->d_inode->i_mode) || 1807 if (!S_ISREG(dentry->d_inode->i_mode) ||
1808 exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC) 1808 exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC)
1809 goto exit; 1809 goto exit;
1810 1810
1811 err = inode_permission(dentry->d_inode, MAY_EXEC); 1811 err = inode_permission(dentry->d_inode, MAY_EXEC);
@@ -1839,12 +1839,12 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1839 goto exit_unlock; 1839 goto exit_unlock;
1840 1840
1841 err = 0; 1841 err = 0;
1842 set_mm_exe_file(mm, exe_file); 1842 set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */
1843exit_unlock: 1843exit_unlock:
1844 up_write(&mm->mmap_sem); 1844 up_write(&mm->mmap_sem);
1845 1845
1846exit: 1846exit:
1847 fput(exe_file); 1847 fdput(exe);
1848 return err; 1848 return err;
1849} 1849}
1850 1850
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 5eab1f3edfa5..610f0838d555 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -424,16 +424,15 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
424 struct nlattr *na; 424 struct nlattr *na;
425 size_t size; 425 size_t size;
426 u32 fd; 426 u32 fd;
427 struct file *file; 427 struct fd f;
428 int fput_needed;
429 428
430 na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; 429 na = info->attrs[CGROUPSTATS_CMD_ATTR_FD];
431 if (!na) 430 if (!na)
432 return -EINVAL; 431 return -EINVAL;
433 432
434 fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); 433 fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]);
435 file = fget_light(fd, &fput_needed); 434 f = fdget(fd);
436 if (!file) 435 if (!f.file)
437 return 0; 436 return 0;
438 437
439 size = nla_total_size(sizeof(struct cgroupstats)); 438 size = nla_total_size(sizeof(struct cgroupstats));
@@ -453,7 +452,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
453 stats = nla_data(na); 452 stats = nla_data(na);
454 memset(stats, 0, sizeof(*stats)); 453 memset(stats, 0, sizeof(*stats));
455 454
456 rc = cgroupstats_build(stats, file->f_dentry); 455 rc = cgroupstats_build(stats, f.file->f_dentry);
457 if (rc < 0) { 456 if (rc < 0) {
458 nlmsg_free(rep_skb); 457 nlmsg_free(rep_skb);
459 goto err; 458 goto err;
@@ -462,7 +461,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
462 rc = send_reply(rep_skb, info); 461 rc = send_reply(rep_skb, info);
463 462
464err: 463err:
465 fput_light(file, fput_needed); 464 fdput(f);
466 return rc; 465 return rc;
467} 466}
468 467
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 9b75a045dbf4..a47f0f50c89f 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -26,7 +26,7 @@
26 */ 26 */
27SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) 27SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
28{ 28{
29 struct file *file = fget(fd); 29 struct fd f = fdget(fd);
30 struct address_space *mapping; 30 struct address_space *mapping;
31 struct backing_dev_info *bdi; 31 struct backing_dev_info *bdi;
32 loff_t endbyte; /* inclusive */ 32 loff_t endbyte; /* inclusive */
@@ -35,15 +35,15 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
35 unsigned long nrpages; 35 unsigned long nrpages;
36 int ret = 0; 36 int ret = 0;
37 37
38 if (!file) 38 if (!f.file)
39 return -EBADF; 39 return -EBADF;
40 40
41 if (S_ISFIFO(file->f_path.dentry->d_inode->i_mode)) { 41 if (S_ISFIFO(f.file->f_path.dentry->d_inode->i_mode)) {
42 ret = -ESPIPE; 42 ret = -ESPIPE;
43 goto out; 43 goto out;
44 } 44 }
45 45
46 mapping = file->f_mapping; 46 mapping = f.file->f_mapping;
47 if (!mapping || len < 0) { 47 if (!mapping || len < 0) {
48 ret = -EINVAL; 48 ret = -EINVAL;
49 goto out; 49 goto out;
@@ -76,21 +76,21 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
76 76
77 switch (advice) { 77 switch (advice) {
78 case POSIX_FADV_NORMAL: 78 case POSIX_FADV_NORMAL:
79 file->f_ra.ra_pages = bdi->ra_pages; 79 f.file->f_ra.ra_pages = bdi->ra_pages;
80 spin_lock(&file->f_lock); 80 spin_lock(&f.file->f_lock);
81 file->f_mode &= ~FMODE_RANDOM; 81 f.file->f_mode &= ~FMODE_RANDOM;
82 spin_unlock(&file->f_lock); 82 spin_unlock(&f.file->f_lock);
83 break; 83 break;
84 case POSIX_FADV_RANDOM: 84 case POSIX_FADV_RANDOM:
85 spin_lock(&file->f_lock); 85 spin_lock(&f.file->f_lock);
86 file->f_mode |= FMODE_RANDOM; 86 f.file->f_mode |= FMODE_RANDOM;
87 spin_unlock(&file->f_lock); 87 spin_unlock(&f.file->f_lock);
88 break; 88 break;
89 case POSIX_FADV_SEQUENTIAL: 89 case POSIX_FADV_SEQUENTIAL:
90 file->f_ra.ra_pages = bdi->ra_pages * 2; 90 f.file->f_ra.ra_pages = bdi->ra_pages * 2;
91 spin_lock(&file->f_lock); 91 spin_lock(&f.file->f_lock);
92 file->f_mode &= ~FMODE_RANDOM; 92 f.file->f_mode &= ~FMODE_RANDOM;
93 spin_unlock(&file->f_lock); 93 spin_unlock(&f.file->f_lock);
94 break; 94 break;
95 case POSIX_FADV_WILLNEED: 95 case POSIX_FADV_WILLNEED:
96 /* First and last PARTIAL page! */ 96 /* First and last PARTIAL page! */
@@ -106,7 +106,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
106 * Ignore return value because fadvise() shall return 106 * Ignore return value because fadvise() shall return
107 * success even if filesystem can't retrieve a hint, 107 * success even if filesystem can't retrieve a hint,
108 */ 108 */
109 force_page_cache_readahead(mapping, file, start_index, 109 force_page_cache_readahead(mapping, f.file, start_index,
110 nrpages); 110 nrpages);
111 break; 111 break;
112 case POSIX_FADV_NOREUSE: 112 case POSIX_FADV_NOREUSE:
@@ -128,7 +128,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
128 ret = -EINVAL; 128 ret = -EINVAL;
129 } 129 }
130out: 130out:
131 fput(file); 131 fdput(f);
132 return ret; 132 return ret;
133} 133}
134#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 134#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
diff --git a/mm/fremap.c b/mm/fremap.c
index 9ed4fd432467..048659c0c03d 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -195,10 +195,9 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
195 */ 195 */
196 if (mapping_cap_account_dirty(mapping)) { 196 if (mapping_cap_account_dirty(mapping)) {
197 unsigned long addr; 197 unsigned long addr;
198 struct file *file = vma->vm_file; 198 struct file *file = get_file(vma->vm_file);
199 199
200 flags &= MAP_NONBLOCK; 200 flags &= MAP_NONBLOCK;
201 get_file(file);
202 addr = mmap_region(file, start, size, 201 addr = mmap_region(file, start, size,
203 flags, vma->vm_flags, pgoff); 202 flags, vma->vm_flags, pgoff);
204 fput(file); 203 fput(file);
diff --git a/mm/mmap.c b/mm/mmap.c
index ae18a48e7e4e..872441e81914 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1301,8 +1301,7 @@ munmap_back:
1301 goto free_vma; 1301 goto free_vma;
1302 correct_wcount = 1; 1302 correct_wcount = 1;
1303 } 1303 }
1304 vma->vm_file = file; 1304 vma->vm_file = get_file(file);
1305 get_file(file);
1306 error = file->f_op->mmap(file, vma); 1305 error = file->f_op->mmap(file, vma);
1307 if (error) 1306 if (error)
1308 goto unmap_and_free_vma; 1307 goto unmap_and_free_vma;
diff --git a/mm/nommu.c b/mm/nommu.c
index d4b0c10872de..dee2ff89fd58 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1282,10 +1282,8 @@ unsigned long do_mmap_pgoff(struct file *file,
1282 vma->vm_pgoff = pgoff; 1282 vma->vm_pgoff = pgoff;
1283 1283
1284 if (file) { 1284 if (file) {
1285 region->vm_file = file; 1285 region->vm_file = get_file(file);
1286 get_file(file); 1286 vma->vm_file = get_file(file);
1287 vma->vm_file = file;
1288 get_file(file);
1289 if (vm_flags & VM_EXECUTABLE) { 1287 if (vm_flags & VM_EXECUTABLE) {
1290 added_exe_file_vma(current->mm); 1288 added_exe_file_vma(current->mm);
1291 vma->vm_mm = current->mm; 1289 vma->vm_mm = current->mm;
diff --git a/mm/readahead.c b/mm/readahead.c
index ea8f8fa21649..7963f2391236 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -579,19 +579,19 @@ do_readahead(struct address_space *mapping, struct file *filp,
579SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count) 579SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count)
580{ 580{
581 ssize_t ret; 581 ssize_t ret;
582 struct file *file; 582 struct fd f;
583 583
584 ret = -EBADF; 584 ret = -EBADF;
585 file = fget(fd); 585 f = fdget(fd);
586 if (file) { 586 if (f.file) {
587 if (file->f_mode & FMODE_READ) { 587 if (f.file->f_mode & FMODE_READ) {
588 struct address_space *mapping = file->f_mapping; 588 struct address_space *mapping = f.file->f_mapping;
589 pgoff_t start = offset >> PAGE_CACHE_SHIFT; 589 pgoff_t start = offset >> PAGE_CACHE_SHIFT;
590 pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT; 590 pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
591 unsigned long len = end - start + 1; 591 unsigned long len = end - start + 1;
592 ret = do_readahead(mapping, file, start, len); 592 ret = do_readahead(mapping, f.file, start, len);
593 } 593 }
594 fput(file); 594 fdput(f);
595 } 595 }
596 return ret; 596 return ret;
597} 597}
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 505f0ce3f10b..15656b8573f3 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -793,30 +793,28 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
793static int p9_socket_open(struct p9_client *client, struct socket *csocket) 793static int p9_socket_open(struct p9_client *client, struct socket *csocket)
794{ 794{
795 struct p9_trans_fd *p; 795 struct p9_trans_fd *p;
796 int ret, fd; 796 struct file *file;
797 int ret;
797 798
798 p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); 799 p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
799 if (!p) 800 if (!p)
800 return -ENOMEM; 801 return -ENOMEM;
801 802
802 csocket->sk->sk_allocation = GFP_NOIO; 803 csocket->sk->sk_allocation = GFP_NOIO;
803 fd = sock_map_fd(csocket, 0); 804 file = sock_alloc_file(csocket, 0, NULL);
804 if (fd < 0) { 805 if (IS_ERR(file)) {
805 pr_err("%s (%d): failed to map fd\n", 806 pr_err("%s (%d): failed to map fd\n",
806 __func__, task_pid_nr(current)); 807 __func__, task_pid_nr(current));
807 sock_release(csocket); 808 sock_release(csocket);
808 kfree(p); 809 kfree(p);
809 return fd; 810 return PTR_ERR(file);
810 } 811 }
811 812
812 get_file(csocket->file); 813 get_file(file);
813 get_file(csocket->file); 814 p->wr = p->rd = file;
814 p->wr = p->rd = csocket->file;
815 client->trans = p; 815 client->trans = p;
816 client->status = Connected; 816 client->status = Connected;
817 817
818 sys_close(fd); /* still racy */
819
820 p->rd->f_flags |= O_NONBLOCK; 818 p->rd->f_flags |= O_NONBLOCK;
821 819
822 p->conn = p9_conn_create(client); 820 p->conn = p9_conn_create(client);
diff --git a/net/compat.c b/net/compat.c
index 74ed1d7a84a2..79ae88485001 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -301,8 +301,7 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
301 break; 301 break;
302 } 302 }
303 /* Bump the usage count and install the file. */ 303 /* Bump the usage count and install the file. */
304 get_file(fp[i]); 304 fd_install(new_fd, get_file(fp[i]));
305 fd_install(new_fd, fp[i]);
306 } 305 }
307 306
308 if (i > 0) { 307 if (i > 0) {
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 4a83fb3c8e87..79285a36035f 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -239,38 +239,24 @@ out_free_devname:
239 return ret; 239 return ret;
240} 240}
241 241
242static int update_netprio(const void *v, struct file *file, unsigned n)
243{
244 int err;
245 struct socket *sock = sock_from_file(file, &err);
246 if (sock)
247 sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v;
248 return 0;
249}
250
242void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) 251void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
243{ 252{
244 struct task_struct *p; 253 struct task_struct *p;
254 void *v;
245 255
246 cgroup_taskset_for_each(p, cgrp, tset) { 256 cgroup_taskset_for_each(p, cgrp, tset) {
247 unsigned int fd;
248 struct fdtable *fdt;
249 struct files_struct *files;
250
251 task_lock(p); 257 task_lock(p);
252 files = p->files; 258 v = (void *)(unsigned long)task_netprioidx(p);
253 if (!files) { 259 iterate_fd(p->files, 0, update_netprio, v);
254 task_unlock(p);
255 continue;
256 }
257
258 spin_lock(&files->file_lock);
259 fdt = files_fdtable(files);
260 for (fd = 0; fd < fdt->max_fds; fd++) {
261 struct file *file;
262 struct socket *sock;
263 int err;
264
265 file = fcheck_files(files, fd);
266 if (!file)
267 continue;
268
269 sock = sock_from_file(file, &err);
270 if (sock)
271 sock_update_netprioidx(sock->sk, p);
272 }
273 spin_unlock(&files->file_lock);
274 task_unlock(p); 260 task_unlock(p);
275 } 261 }
276} 262}
diff --git a/net/core/scm.c b/net/core/scm.c
index 9c1c63da3ca8..ab570841a532 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -301,11 +301,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
301 break; 301 break;
302 } 302 }
303 /* Bump the usage count and install the file. */ 303 /* Bump the usage count and install the file. */
304 get_file(fp[i]);
305 sock = sock_from_file(fp[i], &err); 304 sock = sock_from_file(fp[i], &err);
306 if (sock) 305 if (sock)
307 sock_update_netprioidx(sock->sk, current); 306 sock_update_netprioidx(sock->sk, current);
308 fd_install(new_fd, fp[i]); 307 fd_install(new_fd, get_file(fp[i]));
309 } 308 }
310 309
311 if (i > 0) 310 if (i > 0)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d37d24ff197f..59d16ea927f0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -70,6 +70,7 @@
70#include <linux/init.h> 70#include <linux/init.h>
71#include <linux/crypto.h> 71#include <linux/crypto.h>
72#include <linux/slab.h> 72#include <linux/slab.h>
73#include <linux/file.h>
73 74
74#include <net/ip.h> 75#include <net/ip.h>
75#include <net/icmp.h> 76#include <net/icmp.h>
@@ -4292,6 +4293,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
4292{ 4293{
4293 sctp_peeloff_arg_t peeloff; 4294 sctp_peeloff_arg_t peeloff;
4294 struct socket *newsock; 4295 struct socket *newsock;
4296 struct file *newfile;
4295 int retval = 0; 4297 int retval = 0;
4296 4298
4297 if (len < sizeof(sctp_peeloff_arg_t)) 4299 if (len < sizeof(sctp_peeloff_arg_t))
@@ -4305,22 +4307,35 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
4305 goto out; 4307 goto out;
4306 4308
4307 /* Map the socket to an unused fd that can be returned to the user. */ 4309 /* Map the socket to an unused fd that can be returned to the user. */
4308 retval = sock_map_fd(newsock, 0); 4310 retval = get_unused_fd();
4309 if (retval < 0) { 4311 if (retval < 0) {
4310 sock_release(newsock); 4312 sock_release(newsock);
4311 goto out; 4313 goto out;
4312 } 4314 }
4313 4315
4316 newfile = sock_alloc_file(newsock, 0, NULL);
4317 if (unlikely(IS_ERR(newfile))) {
4318 put_unused_fd(retval);
4319 sock_release(newsock);
4320 return PTR_ERR(newfile);
4321 }
4322
4314 SCTP_DEBUG_PRINTK("%s: sk: %p newsk: %p sd: %d\n", 4323 SCTP_DEBUG_PRINTK("%s: sk: %p newsk: %p sd: %d\n",
4315 __func__, sk, newsock->sk, retval); 4324 __func__, sk, newsock->sk, retval);
4316 4325
4317 /* Return the fd mapped to the new socket. */ 4326 /* Return the fd mapped to the new socket. */
4327 if (put_user(len, optlen)) {
4328 fput(newfile);
4329 put_unused_fd(retval);
4330 return -EFAULT;
4331 }
4318 peeloff.sd = retval; 4332 peeloff.sd = retval;
4319 if (put_user(len, optlen)) 4333 if (copy_to_user(optval, &peeloff, len)) {
4334 fput(newfile);
4335 put_unused_fd(retval);
4320 return -EFAULT; 4336 return -EFAULT;
4321 if (copy_to_user(optval, &peeloff, len)) 4337 }
4322 retval = -EFAULT; 4338 fd_install(retval, newfile);
4323
4324out: 4339out:
4325 return retval; 4340 return retval;
4326} 4341}
diff --git a/net/socket.c b/net/socket.c
index 80dc7e84b046..d92c490e66fa 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -347,17 +347,11 @@ static struct file_system_type sock_fs_type = {
347 * but we take care of internal coherence yet. 347 * but we take care of internal coherence yet.
348 */ 348 */
349 349
350static int sock_alloc_file(struct socket *sock, struct file **f, int flags, 350struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
351 const char *dname)
352{ 351{
353 struct qstr name = { .name = "" }; 352 struct qstr name = { .name = "" };
354 struct path path; 353 struct path path;
355 struct file *file; 354 struct file *file;
356 int fd;
357
358 fd = get_unused_fd_flags(flags);
359 if (unlikely(fd < 0))
360 return fd;
361 355
362 if (dname) { 356 if (dname) {
363 name.name = dname; 357 name.name = dname;
@@ -367,10 +361,8 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags,
367 name.len = strlen(name.name); 361 name.len = strlen(name.name);
368 } 362 }
369 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); 363 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
370 if (unlikely(!path.dentry)) { 364 if (unlikely(!path.dentry))
371 put_unused_fd(fd); 365 return ERR_PTR(-ENOMEM);
372 return -ENOMEM;
373 }
374 path.mnt = mntget(sock_mnt); 366 path.mnt = mntget(sock_mnt);
375 367
376 d_instantiate(path.dentry, SOCK_INODE(sock)); 368 d_instantiate(path.dentry, SOCK_INODE(sock));
@@ -382,30 +374,33 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags,
382 /* drop dentry, keep inode */ 374 /* drop dentry, keep inode */
383 ihold(path.dentry->d_inode); 375 ihold(path.dentry->d_inode);
384 path_put(&path); 376 path_put(&path);
385 put_unused_fd(fd); 377 return ERR_PTR(-ENFILE);
386 return -ENFILE;
387 } 378 }
388 379
389 sock->file = file; 380 sock->file = file;
390 file->f_flags = O_RDWR | (flags & O_NONBLOCK); 381 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
391 file->f_pos = 0; 382 file->f_pos = 0;
392 file->private_data = sock; 383 file->private_data = sock;
393 384 return file;
394 *f = file;
395 return fd;
396} 385}
386EXPORT_SYMBOL(sock_alloc_file);
397 387
398int sock_map_fd(struct socket *sock, int flags) 388static int sock_map_fd(struct socket *sock, int flags)
399{ 389{
400 struct file *newfile; 390 struct file *newfile;
401 int fd = sock_alloc_file(sock, &newfile, flags, NULL); 391 int fd = get_unused_fd_flags(flags);
392 if (unlikely(fd < 0))
393 return fd;
402 394
403 if (likely(fd >= 0)) 395 newfile = sock_alloc_file(sock, flags, NULL);
396 if (likely(!IS_ERR(newfile))) {
404 fd_install(fd, newfile); 397 fd_install(fd, newfile);
398 return fd;
399 }
405 400
406 return fd; 401 put_unused_fd(fd);
402 return PTR_ERR(newfile);
407} 403}
408EXPORT_SYMBOL(sock_map_fd);
409 404
410struct socket *sock_from_file(struct file *file, int *err) 405struct socket *sock_from_file(struct file *file, int *err)
411{ 406{
@@ -1466,17 +1461,32 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1466 if (err < 0) 1461 if (err < 0)
1467 goto out_release_both; 1462 goto out_release_both;
1468 1463
1469 fd1 = sock_alloc_file(sock1, &newfile1, flags, NULL); 1464 fd1 = get_unused_fd_flags(flags);
1470 if (unlikely(fd1 < 0)) { 1465 if (unlikely(fd1 < 0)) {
1471 err = fd1; 1466 err = fd1;
1472 goto out_release_both; 1467 goto out_release_both;
1473 } 1468 }
1474 1469 fd2 = get_unused_fd_flags(flags);
1475 fd2 = sock_alloc_file(sock2, &newfile2, flags, NULL);
1476 if (unlikely(fd2 < 0)) { 1470 if (unlikely(fd2 < 0)) {
1477 err = fd2; 1471 err = fd2;
1472 put_unused_fd(fd1);
1473 goto out_release_both;
1474 }
1475
1476 newfile1 = sock_alloc_file(sock1, flags, NULL);
1477 if (unlikely(IS_ERR(newfile1))) {
1478 err = PTR_ERR(newfile1);
1479 put_unused_fd(fd1);
1480 put_unused_fd(fd2);
1481 goto out_release_both;
1482 }
1483
1484 newfile2 = sock_alloc_file(sock2, flags, NULL);
1485 if (IS_ERR(newfile2)) {
1486 err = PTR_ERR(newfile2);
1478 fput(newfile1); 1487 fput(newfile1);
1479 put_unused_fd(fd1); 1488 put_unused_fd(fd1);
1489 put_unused_fd(fd2);
1480 sock_release(sock2); 1490 sock_release(sock2);
1481 goto out; 1491 goto out;
1482 } 1492 }
@@ -1608,13 +1618,19 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1608 */ 1618 */
1609 __module_get(newsock->ops->owner); 1619 __module_get(newsock->ops->owner);
1610 1620
1611 newfd = sock_alloc_file(newsock, &newfile, flags, 1621 newfd = get_unused_fd_flags(flags);
1612 sock->sk->sk_prot_creator->name);
1613 if (unlikely(newfd < 0)) { 1622 if (unlikely(newfd < 0)) {
1614 err = newfd; 1623 err = newfd;
1615 sock_release(newsock); 1624 sock_release(newsock);
1616 goto out_put; 1625 goto out_put;
1617 } 1626 }
1627 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
1628 if (unlikely(IS_ERR(newfile))) {
1629 err = PTR_ERR(newfile);
1630 put_unused_fd(newfd);
1631 sock_release(newsock);
1632 goto out_put;
1633 }
1618 1634
1619 err = security_socket_accept(sock, newsock); 1635 err = security_socket_accept(sock, newsock);
1620 if (err) 1636 if (err)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 6c77f63c7591..651d8456611a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2088,15 +2088,19 @@ static int selinux_bprm_secureexec(struct linux_binprm *bprm)
2088 return (atsecure || cap_bprm_secureexec(bprm)); 2088 return (atsecure || cap_bprm_secureexec(bprm));
2089} 2089}
2090 2090
2091static int match_file(const void *p, struct file *file, unsigned fd)
2092{
2093 return file_has_perm(p, file, file_to_av(file)) ? fd + 1 : 0;
2094}
2095
2091/* Derived from fs/exec.c:flush_old_files. */ 2096/* Derived from fs/exec.c:flush_old_files. */
2092static inline void flush_unauthorized_files(const struct cred *cred, 2097static inline void flush_unauthorized_files(const struct cred *cred,
2093 struct files_struct *files) 2098 struct files_struct *files)
2094{ 2099{
2095 struct file *file, *devnull = NULL; 2100 struct file *file, *devnull = NULL;
2096 struct tty_struct *tty; 2101 struct tty_struct *tty;
2097 struct fdtable *fdt;
2098 long j = -1;
2099 int drop_tty = 0; 2102 int drop_tty = 0;
2103 unsigned n;
2100 2104
2101 tty = get_current_tty(); 2105 tty = get_current_tty();
2102 if (tty) { 2106 if (tty) {
@@ -2123,58 +2127,23 @@ static inline void flush_unauthorized_files(const struct cred *cred,
2123 no_tty(); 2127 no_tty();
2124 2128
2125 /* Revalidate access to inherited open files. */ 2129 /* Revalidate access to inherited open files. */
2126 spin_lock(&files->file_lock); 2130 n = iterate_fd(files, 0, match_file, cred);
2127 for (;;) { 2131 if (!n) /* none found? */
2128 unsigned long set, i; 2132 return;
2129 int fd;
2130
2131 j++;
2132 i = j * BITS_PER_LONG;
2133 fdt = files_fdtable(files);
2134 if (i >= fdt->max_fds)
2135 break;
2136 set = fdt->open_fds[j];
2137 if (!set)
2138 continue;
2139 spin_unlock(&files->file_lock);
2140 for ( ; set ; i++, set >>= 1) {
2141 if (set & 1) {
2142 file = fget(i);
2143 if (!file)
2144 continue;
2145 if (file_has_perm(cred,
2146 file,
2147 file_to_av(file))) {
2148 sys_close(i);
2149 fd = get_unused_fd();
2150 if (fd != i) {
2151 if (fd >= 0)
2152 put_unused_fd(fd);
2153 fput(file);
2154 continue;
2155 }
2156 if (devnull) {
2157 get_file(devnull);
2158 } else {
2159 devnull = dentry_open(
2160 &selinux_null,
2161 O_RDWR, cred);
2162 if (IS_ERR(devnull)) {
2163 devnull = NULL;
2164 put_unused_fd(fd);
2165 fput(file);
2166 continue;
2167 }
2168 }
2169 fd_install(fd, devnull);
2170 }
2171 fput(file);
2172 }
2173 }
2174 spin_lock(&files->file_lock);
2175 2133
2134 devnull = dentry_open(&selinux_null, O_RDWR, cred);
2135 if (!IS_ERR(devnull)) {
2136 /* replace all the matching ones with this */
2137 do {
2138 replace_fd(n - 1, get_file(devnull), 0);
2139 } while ((n = iterate_fd(files, n, match_file, cred)) != 0);
2140 fput(devnull);
2141 } else {
2142 /* just close all the matching ones */
2143 do {
2144 replace_fd(n - 1, NULL, 0);
2145 } while ((n = iterate_fd(files, n, match_file, cred)) != 0);
2176 } 2146 }
2177 spin_unlock(&files->file_lock);
2178} 2147}
2179 2148
2180/* 2149/*
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 53b5ada8f7c3..20554eff5a21 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -1563,25 +1563,25 @@ static int snd_pcm_drop(struct snd_pcm_substream *substream)
1563 1563
1564 1564
1565/* WARNING: Don't forget to fput back the file */ 1565/* WARNING: Don't forget to fput back the file */
1566static struct file *snd_pcm_file_fd(int fd) 1566static struct file *snd_pcm_file_fd(int fd, int *fput_needed)
1567{ 1567{
1568 struct file *file; 1568 struct file *file;
1569 struct inode *inode; 1569 struct inode *inode;
1570 unsigned int minor; 1570 unsigned int minor;
1571 1571
1572 file = fget(fd); 1572 file = fget_light(fd, fput_needed);
1573 if (!file) 1573 if (!file)
1574 return NULL; 1574 return NULL;
1575 inode = file->f_path.dentry->d_inode; 1575 inode = file->f_path.dentry->d_inode;
1576 if (!S_ISCHR(inode->i_mode) || 1576 if (!S_ISCHR(inode->i_mode) ||
1577 imajor(inode) != snd_major) { 1577 imajor(inode) != snd_major) {
1578 fput(file); 1578 fput_light(file, *fput_needed);
1579 return NULL; 1579 return NULL;
1580 } 1580 }
1581 minor = iminor(inode); 1581 minor = iminor(inode);
1582 if (!snd_lookup_minor_data(minor, SNDRV_DEVICE_TYPE_PCM_PLAYBACK) && 1582 if (!snd_lookup_minor_data(minor, SNDRV_DEVICE_TYPE_PCM_PLAYBACK) &&
1583 !snd_lookup_minor_data(minor, SNDRV_DEVICE_TYPE_PCM_CAPTURE)) { 1583 !snd_lookup_minor_data(minor, SNDRV_DEVICE_TYPE_PCM_CAPTURE)) {
1584 fput(file); 1584 fput_light(file, *fput_needed);
1585 return NULL; 1585 return NULL;
1586 } 1586 }
1587 return file; 1587 return file;
@@ -1597,8 +1597,9 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd)
1597 struct snd_pcm_file *pcm_file; 1597 struct snd_pcm_file *pcm_file;
1598 struct snd_pcm_substream *substream1; 1598 struct snd_pcm_substream *substream1;
1599 struct snd_pcm_group *group; 1599 struct snd_pcm_group *group;
1600 int fput_needed;
1600 1601
1601 file = snd_pcm_file_fd(fd); 1602 file = snd_pcm_file_fd(fd, &fput_needed);
1602 if (!file) 1603 if (!file)
1603 return -EBADFD; 1604 return -EBADFD;
1604 pcm_file = file->private_data; 1605 pcm_file = file->private_data;
@@ -1633,7 +1634,7 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd)
1633 write_unlock_irq(&snd_pcm_link_rwlock); 1634 write_unlock_irq(&snd_pcm_link_rwlock);
1634 up_write(&snd_pcm_link_rwsem); 1635 up_write(&snd_pcm_link_rwsem);
1635 _nolock: 1636 _nolock:
1636 fput(file); 1637 fput_light(file, fput_needed);
1637 if (res < 0) 1638 if (res < 0)
1638 kfree(group); 1639 kfree(group);
1639 return res; 1640 return res;