aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/alpha/kernel/osf_sys.c36
-rw-r--r--arch/parisc/hpux/sys_hpux.c65
-rw-r--r--arch/powerpc/platforms/cell/spufs/syscalls.c2
-rw-r--r--arch/um/drivers/mconsole_kern.c21
-rw-r--r--arch/x86/ia32/ia32entry.S2
-rw-r--r--arch/x86/include/asm/unistd_32.h4
-rw-r--r--arch/x86/include/asm/unistd_64.h4
-rw-r--r--arch/x86/kernel/syscall_table_32.S2
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/btrfs/export.c8
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/compat.c61
-rw-r--r--fs/exec.c18
-rw-r--r--fs/exportfs/expfs.c11
-rw-r--r--fs/ext3/namei.c7
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fcntl.c37
-rw-r--r--fs/fhandle.c265
-rw-r--r--fs/file_table.c55
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/internal.h13
-rw-r--r--fs/isofs/export.c8
-rw-r--r--fs/jfs/namei.c3
-rw-r--r--fs/namei.c1498
-rw-r--r--fs/namespace.c16
-rw-r--r--fs/nfsctl.c21
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/open.c126
-rw-r--r--fs/reiserfs/inode.c7
-rw-r--r--fs/reiserfs/namei.c4
-rw-r--r--fs/stat.c7
-rw-r--r--fs/statfs.c176
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/udf/namei.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--include/asm-generic/fcntl.h4
-rw-r--r--include/asm-generic/unistd.h6
-rw-r--r--include/linux/exportfs.h9
-rw-r--r--include/linux/fcntl.h1
-rw-r--r--include/linux/file.h2
-rw-r--r--include/linux/fs.h19
-rw-r--r--include/linux/namei.h7
-rw-r--r--include/linux/syscalls.h8
-rw-r--r--init/Kconfig12
-rw-r--r--kernel/audit_watch.c85
-rw-r--r--kernel/sys_ni.c5
-rw-r--r--kernel/sysctl_binary.c19
-rw-r--r--mm/shmem.c4
-rw-r--r--net/core/scm.c2
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/unix/garbage.c2
57 files changed, 1483 insertions, 1253 deletions
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index fe698b5045e9..376f22130791 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -230,44 +230,24 @@ linux_to_osf_statfs(struct kstatfs *linux_stat, struct osf_statfs __user *osf_st
230 return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0; 230 return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0;
231} 231}
232 232
233static int 233SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
234do_osf_statfs(struct path *path, struct osf_statfs __user *buffer, 234 struct osf_statfs __user *, buffer, unsigned long, bufsiz)
235 unsigned long bufsiz)
236{ 235{
237 struct kstatfs linux_stat; 236 struct kstatfs linux_stat;
238 int error = vfs_statfs(path, &linux_stat); 237 int error = user_statfs(pathname, &linux_stat);
239 if (!error) 238 if (!error)
240 error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz); 239 error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
241 return error; 240 return error;
242} 241}
243 242
244SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
245 struct osf_statfs __user *, buffer, unsigned long, bufsiz)
246{
247 struct path path;
248 int retval;
249
250 retval = user_path(pathname, &path);
251 if (!retval) {
252 retval = do_osf_statfs(&path, buffer, bufsiz);
253 path_put(&path);
254 }
255 return retval;
256}
257
258SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd, 243SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd,
259 struct osf_statfs __user *, buffer, unsigned long, bufsiz) 244 struct osf_statfs __user *, buffer, unsigned long, bufsiz)
260{ 245{
261 struct file *file; 246 struct kstatfs linux_stat;
262 int retval; 247 int error = fd_statfs(fd, &linux_stat);
263 248 if (!error)
264 retval = -EBADF; 249 error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
265 file = fget(fd); 250 return error;
266 if (file) {
267 retval = do_osf_statfs(&file->f_path, buffer, bufsiz);
268 fput(file);
269 }
270 return retval;
271} 251}
272 252
273/* 253/*
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index 30394081d9b6..6ab9580b0b00 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -185,26 +185,21 @@ struct hpux_statfs {
185 int16_t f_pad; 185 int16_t f_pad;
186}; 186};
187 187
188static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf) 188static int do_statfs_hpux(struct kstatfs *st, struct hpux_statfs __user *p)
189{ 189{
190 struct kstatfs st; 190 struct hpux_statfs buf;
191 int retval; 191 memset(&buf, 0, sizeof(buf));
192 192 buf.f_type = st->f_type;
193 retval = vfs_statfs(path, &st); 193 buf.f_bsize = st->f_bsize;
194 if (retval) 194 buf.f_blocks = st->f_blocks;
195 return retval; 195 buf.f_bfree = st->f_bfree;
196 196 buf.f_bavail = st->f_bavail;
197 memset(buf, 0, sizeof(*buf)); 197 buf.f_files = st->f_files;
198 buf->f_type = st.f_type; 198 buf.f_ffree = st->f_ffree;
199 buf->f_bsize = st.f_bsize; 199 buf.f_fsid[0] = st->f_fsid.val[0];
200 buf->f_blocks = st.f_blocks; 200 buf.f_fsid[1] = st->f_fsid.val[1];
201 buf->f_bfree = st.f_bfree; 201 if (copy_to_user(p, &buf, sizeof(buf)))
202 buf->f_bavail = st.f_bavail; 202 return -EFAULT;
203 buf->f_files = st.f_files;
204 buf->f_ffree = st.f_ffree;
205 buf->f_fsid[0] = st.f_fsid.val[0];
206 buf->f_fsid[1] = st.f_fsid.val[1];
207
208 return 0; 203 return 0;
209} 204}
210 205
@@ -212,35 +207,19 @@ static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf)
212asmlinkage long hpux_statfs(const char __user *pathname, 207asmlinkage long hpux_statfs(const char __user *pathname,
213 struct hpux_statfs __user *buf) 208 struct hpux_statfs __user *buf)
214{ 209{
215 struct path path; 210 struct kstatfs st;
216 int error; 211 int error = user_statfs(pathname, &st);
217 212 if (!error)
218 error = user_path(pathname, &path); 213 error = do_statfs_hpux(&st, buf);
219 if (!error) {
220 struct hpux_statfs tmp;
221 error = do_statfs_hpux(&path, &tmp);
222 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
223 error = -EFAULT;
224 path_put(&path);
225 }
226 return error; 214 return error;
227} 215}
228 216
229asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf) 217asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf)
230{ 218{
231 struct file *file; 219 struct kstatfs st;
232 struct hpux_statfs tmp; 220 int error = fd_statfs(fd, &st);
233 int error; 221 if (!error)
234 222 error = do_statfs_hpux(&st, buf);
235 error = -EBADF;
236 file = fget(fd);
237 if (!file)
238 goto out;
239 error = do_statfs_hpux(&file->f_path, &tmp);
240 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
241 error = -EFAULT;
242 fput(file);
243 out:
244 return error; 223 return error;
245} 224}
246 225
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 187a7d32f86a..a3d2ce54ea2e 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -70,7 +70,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
70 if (!IS_ERR(tmp)) { 70 if (!IS_ERR(tmp)) {
71 struct nameidata nd; 71 struct nameidata nd;
72 72
73 ret = path_lookup(tmp, LOOKUP_PARENT, &nd); 73 ret = kern_path_parent(tmp, &nd);
74 if (!ret) { 74 if (!ret) {
75 nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE; 75 nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE;
76 ret = spufs_create(&nd, flags, mode, neighbor); 76 ret = spufs_create(&nd, flags, mode, neighbor);
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 975613b23dcf..c70e047eed72 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -124,35 +124,18 @@ void mconsole_log(struct mc_request *req)
124#if 0 124#if 0
125void mconsole_proc(struct mc_request *req) 125void mconsole_proc(struct mc_request *req)
126{ 126{
127 struct nameidata nd;
128 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; 127 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
129 struct file *file; 128 struct file *file;
130 int n, err; 129 int n;
131 char *ptr = req->request.data, *buf; 130 char *ptr = req->request.data, *buf;
132 mm_segment_t old_fs = get_fs(); 131 mm_segment_t old_fs = get_fs();
133 132
134 ptr += strlen("proc"); 133 ptr += strlen("proc");
135 ptr = skip_spaces(ptr); 134 ptr = skip_spaces(ptr);
136 135
137 err = vfs_path_lookup(mnt->mnt_root, mnt, ptr, LOOKUP_FOLLOW, &nd); 136 file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
138 if (err) {
139 mconsole_reply(req, "Failed to look up file", 1, 0);
140 goto out;
141 }
142
143 err = may_open(&nd.path, MAY_READ, O_RDONLY);
144 if (result) {
145 mconsole_reply(req, "Failed to open file", 1, 0);
146 path_put(&nd.path);
147 goto out;
148 }
149
150 file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
151 current_cred());
152 err = PTR_ERR(file);
153 if (IS_ERR(file)) { 137 if (IS_ERR(file)) {
154 mconsole_reply(req, "Failed to open file", 1, 0); 138 mconsole_reply(req, "Failed to open file", 1, 0);
155 path_put(&nd.path);
156 goto out; 139 goto out;
157 } 140 }
158 141
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 518bb99c3394..98d353edfff3 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -851,4 +851,6 @@ ia32_sys_call_table:
851 .quad sys_fanotify_init 851 .quad sys_fanotify_init
852 .quad sys32_fanotify_mark 852 .quad sys32_fanotify_mark
853 .quad sys_prlimit64 /* 340 */ 853 .quad sys_prlimit64 /* 340 */
854 .quad sys_name_to_handle_at
855 .quad compat_sys_open_by_handle_at
854ia32_syscall_end: 856ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index b766a5e8ba0e..f4c4973fc2ac 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -346,10 +346,12 @@
346#define __NR_fanotify_init 338 346#define __NR_fanotify_init 338
347#define __NR_fanotify_mark 339 347#define __NR_fanotify_mark 339
348#define __NR_prlimit64 340 348#define __NR_prlimit64 340
349#define __NR_name_to_handle_at 341
350#define __NR_open_by_handle_at 342
349 351
350#ifdef __KERNEL__ 352#ifdef __KERNEL__
351 353
352#define NR_syscalls 341 354#define NR_syscalls 343
353 355
354#define __ARCH_WANT_IPC_PARSE_VERSION 356#define __ARCH_WANT_IPC_PARSE_VERSION
355#define __ARCH_WANT_OLD_READDIR 357#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 363e9b8a715b..81a3d5b70235 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -669,6 +669,10 @@ __SYSCALL(__NR_fanotify_init, sys_fanotify_init)
669__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) 669__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
670#define __NR_prlimit64 302 670#define __NR_prlimit64 302
671__SYSCALL(__NR_prlimit64, sys_prlimit64) 671__SYSCALL(__NR_prlimit64, sys_prlimit64)
672#define __NR_name_to_handle_at 303
673__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
674#define __NR_open_by_handle_at 304
675__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
672 676
673#ifndef __NO_STUBS 677#ifndef __NO_STUBS
674#define __ARCH_WANT_OLD_READDIR 678#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index b35786dc9b8f..c314b2199efd 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -340,3 +340,5 @@ ENTRY(sys_call_table)
340 .long sys_fanotify_init 340 .long sys_fanotify_init
341 .long sys_fanotify_mark 341 .long sys_fanotify_mark
342 .long sys_prlimit64 /* 340 */ 342 .long sys_prlimit64 /* 340 */
343 .long sys_name_to_handle_at
344 .long sys_open_by_handle_at
diff --git a/fs/Kconfig b/fs/Kconfig
index 3db9caa57edc..7cb53aafac1e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
47 def_bool n 47 def_bool n
48 48
49config EXPORTFS 49config EXPORTFS
50 tristate 50 bool
51 51
52config FILE_LOCKING 52config FILE_LOCKING
53 bool "Enable POSIX file locking API" if EXPERT 53 bool "Enable POSIX file locking API" if EXPERT
diff --git a/fs/Makefile b/fs/Makefile
index a7f7cef0c0c8..ba01202844c5 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
48obj-$(CONFIG_NFS_COMMON) += nfs_common/ 48obj-$(CONFIG_NFS_COMMON) += nfs_common/
49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o 49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
50 50
51obj-$(CONFIG_FHANDLE) += fhandle.o
52
51obj-y += quota/ 53obj-y += quota/
52 54
53obj-$(CONFIG_PROC_FS) += proc/ 55obj-$(CONFIG_PROC_FS) += proc/
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ff27d7a477b2..b4ffad859adb 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
21 int len = *max_len; 21 int len = *max_len;
22 int type; 22 int type;
23 23
24 if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || 24 if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
25 (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) 25 *max_len = BTRFS_FID_SIZE_CONNECTABLE;
26 return 255; 26 return 255;
27 } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
28 *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 return 255;
30 }
27 31
28 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 32 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 type = FILEID_BTRFS_WITHOUT_PARENT; 33 type = FILEID_BTRFS_WITHOUT_PARENT;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9007bbd01dbf..4a0107e18747 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4806,9 +4806,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4806 int err; 4806 int err;
4807 int drop_inode = 0; 4807 int drop_inode = 0;
4808 4808
4809 if (inode->i_nlink == 0)
4810 return -ENOENT;
4811
4812 /* do not allow sys_link's with other subvols of the same device */ 4809 /* do not allow sys_link's with other subvols of the same device */
4813 if (root->objectid != BTRFS_I(inode)->root->objectid) 4810 if (root->objectid != BTRFS_I(inode)->root->objectid)
4814 return -EPERM; 4811 return -EPERM;
diff --git a/fs/compat.c b/fs/compat.c
index 691c3fd8ce1d..c6d31a3bab88 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -262,35 +262,19 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
262 */ 262 */
263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) 263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
264{ 264{
265 struct path path; 265 struct kstatfs tmp;
266 int error; 266 int error = user_statfs(pathname, &tmp);
267 267 if (!error)
268 error = user_path(pathname, &path); 268 error = put_compat_statfs(buf, &tmp);
269 if (!error) {
270 struct kstatfs tmp;
271 error = vfs_statfs(&path, &tmp);
272 if (!error)
273 error = put_compat_statfs(buf, &tmp);
274 path_put(&path);
275 }
276 return error; 269 return error;
277} 270}
278 271
279asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) 272asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf)
280{ 273{
281 struct file * file;
282 struct kstatfs tmp; 274 struct kstatfs tmp;
283 int error; 275 int error = fd_statfs(fd, &tmp);
284
285 error = -EBADF;
286 file = fget(fd);
287 if (!file)
288 goto out;
289 error = vfs_statfs(&file->f_path, &tmp);
290 if (!error) 276 if (!error)
291 error = put_compat_statfs(buf, &tmp); 277 error = put_compat_statfs(buf, &tmp);
292 fput(file);
293out:
294 return error; 278 return error;
295} 279}
296 280
@@ -329,41 +313,29 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
329 313
330asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) 314asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
331{ 315{
332 struct path path; 316 struct kstatfs tmp;
333 int error; 317 int error;
334 318
335 if (sz != sizeof(*buf)) 319 if (sz != sizeof(*buf))
336 return -EINVAL; 320 return -EINVAL;
337 321
338 error = user_path(pathname, &path); 322 error = user_statfs(pathname, &tmp);
339 if (!error) { 323 if (!error)
340 struct kstatfs tmp; 324 error = put_compat_statfs64(buf, &tmp);
341 error = vfs_statfs(&path, &tmp);
342 if (!error)
343 error = put_compat_statfs64(buf, &tmp);
344 path_put(&path);
345 }
346 return error; 325 return error;
347} 326}
348 327
349asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) 328asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf)
350{ 329{
351 struct file * file;
352 struct kstatfs tmp; 330 struct kstatfs tmp;
353 int error; 331 int error;
354 332
355 if (sz != sizeof(*buf)) 333 if (sz != sizeof(*buf))
356 return -EINVAL; 334 return -EINVAL;
357 335
358 error = -EBADF; 336 error = fd_statfs(fd, &tmp);
359 file = fget(fd);
360 if (!file)
361 goto out;
362 error = vfs_statfs(&file->f_path, &tmp);
363 if (!error) 337 if (!error)
364 error = put_compat_statfs64(buf, &tmp); 338 error = put_compat_statfs64(buf, &tmp);
365 fput(file);
366out:
367 return error; 339 return error;
368} 340}
369 341
@@ -2312,3 +2284,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
2312} 2284}
2313 2285
2314#endif /* CONFIG_TIMERFD */ 2286#endif /* CONFIG_TIMERFD */
2287
2288#ifdef CONFIG_FHANDLE
2289/*
2290 * Exactly like fs/fhandle.c:sys_open_by_handle_at(), except that it
2291 * doesn't set the O_LARGEFILE flag.
2292 */
2293asmlinkage long
2294compat_sys_open_by_handle_at(int mountdirfd,
2295 struct file_handle __user *handle, int flags)
2296{
2297 return do_handle_open(mountdirfd, handle, flags);
2298}
2299#endif
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6ab..ba99e1abb1aa 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
115 struct file *file; 115 struct file *file;
116 char *tmp = getname(library); 116 char *tmp = getname(library);
117 int error = PTR_ERR(tmp); 117 int error = PTR_ERR(tmp);
118 static const struct open_flags uselib_flags = {
119 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
120 .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
121 .intent = LOOKUP_OPEN
122 };
118 123
119 if (IS_ERR(tmp)) 124 if (IS_ERR(tmp))
120 goto out; 125 goto out;
121 126
122 file = do_filp_open(AT_FDCWD, tmp, 127 file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
123 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
124 MAY_READ | MAY_EXEC | MAY_OPEN);
125 putname(tmp); 128 putname(tmp);
126 error = PTR_ERR(file); 129 error = PTR_ERR(file);
127 if (IS_ERR(file)) 130 if (IS_ERR(file))
@@ -721,10 +724,13 @@ struct file *open_exec(const char *name)
721{ 724{
722 struct file *file; 725 struct file *file;
723 int err; 726 int err;
727 static const struct open_flags open_exec_flags = {
728 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
729 .acc_mode = MAY_EXEC | MAY_OPEN,
730 .intent = LOOKUP_OPEN
731 };
724 732
725 file = do_filp_open(AT_FDCWD, name, 733 file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
726 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
727 MAY_EXEC | MAY_OPEN);
728 if (IS_ERR(file)) 734 if (IS_ERR(file))
729 goto out; 735 goto out;
730 736
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 4b6825740dd5..b05acb796135 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -320,9 +320,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
320 struct inode * inode = dentry->d_inode; 320 struct inode * inode = dentry->d_inode;
321 int len = *max_len; 321 int len = *max_len;
322 int type = FILEID_INO32_GEN; 322 int type = FILEID_INO32_GEN;
323 323
324 if (len < 2 || (connectable && len < 4)) 324 if (connectable && (len < 4)) {
325 *max_len = 4;
326 return 255;
327 } else if (len < 2) {
328 *max_len = 2;
325 return 255; 329 return 255;
330 }
326 331
327 len = 2; 332 len = 2;
328 fid->i32.ino = inode->i_ino; 333 fid->i32.ino = inode->i_ino;
@@ -369,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
369 /* 374 /*
370 * Try to get any dentry for the given file handle from the filesystem. 375 * Try to get any dentry for the given file handle from the filesystem.
371 */ 376 */
377 if (!nop || !nop->fh_to_dentry)
378 return ERR_PTR(-ESTALE);
372 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); 379 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
373 if (!result) 380 if (!result)
374 result = ERR_PTR(-ESTALE); 381 result = ERR_PTR(-ESTALE);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b27ba71810ec..561f69256266 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2253,13 +2253,6 @@ static int ext3_link (struct dentry * old_dentry,
2253 2253
2254 dquot_initialize(dir); 2254 dquot_initialize(dir);
2255 2255
2256 /*
2257 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2258 * otherwise has the potential to corrupt the orphan inode list.
2259 */
2260 if (inode->i_nlink == 0)
2261 return -ENOENT;
2262
2263retry: 2256retry:
2264 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2257 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2265 EXT3_INDEX_EXTRA_TRANS_BLOCKS); 2258 EXT3_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 85c8cc8f2473..9cc19a1dea8e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1936,6 +1936,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1936 sb->s_qcop = &ext3_qctl_operations; 1936 sb->s_qcop = &ext3_qctl_operations;
1937 sb->dq_op = &ext3_quota_operations; 1937 sb->dq_op = &ext3_quota_operations;
1938#endif 1938#endif
1939 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
1939 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1940 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1940 mutex_init(&sbi->s_orphan_lock); 1941 mutex_init(&sbi->s_orphan_lock);
1941 mutex_init(&sbi->s_resize_lock); 1942 mutex_init(&sbi->s_resize_lock);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5485390d32c5..e781b7ea5630 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2304,13 +2304,6 @@ static int ext4_link(struct dentry *old_dentry,
2304 2304
2305 dquot_initialize(dir); 2305 dquot_initialize(dir);
2306 2306
2307 /*
2308 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2309 * otherwise has the potential to corrupt the orphan inode list.
2310 */
2311 if (inode->i_nlink == 0)
2312 return -ENOENT;
2313
2314retry: 2307retry:
2315 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2308 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2316 EXT4_INDEX_EXTRA_TRANS_BLOCKS); 2309 EXT4_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f6a318f836b2..5977b356a435 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3415,6 +3415,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3415 sb->s_qcop = &ext4_qctl_operations; 3415 sb->s_qcop = &ext4_qctl_operations;
3416 sb->dq_op = &ext4_quota_operations; 3416 sb->dq_op = &ext4_quota_operations;
3417#endif 3417#endif
3418 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3419
3418 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3420 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3419 mutex_init(&sbi->s_orphan_lock); 3421 mutex_init(&sbi->s_orphan_lock);
3420 mutex_init(&sbi->s_resize_lock); 3422 mutex_init(&sbi->s_resize_lock);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 86753fe10bd1..0e277ec4b612 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -757,8 +757,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
757 struct inode *inode = de->d_inode; 757 struct inode *inode = de->d_inode;
758 u32 ipos_h, ipos_m, ipos_l; 758 u32 ipos_h, ipos_m, ipos_l;
759 759
760 if (len < 5) 760 if (len < 5) {
761 *lenp = 5;
761 return 255; /* no room */ 762 return 255; /* no room */
763 }
762 764
763 ipos_h = MSDOS_I(inode)->i_pos >> 8; 765 ipos_h = MSDOS_I(inode)->i_pos >> 8;
764 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24; 766 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index cb1026181bdc..6c82e5bac039 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
131SYSCALL_DEFINE1(dup, unsigned int, fildes) 131SYSCALL_DEFINE1(dup, unsigned int, fildes)
132{ 132{
133 int ret = -EBADF; 133 int ret = -EBADF;
134 struct file *file = fget(fildes); 134 struct file *file = fget_raw(fildes);
135 135
136 if (file) { 136 if (file) {
137 ret = get_unused_fd(); 137 ret = get_unused_fd();
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
426 return err; 426 return err;
427} 427}
428 428
429static int check_fcntl_cmd(unsigned cmd)
430{
431 switch (cmd) {
432 case F_DUPFD:
433 case F_DUPFD_CLOEXEC:
434 case F_GETFD:
435 case F_SETFD:
436 case F_GETFL:
437 return 1;
438 }
439 return 0;
440}
441
429SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 442SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
430{ 443{
431 struct file *filp; 444 struct file *filp;
432 long err = -EBADF; 445 long err = -EBADF;
433 446
434 filp = fget(fd); 447 filp = fget_raw(fd);
435 if (!filp) 448 if (!filp)
436 goto out; 449 goto out;
437 450
451 if (unlikely(filp->f_mode & FMODE_PATH)) {
452 if (!check_fcntl_cmd(cmd)) {
453 fput(filp);
454 goto out;
455 }
456 }
457
438 err = security_file_fcntl(filp, cmd, arg); 458 err = security_file_fcntl(filp, cmd, arg);
439 if (err) { 459 if (err) {
440 fput(filp); 460 fput(filp);
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
456 long err; 476 long err;
457 477
458 err = -EBADF; 478 err = -EBADF;
459 filp = fget(fd); 479 filp = fget_raw(fd);
460 if (!filp) 480 if (!filp)
461 goto out; 481 goto out;
462 482
483 if (unlikely(filp->f_mode & FMODE_PATH)) {
484 if (!check_fcntl_cmd(cmd)) {
485 fput(filp);
486 goto out;
487 }
488 }
489
463 err = security_file_fcntl(filp, cmd, arg); 490 err = security_file_fcntl(filp, cmd, arg);
464 if (err) { 491 if (err) {
465 fput(filp); 492 fput(filp);
@@ -808,14 +835,14 @@ static int __init fcntl_init(void)
808 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 835 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
809 * is defined as O_NONBLOCK on some platforms and not on others. 836 * is defined as O_NONBLOCK on some platforms and not on others.
810 */ 837 */
811 BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 838 BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
812 O_RDONLY | O_WRONLY | O_RDWR | 839 O_RDONLY | O_WRONLY | O_RDWR |
813 O_CREAT | O_EXCL | O_NOCTTY | 840 O_CREAT | O_EXCL | O_NOCTTY |
814 O_TRUNC | O_APPEND | /* O_NONBLOCK | */ 841 O_TRUNC | O_APPEND | /* O_NONBLOCK | */
815 __O_SYNC | O_DSYNC | FASYNC | 842 __O_SYNC | O_DSYNC | FASYNC |
816 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 843 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 844 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
818 __FMODE_EXEC 845 __FMODE_EXEC | O_PATH
819 )); 846 ));
820 847
821 fasync_cache = kmem_cache_create("fasync_cache", 848 fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fhandle.c b/fs/fhandle.c
new file mode 100644
index 000000000000..bf93ad2bee07
--- /dev/null
+++ b/fs/fhandle.c
@@ -0,0 +1,265 @@
1#include <linux/syscalls.h>
2#include <linux/slab.h>
3#include <linux/fs.h>
4#include <linux/file.h>
5#include <linux/mount.h>
6#include <linux/namei.h>
7#include <linux/exportfs.h>
8#include <linux/fs_struct.h>
9#include <linux/fsnotify.h>
10#include <asm/uaccess.h>
11#include "internal.h"
12
13static long do_sys_name_to_handle(struct path *path,
14 struct file_handle __user *ufh,
15 int __user *mnt_id)
16{
17 long retval;
18 struct file_handle f_handle;
19 int handle_dwords, handle_bytes;
20 struct file_handle *handle = NULL;
21
22 /*
23 * We need to make sure whether the file system
24 * supports decoding of the file handle
25 */
26 if (!path->mnt->mnt_sb->s_export_op ||
27 !path->mnt->mnt_sb->s_export_op->fh_to_dentry)
28 return -EOPNOTSUPP;
29
30 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
31 return -EFAULT;
32
33 if (f_handle.handle_bytes > MAX_HANDLE_SZ)
34 return -EINVAL;
35
36 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
37 GFP_KERNEL);
38 if (!handle)
39 return -ENOMEM;
40
41 /* convert handle size to multiple of sizeof(u32) */
42 handle_dwords = f_handle.handle_bytes >> 2;
43
44 /* we ask for a non connected handle */
45 retval = exportfs_encode_fh(path->dentry,
46 (struct fid *)handle->f_handle,
47 &handle_dwords, 0);
48 handle->handle_type = retval;
49 /* convert handle size to bytes */
50 handle_bytes = handle_dwords * sizeof(u32);
51 handle->handle_bytes = handle_bytes;
52 if ((handle->handle_bytes > f_handle.handle_bytes) ||
53 (retval == 255) || (retval == -ENOSPC)) {
54 /* As per the old exportfs_encode_fh documentation
55 * we could return ENOSPC to indicate overflow,
56 * but file systems always returned 255. So handle
57 * both values.
58 */
59 /*
60 * set the handle size to zero so we copy only
61 * non variable part of the file_handle
62 */
63 handle_bytes = 0;
64 retval = -EOVERFLOW;
65 } else
66 retval = 0;
67 /* copy the mount id */
68 if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
69 copy_to_user(ufh, handle,
70 sizeof(struct file_handle) + handle_bytes))
71 retval = -EFAULT;
72 kfree(handle);
73 return retval;
74}
75
76/**
77 * sys_name_to_handle_at: convert name to handle
78 * @dfd: directory relative to which name is interpreted if not absolute
79 * @name: name that should be converted to handle.
80 * @handle: resulting file handle
81 * @mnt_id: mount id of the file system containing the file
82 * @flag: flag value to indicate whether to follow symlink or not
83 *
84 * @handle->handle_bytes indicates the space available to store the
85 * variable part of the file handle in bytes. If there is not
86 * enough space, the field is updated to return the minimum
87 * value required.
88 */
89SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
90 struct file_handle __user *, handle, int __user *, mnt_id,
91 int, flag)
92{
93 struct path path;
94 int lookup_flags;
95 int err;
96
97 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
98 return -EINVAL;
99
100 lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
101 if (flag & AT_EMPTY_PATH)
102 lookup_flags |= LOOKUP_EMPTY;
103 err = user_path_at(dfd, name, lookup_flags, &path);
104 if (!err) {
105 err = do_sys_name_to_handle(&path, handle, mnt_id);
106 path_put(&path);
107 }
108 return err;
109}
110
111static struct vfsmount *get_vfsmount_from_fd(int fd)
112{
113 struct path path;
114
115 if (fd == AT_FDCWD) {
116 struct fs_struct *fs = current->fs;
117 spin_lock(&fs->lock);
118 path = fs->pwd;
119 mntget(path.mnt);
120 spin_unlock(&fs->lock);
121 } else {
122 int fput_needed;
123 struct file *file = fget_light(fd, &fput_needed);
124 if (!file)
125 return ERR_PTR(-EBADF);
126 path = file->f_path;
127 mntget(path.mnt);
128 fput_light(file, fput_needed);
129 }
130 return path.mnt;
131}
132
133static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
134{
135 return 1;
136}
137
138static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
139 struct path *path)
140{
141 int retval = 0;
142 int handle_dwords;
143
144 path->mnt = get_vfsmount_from_fd(mountdirfd);
145 if (IS_ERR(path->mnt)) {
146 retval = PTR_ERR(path->mnt);
147 goto out_err;
148 }
149 /* change the handle size to multiple of sizeof(u32) */
150 handle_dwords = handle->handle_bytes >> 2;
151 path->dentry = exportfs_decode_fh(path->mnt,
152 (struct fid *)handle->f_handle,
153 handle_dwords, handle->handle_type,
154 vfs_dentry_acceptable, NULL);
155 if (IS_ERR(path->dentry)) {
156 retval = PTR_ERR(path->dentry);
157 goto out_mnt;
158 }
159 return 0;
160out_mnt:
161 mntput(path->mnt);
162out_err:
163 return retval;
164}
165
166static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
167 struct path *path)
168{
169 int retval = 0;
170 struct file_handle f_handle;
171 struct file_handle *handle = NULL;
172
173 /*
174 * With a handle we don't look at the execute bit on
175 * the directory. Ideally we would like CAP_DAC_SEARCH.
176 * But we don't have that
177 */
178 if (!capable(CAP_DAC_READ_SEARCH)) {
179 retval = -EPERM;
180 goto out_err;
181 }
182 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
183 retval = -EFAULT;
184 goto out_err;
185 }
186 if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
187 (f_handle.handle_bytes == 0)) {
188 retval = -EINVAL;
189 goto out_err;
190 }
191 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
192 GFP_KERNEL);
193 if (!handle) {
194 retval = -ENOMEM;
195 goto out_err;
196 }
197 /* copy the full handle */
198 if (copy_from_user(handle, ufh,
199 sizeof(struct file_handle) +
200 f_handle.handle_bytes)) {
201 retval = -EFAULT;
202 goto out_handle;
203 }
204
205 retval = do_handle_to_path(mountdirfd, handle, path);
206
207out_handle:
208 kfree(handle);
209out_err:
210 return retval;
211}
212
213long do_handle_open(int mountdirfd,
214 struct file_handle __user *ufh, int open_flag)
215{
216 long retval = 0;
217 struct path path;
218 struct file *file;
219 int fd;
220
221 retval = handle_to_path(mountdirfd, ufh, &path);
222 if (retval)
223 return retval;
224
225 fd = get_unused_fd_flags(open_flag);
226 if (fd < 0) {
227 path_put(&path);
228 return fd;
229 }
230 file = file_open_root(path.dentry, path.mnt, "", open_flag);
231 if (IS_ERR(file)) {
232 put_unused_fd(fd);
233 retval = PTR_ERR(file);
234 } else {
235 retval = fd;
236 fsnotify_open(file);
237 fd_install(fd, file);
238 }
239 path_put(&path);
240 return retval;
241}
242
243/**
244 * sys_open_by_handle_at: Open the file handle
245 * @mountdirfd: directory file descriptor
246 * @handle: file handle to be opened
247 * @flags: open flags.
248 *
249 * @mountdirfd indicates the directory file descriptor
250 * of the mount point. The file handle is decoded relative
251 * to the vfsmount pointed to by @mountdirfd. The @flags
252 * value is the same as the open(2) flags.
253 */
254SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
255 struct file_handle __user *, handle,
256 int, flags)
257{
258 long ret;
259
260 if (force_o_largefile())
261 flags |= O_LARGEFILE;
262
263 ret = do_handle_open(mountdirfd, handle, flags);
264 return ret;
265}
diff --git a/fs/file_table.c b/fs/file_table.c
index eb36b6b17e26..74a9544ac770 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -276,11 +276,10 @@ struct file *fget(unsigned int fd)
276 rcu_read_lock(); 276 rcu_read_lock();
277 file = fcheck_files(files, fd); 277 file = fcheck_files(files, fd);
278 if (file) { 278 if (file) {
279 if (!atomic_long_inc_not_zero(&file->f_count)) { 279 /* File object ref couldn't be taken */
280 /* File object ref couldn't be taken */ 280 if (file->f_mode & FMODE_PATH ||
281 rcu_read_unlock(); 281 !atomic_long_inc_not_zero(&file->f_count))
282 return NULL; 282 file = NULL;
283 }
284 } 283 }
285 rcu_read_unlock(); 284 rcu_read_unlock();
286 285
@@ -289,6 +288,25 @@ struct file *fget(unsigned int fd)
289 288
290EXPORT_SYMBOL(fget); 289EXPORT_SYMBOL(fget);
291 290
291struct file *fget_raw(unsigned int fd)
292{
293 struct file *file;
294 struct files_struct *files = current->files;
295
296 rcu_read_lock();
297 file = fcheck_files(files, fd);
298 if (file) {
299 /* File object ref couldn't be taken */
300 if (!atomic_long_inc_not_zero(&file->f_count))
301 file = NULL;
302 }
303 rcu_read_unlock();
304
305 return file;
306}
307
308EXPORT_SYMBOL(fget_raw);
309
292/* 310/*
293 * Lightweight file lookup - no refcnt increment if fd table isn't shared. 311 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
294 * 312 *
@@ -313,6 +331,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
313 *fput_needed = 0; 331 *fput_needed = 0;
314 if (atomic_read(&files->count) == 1) { 332 if (atomic_read(&files->count) == 1) {
315 file = fcheck_files(files, fd); 333 file = fcheck_files(files, fd);
334 if (file && (file->f_mode & FMODE_PATH))
335 file = NULL;
336 } else {
337 rcu_read_lock();
338 file = fcheck_files(files, fd);
339 if (file) {
340 if (!(file->f_mode & FMODE_PATH) &&
341 atomic_long_inc_not_zero(&file->f_count))
342 *fput_needed = 1;
343 else
344 /* Didn't get the reference, someone's freed */
345 file = NULL;
346 }
347 rcu_read_unlock();
348 }
349
350 return file;
351}
352
353struct file *fget_raw_light(unsigned int fd, int *fput_needed)
354{
355 struct file *file;
356 struct files_struct *files = current->files;
357
358 *fput_needed = 0;
359 if (atomic_read(&files->count) == 1) {
360 file = fcheck_files(files, fd);
316 } else { 361 } else {
317 rcu_read_lock(); 362 rcu_read_lock();
318 file = fcheck_files(files, fd); 363 file = fcheck_files(files, fd);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9e3f68cc1bd1..051b1a084528 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -637,8 +637,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
637 u64 nodeid; 637 u64 nodeid;
638 u32 generation; 638 u32 generation;
639 639
640 if (*max_len < len) 640 if (*max_len < len) {
641 *max_len = len;
641 return 255; 642 return 255;
643 }
642 644
643 nodeid = get_fuse_inode(inode)->nodeid; 645 nodeid = get_fuse_inode(inode)->nodeid;
644 generation = inode->i_generation; 646 generation = inode->i_generation;
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9023db8184f9..b5a5e60df0d5 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
36 struct super_block *sb = inode->i_sb; 36 struct super_block *sb = inode->i_sb;
37 struct gfs2_inode *ip = GFS2_I(inode); 37 struct gfs2_inode *ip = GFS2_I(inode);
38 38
39 if (*len < GFS2_SMALL_FH_SIZE || 39 if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
40 (connectable && *len < GFS2_LARGE_FH_SIZE)) 40 *len = GFS2_LARGE_FH_SIZE;
41 return 255; 41 return 255;
42 } else if (*len < GFS2_SMALL_FH_SIZE) {
43 *len = GFS2_SMALL_FH_SIZE;
44 return 255;
45 }
42 46
43 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); 47 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
44 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); 48 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
diff --git a/fs/internal.h b/fs/internal.h
index 9b976b57d7fe..f3d15de44b15 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,6 +106,19 @@ extern void put_super(struct super_block *sb);
106struct nameidata; 106struct nameidata;
107extern struct file *nameidata_to_filp(struct nameidata *); 107extern struct file *nameidata_to_filp(struct nameidata *);
108extern void release_open_intent(struct nameidata *); 108extern void release_open_intent(struct nameidata *);
109struct open_flags {
110 int open_flag;
111 int mode;
112 int acc_mode;
113 int intent;
114};
115extern struct file *do_filp_open(int dfd, const char *pathname,
116 const struct open_flags *op, int lookup_flags);
117extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
118 const char *, const struct open_flags *, int lookup_flags);
119
120extern long do_handle_open(int mountdirfd,
121 struct file_handle __user *ufh, int open_flag);
109 122
110/* 123/*
111 * inode.c 124 * inode.c
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index ed752cb38474..dd4687ff30d0 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry,
124 * offset of the inode and the upper 16 bits of fh32[1] to 124 * offset of the inode and the upper 16 bits of fh32[1] to
125 * hold the offset of the parent. 125 * hold the offset of the parent.
126 */ 126 */
127 127 if (connectable && (len < 5)) {
128 if (len < 3 || (connectable && len < 5)) 128 *max_len = 5;
129 return 255;
130 } else if (len < 3) {
131 *max_len = 3;
129 return 255; 132 return 255;
133 }
130 134
131 len = 3; 135 len = 3;
132 fh32[0] = ei->i_iget5_block; 136 fh32[0] = ei->i_iget5_block;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 5a2b269428a6..3f04a1804931 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -809,9 +809,6 @@ static int jfs_link(struct dentry *old_dentry,
809 if (ip->i_nlink == JFS_LINK_MAX) 809 if (ip->i_nlink == JFS_LINK_MAX)
810 return -EMLINK; 810 return -EMLINK;
811 811
812 if (ip->i_nlink == 0)
813 return -ENOENT;
814
815 dquot_initialize(dir); 812 dquot_initialize(dir);
816 813
817 tid = txBegin(ip->i_sb, 0); 814 tid = txBegin(ip->i_sb, 0);
diff --git a/fs/namei.c b/fs/namei.c
index a4689eb2df28..0a601cae23de 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
136 return retval; 136 return retval;
137} 137}
138 138
139char * getname(const char __user * filename) 139static char *getname_flags(const char __user * filename, int flags)
140{ 140{
141 char *tmp, *result; 141 char *tmp, *result;
142 142
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
147 147
148 result = tmp; 148 result = tmp;
149 if (retval < 0) { 149 if (retval < 0) {
150 __putname(tmp); 150 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
151 result = ERR_PTR(retval); 151 __putname(tmp);
152 result = ERR_PTR(retval);
153 }
152 } 154 }
153 } 155 }
154 audit_getname(result); 156 audit_getname(result);
155 return result; 157 return result;
156} 158}
157 159
160char *getname(const char __user * filename)
161{
162 return getname_flags(filename, 0);
163}
164
158#ifdef CONFIG_AUDITSYSCALL 165#ifdef CONFIG_AUDITSYSCALL
159void putname(const char *name) 166void putname(const char *name)
160{ 167{
@@ -401,9 +408,11 @@ static int nameidata_drop_rcu(struct nameidata *nd)
401{ 408{
402 struct fs_struct *fs = current->fs; 409 struct fs_struct *fs = current->fs;
403 struct dentry *dentry = nd->path.dentry; 410 struct dentry *dentry = nd->path.dentry;
411 int want_root = 0;
404 412
405 BUG_ON(!(nd->flags & LOOKUP_RCU)); 413 BUG_ON(!(nd->flags & LOOKUP_RCU));
406 if (nd->root.mnt) { 414 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
415 want_root = 1;
407 spin_lock(&fs->lock); 416 spin_lock(&fs->lock);
408 if (nd->root.mnt != fs->root.mnt || 417 if (nd->root.mnt != fs->root.mnt ||
409 nd->root.dentry != fs->root.dentry) 418 nd->root.dentry != fs->root.dentry)
@@ -414,7 +423,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
414 goto err; 423 goto err;
415 BUG_ON(nd->inode != dentry->d_inode); 424 BUG_ON(nd->inode != dentry->d_inode);
416 spin_unlock(&dentry->d_lock); 425 spin_unlock(&dentry->d_lock);
417 if (nd->root.mnt) { 426 if (want_root) {
418 path_get(&nd->root); 427 path_get(&nd->root);
419 spin_unlock(&fs->lock); 428 spin_unlock(&fs->lock);
420 } 429 }
@@ -427,7 +436,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
427err: 436err:
428 spin_unlock(&dentry->d_lock); 437 spin_unlock(&dentry->d_lock);
429err_root: 438err_root:
430 if (nd->root.mnt) 439 if (want_root)
431 spin_unlock(&fs->lock); 440 spin_unlock(&fs->lock);
432 return -ECHILD; 441 return -ECHILD;
433} 442}
@@ -454,9 +463,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
454{ 463{
455 struct fs_struct *fs = current->fs; 464 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 465 struct dentry *parent = nd->path.dentry;
466 int want_root = 0;
457 467
458 BUG_ON(!(nd->flags & LOOKUP_RCU)); 468 BUG_ON(!(nd->flags & LOOKUP_RCU));
459 if (nd->root.mnt) { 469 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
470 want_root = 1;
460 spin_lock(&fs->lock); 471 spin_lock(&fs->lock);
461 if (nd->root.mnt != fs->root.mnt || 472 if (nd->root.mnt != fs->root.mnt ||
462 nd->root.dentry != fs->root.dentry) 473 nd->root.dentry != fs->root.dentry)
@@ -476,7 +487,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
476 parent->d_count++; 487 parent->d_count++;
477 spin_unlock(&dentry->d_lock); 488 spin_unlock(&dentry->d_lock);
478 spin_unlock(&parent->d_lock); 489 spin_unlock(&parent->d_lock);
479 if (nd->root.mnt) { 490 if (want_root) {
480 path_get(&nd->root); 491 path_get(&nd->root);
481 spin_unlock(&fs->lock); 492 spin_unlock(&fs->lock);
482 } 493 }
@@ -490,7 +501,7 @@ err:
490 spin_unlock(&dentry->d_lock); 501 spin_unlock(&dentry->d_lock);
491 spin_unlock(&parent->d_lock); 502 spin_unlock(&parent->d_lock);
492err_root: 503err_root:
493 if (nd->root.mnt) 504 if (want_root)
494 spin_unlock(&fs->lock); 505 spin_unlock(&fs->lock);
495 return -ECHILD; 506 return -ECHILD;
496} 507}
@@ -498,8 +509,16 @@ err_root:
498/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ 509/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
499static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) 510static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
500{ 511{
501 if (nd->flags & LOOKUP_RCU) 512 if (nd->flags & LOOKUP_RCU) {
502 return nameidata_dentry_drop_rcu(nd, dentry); 513 if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
514 nd->flags &= ~LOOKUP_RCU;
515 if (!(nd->flags & LOOKUP_ROOT))
516 nd->root.mnt = NULL;
517 rcu_read_unlock();
518 br_read_unlock(vfsmount_lock);
519 return -ECHILD;
520 }
521 }
503 return 0; 522 return 0;
504} 523}
505 524
@@ -518,7 +537,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd)
518 537
519 BUG_ON(!(nd->flags & LOOKUP_RCU)); 538 BUG_ON(!(nd->flags & LOOKUP_RCU));
520 nd->flags &= ~LOOKUP_RCU; 539 nd->flags &= ~LOOKUP_RCU;
521 nd->root.mnt = NULL; 540 if (!(nd->flags & LOOKUP_ROOT))
541 nd->root.mnt = NULL;
522 spin_lock(&dentry->d_lock); 542 spin_lock(&dentry->d_lock);
523 if (!__d_rcu_to_refcount(dentry, nd->seq)) 543 if (!__d_rcu_to_refcount(dentry, nd->seq))
524 goto err_unlock; 544 goto err_unlock;
@@ -539,14 +559,6 @@ err_unlock:
539 return -ECHILD; 559 return -ECHILD;
540} 560}
541 561
542/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
543static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
544{
545 if (likely(nd->flags & LOOKUP_RCU))
546 return nameidata_drop_rcu_last(nd);
547 return 0;
548}
549
550/** 562/**
551 * release_open_intent - free up open intent resources 563 * release_open_intent - free up open intent resources
552 * @nd: pointer to nameidata 564 * @nd: pointer to nameidata
@@ -590,42 +602,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
590 return dentry; 602 return dentry;
591} 603}
592 604
593static inline struct dentry *
594do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
595{
596 int status = d_revalidate(dentry, nd);
597 if (likely(status > 0))
598 return dentry;
599 if (status == -ECHILD) {
600 if (nameidata_dentry_drop_rcu(nd, dentry))
601 return ERR_PTR(-ECHILD);
602 return do_revalidate(dentry, nd);
603 }
604 if (status < 0)
605 return ERR_PTR(status);
606 /* Don't d_invalidate in rcu-walk mode */
607 if (nameidata_dentry_drop_rcu(nd, dentry))
608 return ERR_PTR(-ECHILD);
609 if (!d_invalidate(dentry)) {
610 dput(dentry);
611 dentry = NULL;
612 }
613 return dentry;
614}
615
616static inline int need_reval_dot(struct dentry *dentry)
617{
618 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
619 return 0;
620
621 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
622 return 0;
623
624 return 1;
625}
626
627/* 605/*
628 * force_reval_path - force revalidation of a dentry 606 * handle_reval_path - force revalidation of a dentry
629 * 607 *
630 * In some situations the path walking code will trust dentries without 608 * In some situations the path walking code will trust dentries without
631 * revalidating them. This causes problems for filesystems that depend on 609 * revalidating them. This causes problems for filesystems that depend on
@@ -639,27 +617,28 @@ static inline int need_reval_dot(struct dentry *dentry)
639 * invalidate the dentry. It's up to the caller to handle putting references 617 * invalidate the dentry. It's up to the caller to handle putting references
640 * to the path if necessary. 618 * to the path if necessary.
641 */ 619 */
642static int 620static inline int handle_reval_path(struct nameidata *nd)
643force_reval_path(struct path *path, struct nameidata *nd)
644{ 621{
622 struct dentry *dentry = nd->path.dentry;
645 int status; 623 int status;
646 struct dentry *dentry = path->dentry;
647 624
648 /* 625 if (likely(!(nd->flags & LOOKUP_JUMPED)))
649 * only check on filesystems where it's possible for the dentry to 626 return 0;
650 * become stale. 627
651 */ 628 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
652 if (!need_reval_dot(dentry)) 629 return 0;
630
631 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
653 return 0; 632 return 0;
654 633
634 /* Note: we do not d_invalidate() */
655 status = d_revalidate(dentry, nd); 635 status = d_revalidate(dentry, nd);
656 if (status > 0) 636 if (status > 0)
657 return 0; 637 return 0;
658 638
659 if (!status) { 639 if (!status)
660 d_invalidate(dentry);
661 status = -ESTALE; 640 status = -ESTALE;
662 } 641
663 return status; 642 return status;
664} 643}
665 644
@@ -728,6 +707,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
728 path_put(&nd->path); 707 path_put(&nd->path);
729 nd->path = nd->root; 708 nd->path = nd->root;
730 path_get(&nd->root); 709 path_get(&nd->root);
710 nd->flags |= LOOKUP_JUMPED;
731 } 711 }
732 nd->inode = nd->path.dentry->d_inode; 712 nd->inode = nd->path.dentry->d_inode;
733 713
@@ -757,20 +737,44 @@ static inline void path_to_nameidata(const struct path *path,
757 nd->path.dentry = path->dentry; 737 nd->path.dentry = path->dentry;
758} 738}
759 739
740static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
741{
742 struct inode *inode = link->dentry->d_inode;
743 if (!IS_ERR(cookie) && inode->i_op->put_link)
744 inode->i_op->put_link(link->dentry, nd, cookie);
745 path_put(link);
746}
747
760static __always_inline int 748static __always_inline int
761__do_follow_link(const struct path *link, struct nameidata *nd, void **p) 749follow_link(struct path *link, struct nameidata *nd, void **p)
762{ 750{
763 int error; 751 int error;
764 struct dentry *dentry = link->dentry; 752 struct dentry *dentry = link->dentry;
765 753
766 BUG_ON(nd->flags & LOOKUP_RCU); 754 BUG_ON(nd->flags & LOOKUP_RCU);
767 755
756 if (unlikely(current->total_link_count >= 40)) {
757 *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
758 path_put_conditional(link, nd);
759 path_put(&nd->path);
760 return -ELOOP;
761 }
762 cond_resched();
763 current->total_link_count++;
764
768 touch_atime(link->mnt, dentry); 765 touch_atime(link->mnt, dentry);
769 nd_set_link(nd, NULL); 766 nd_set_link(nd, NULL);
770 767
771 if (link->mnt == nd->path.mnt) 768 if (link->mnt == nd->path.mnt)
772 mntget(link->mnt); 769 mntget(link->mnt);
773 770
771 error = security_inode_follow_link(link->dentry, nd);
772 if (error) {
773 *p = ERR_PTR(error); /* no ->put_link(), please */
774 path_put(&nd->path);
775 return error;
776 }
777
774 nd->last_type = LAST_BIND; 778 nd->last_type = LAST_BIND;
775 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 779 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
776 error = PTR_ERR(*p); 780 error = PTR_ERR(*p);
@@ -780,56 +784,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
780 if (s) 784 if (s)
781 error = __vfs_follow_link(nd, s); 785 error = __vfs_follow_link(nd, s);
782 else if (nd->last_type == LAST_BIND) { 786 else if (nd->last_type == LAST_BIND) {
783 error = force_reval_path(&nd->path, nd); 787 nd->flags |= LOOKUP_JUMPED;
784 if (error) 788 nd->inode = nd->path.dentry->d_inode;
789 if (nd->inode->i_op->follow_link) {
790 /* stepped on a _really_ weird one */
785 path_put(&nd->path); 791 path_put(&nd->path);
792 error = -ELOOP;
793 }
786 } 794 }
787 } 795 }
788 return error; 796 return error;
789} 797}
790 798
791/*
792 * This limits recursive symlink follows to 8, while
793 * limiting consecutive symlinks to 40.
794 *
795 * Without that kind of total limit, nasty chains of consecutive
796 * symlinks can cause almost arbitrarily long lookups.
797 */
798static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
799{
800 void *cookie;
801 int err = -ELOOP;
802
803 /* We drop rcu-walk here */
804 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
805 return -ECHILD;
806 BUG_ON(inode != path->dentry->d_inode);
807
808 if (current->link_count >= MAX_NESTED_LINKS)
809 goto loop;
810 if (current->total_link_count >= 40)
811 goto loop;
812 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
813 cond_resched();
814 err = security_inode_follow_link(path->dentry, nd);
815 if (err)
816 goto loop;
817 current->link_count++;
818 current->total_link_count++;
819 nd->depth++;
820 err = __do_follow_link(path, nd, &cookie);
821 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
822 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
823 path_put(path);
824 current->link_count--;
825 nd->depth--;
826 return err;
827loop:
828 path_put_conditional(path, nd);
829 path_put(&nd->path);
830 return err;
831}
832
833static int follow_up_rcu(struct path *path) 799static int follow_up_rcu(struct path *path)
834{ 800{
835 struct vfsmount *parent; 801 struct vfsmount *parent;
@@ -1068,7 +1034,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1068 1034
1069 seq = read_seqcount_begin(&parent->d_seq); 1035 seq = read_seqcount_begin(&parent->d_seq);
1070 if (read_seqcount_retry(&old->d_seq, nd->seq)) 1036 if (read_seqcount_retry(&old->d_seq, nd->seq))
1071 return -ECHILD; 1037 goto failed;
1072 inode = parent->d_inode; 1038 inode = parent->d_inode;
1073 nd->path.dentry = parent; 1039 nd->path.dentry = parent;
1074 nd->seq = seq; 1040 nd->seq = seq;
@@ -1081,8 +1047,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1081 } 1047 }
1082 __follow_mount_rcu(nd, &nd->path, &inode, true); 1048 __follow_mount_rcu(nd, &nd->path, &inode, true);
1083 nd->inode = inode; 1049 nd->inode = inode;
1084
1085 return 0; 1050 return 0;
1051
1052failed:
1053 nd->flags &= ~LOOKUP_RCU;
1054 if (!(nd->flags & LOOKUP_ROOT))
1055 nd->root.mnt = NULL;
1056 rcu_read_unlock();
1057 br_read_unlock(vfsmount_lock);
1058 return -ECHILD;
1086} 1059}
1087 1060
1088/* 1061/*
@@ -1216,68 +1189,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1216{ 1189{
1217 struct vfsmount *mnt = nd->path.mnt; 1190 struct vfsmount *mnt = nd->path.mnt;
1218 struct dentry *dentry, *parent = nd->path.dentry; 1191 struct dentry *dentry, *parent = nd->path.dentry;
1219 struct inode *dir; 1192 int need_reval = 1;
1193 int status = 1;
1220 int err; 1194 int err;
1221 1195
1222 /* 1196 /*
1223 * See if the low-level filesystem might want
1224 * to use its own hash..
1225 */
1226 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1227 err = parent->d_op->d_hash(parent, nd->inode, name);
1228 if (err < 0)
1229 return err;
1230 }
1231
1232 /*
1233 * Rename seqlock is not required here because in the off chance 1197 * Rename seqlock is not required here because in the off chance
1234 * of a false negative due to a concurrent rename, we're going to 1198 * of a false negative due to a concurrent rename, we're going to
1235 * do the non-racy lookup, below. 1199 * do the non-racy lookup, below.
1236 */ 1200 */
1237 if (nd->flags & LOOKUP_RCU) { 1201 if (nd->flags & LOOKUP_RCU) {
1238 unsigned seq; 1202 unsigned seq;
1239
1240 *inode = nd->inode; 1203 *inode = nd->inode;
1241 dentry = __d_lookup_rcu(parent, name, &seq, inode); 1204 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1242 if (!dentry) { 1205 if (!dentry)
1243 if (nameidata_drop_rcu(nd)) 1206 goto unlazy;
1244 return -ECHILD; 1207
1245 goto need_lookup;
1246 }
1247 /* Memory barrier in read_seqcount_begin of child is enough */ 1208 /* Memory barrier in read_seqcount_begin of child is enough */
1248 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1209 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1249 return -ECHILD; 1210 return -ECHILD;
1250
1251 nd->seq = seq; 1211 nd->seq = seq;
1212
1252 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1213 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1253 dentry = do_revalidate_rcu(dentry, nd); 1214 status = d_revalidate(dentry, nd);
1254 if (!dentry) 1215 if (unlikely(status <= 0)) {
1255 goto need_lookup; 1216 if (status != -ECHILD)
1256 if (IS_ERR(dentry)) 1217 need_reval = 0;
1257 goto fail; 1218 goto unlazy;
1258 if (!(nd->flags & LOOKUP_RCU)) 1219 }
1259 goto done;
1260 } 1220 }
1261 path->mnt = mnt; 1221 path->mnt = mnt;
1262 path->dentry = dentry; 1222 path->dentry = dentry;
1263 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1223 if (likely(__follow_mount_rcu(nd, path, inode, false)))
1264 return 0; 1224 return 0;
1265 if (nameidata_drop_rcu(nd)) 1225unlazy:
1266 return -ECHILD; 1226 if (dentry) {
1267 /* fallthru */ 1227 if (nameidata_dentry_drop_rcu(nd, dentry))
1228 return -ECHILD;
1229 } else {
1230 if (nameidata_drop_rcu(nd))
1231 return -ECHILD;
1232 }
1233 } else {
1234 dentry = __d_lookup(parent, name);
1268 } 1235 }
1269 dentry = __d_lookup(parent, name); 1236
1270 if (!dentry) 1237retry:
1271 goto need_lookup; 1238 if (unlikely(!dentry)) {
1272found: 1239 struct inode *dir = parent->d_inode;
1273 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1240 BUG_ON(nd->inode != dir);
1274 dentry = do_revalidate(dentry, nd); 1241
1275 if (!dentry) 1242 mutex_lock(&dir->i_mutex);
1276 goto need_lookup; 1243 dentry = d_lookup(parent, name);
1277 if (IS_ERR(dentry)) 1244 if (likely(!dentry)) {
1278 goto fail; 1245 dentry = d_alloc_and_lookup(parent, name, nd);
1246 if (IS_ERR(dentry)) {
1247 mutex_unlock(&dir->i_mutex);
1248 return PTR_ERR(dentry);
1249 }
1250 /* known good */
1251 need_reval = 0;
1252 status = 1;
1253 }
1254 mutex_unlock(&dir->i_mutex);
1279 } 1255 }
1280done: 1256 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1257 status = d_revalidate(dentry, nd);
1258 if (unlikely(status <= 0)) {
1259 if (status < 0) {
1260 dput(dentry);
1261 return status;
1262 }
1263 if (!d_invalidate(dentry)) {
1264 dput(dentry);
1265 dentry = NULL;
1266 need_reval = 1;
1267 goto retry;
1268 }
1269 }
1270
1281 path->mnt = mnt; 1271 path->mnt = mnt;
1282 path->dentry = dentry; 1272 path->dentry = dentry;
1283 err = follow_managed(path, nd->flags); 1273 err = follow_managed(path, nd->flags);
@@ -1287,39 +1277,113 @@ done:
1287 } 1277 }
1288 *inode = path->dentry->d_inode; 1278 *inode = path->dentry->d_inode;
1289 return 0; 1279 return 0;
1280}
1290 1281
1291need_lookup: 1282static inline int may_lookup(struct nameidata *nd)
1292 dir = parent->d_inode; 1283{
1293 BUG_ON(nd->inode != dir); 1284 if (nd->flags & LOOKUP_RCU) {
1285 int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1286 if (err != -ECHILD)
1287 return err;
1288 if (nameidata_drop_rcu(nd))
1289 return -ECHILD;
1290 }
1291 return exec_permission(nd->inode, 0);
1292}
1294 1293
1295 mutex_lock(&dir->i_mutex); 1294static inline int handle_dots(struct nameidata *nd, int type)
1296 /* 1295{
1297 * First re-do the cached lookup just in case it was created 1296 if (type == LAST_DOTDOT) {
1298 * while we waited for the directory semaphore, or the first 1297 if (nd->flags & LOOKUP_RCU) {
1299 * lookup failed due to an unrelated rename. 1298 if (follow_dotdot_rcu(nd))
1300 * 1299 return -ECHILD;
1301 * This could use version numbering or similar to avoid unnecessary 1300 } else
1302 * cache lookups, but then we'd have to do the first lookup in the 1301 follow_dotdot(nd);
1303 * non-racy way. However in the common case here, everything should 1302 }
1304 * be hot in cache, so would it be a big win? 1303 return 0;
1305 */ 1304}
1306 dentry = d_lookup(parent, name); 1305
1307 if (likely(!dentry)) { 1306static void terminate_walk(struct nameidata *nd)
1308 dentry = d_alloc_and_lookup(parent, name, nd); 1307{
1309 mutex_unlock(&dir->i_mutex); 1308 if (!(nd->flags & LOOKUP_RCU)) {
1310 if (IS_ERR(dentry)) 1309 path_put(&nd->path);
1311 goto fail; 1310 } else {
1312 goto done; 1311 nd->flags &= ~LOOKUP_RCU;
1312 if (!(nd->flags & LOOKUP_ROOT))
1313 nd->root.mnt = NULL;
1314 rcu_read_unlock();
1315 br_read_unlock(vfsmount_lock);
1313 } 1316 }
1317}
1318
1319static inline int walk_component(struct nameidata *nd, struct path *path,
1320 struct qstr *name, int type, int follow)
1321{
1322 struct inode *inode;
1323 int err;
1314 /* 1324 /*
1315 * Uhhuh! Nasty case: the cache was re-populated while 1325 * "." and ".." are special - ".." especially so because it has
1316 * we waited on the semaphore. Need to revalidate. 1326 * to be able to know about the current root directory and
1327 * parent relationships.
1317 */ 1328 */
1318 mutex_unlock(&dir->i_mutex); 1329 if (unlikely(type != LAST_NORM))
1319 goto found; 1330 return handle_dots(nd, type);
1331 err = do_lookup(nd, name, path, &inode);
1332 if (unlikely(err)) {
1333 terminate_walk(nd);
1334 return err;
1335 }
1336 if (!inode) {
1337 path_to_nameidata(path, nd);
1338 terminate_walk(nd);
1339 return -ENOENT;
1340 }
1341 if (unlikely(inode->i_op->follow_link) && follow) {
1342 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
1343 return -ECHILD;
1344 BUG_ON(inode != path->dentry->d_inode);
1345 return 1;
1346 }
1347 path_to_nameidata(path, nd);
1348 nd->inode = inode;
1349 return 0;
1350}
1320 1351
1321fail: 1352/*
1322 return PTR_ERR(dentry); 1353 * This limits recursive symlink follows to 8, while
1354 * limiting consecutive symlinks to 40.
1355 *
1356 * Without that kind of total limit, nasty chains of consecutive
1357 * symlinks can cause almost arbitrarily long lookups.
1358 */
1359static inline int nested_symlink(struct path *path, struct nameidata *nd)
1360{
1361 int res;
1362
1363 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1364 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1365 path_put_conditional(path, nd);
1366 path_put(&nd->path);
1367 return -ELOOP;
1368 }
1369
1370 nd->depth++;
1371 current->link_count++;
1372
1373 do {
1374 struct path link = *path;
1375 void *cookie;
1376
1377 res = follow_link(&link, nd, &cookie);
1378 if (!res)
1379 res = walk_component(nd, path, &nd->last,
1380 nd->last_type, LOOKUP_FOLLOW);
1381 put_link(nd, &link, cookie);
1382 } while (res > 0);
1383
1384 current->link_count--;
1385 nd->depth--;
1386 return res;
1323} 1387}
1324 1388
1325/* 1389/*
@@ -1339,30 +1403,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1339 while (*name=='/') 1403 while (*name=='/')
1340 name++; 1404 name++;
1341 if (!*name) 1405 if (!*name)
1342 goto return_reval; 1406 return 0;
1343
1344 if (nd->depth)
1345 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
1346 1407
1347 /* At this point we know we have a real path component. */ 1408 /* At this point we know we have a real path component. */
1348 for(;;) { 1409 for(;;) {
1349 struct inode *inode;
1350 unsigned long hash; 1410 unsigned long hash;
1351 struct qstr this; 1411 struct qstr this;
1352 unsigned int c; 1412 unsigned int c;
1413 int type;
1353 1414
1354 nd->flags |= LOOKUP_CONTINUE; 1415 nd->flags |= LOOKUP_CONTINUE;
1355 if (nd->flags & LOOKUP_RCU) { 1416
1356 err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1417 err = may_lookup(nd);
1357 if (err == -ECHILD) {
1358 if (nameidata_drop_rcu(nd))
1359 return -ECHILD;
1360 goto exec_again;
1361 }
1362 } else {
1363exec_again:
1364 err = exec_permission(nd->inode, 0);
1365 }
1366 if (err) 1418 if (err)
1367 break; 1419 break;
1368 1420
@@ -1378,52 +1430,43 @@ exec_again:
1378 this.len = name - (const char *) this.name; 1430 this.len = name - (const char *) this.name;
1379 this.hash = end_name_hash(hash); 1431 this.hash = end_name_hash(hash);
1380 1432
1433 type = LAST_NORM;
1434 if (this.name[0] == '.') switch (this.len) {
1435 case 2:
1436 if (this.name[1] == '.') {
1437 type = LAST_DOTDOT;
1438 nd->flags |= LOOKUP_JUMPED;
1439 }
1440 break;
1441 case 1:
1442 type = LAST_DOT;
1443 }
1444 if (likely(type == LAST_NORM)) {
1445 struct dentry *parent = nd->path.dentry;
1446 nd->flags &= ~LOOKUP_JUMPED;
1447 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1448 err = parent->d_op->d_hash(parent, nd->inode,
1449 &this);
1450 if (err < 0)
1451 break;
1452 }
1453 }
1454
1381 /* remove trailing slashes? */ 1455 /* remove trailing slashes? */
1382 if (!c) 1456 if (!c)
1383 goto last_component; 1457 goto last_component;
1384 while (*++name == '/'); 1458 while (*++name == '/');
1385 if (!*name) 1459 if (!*name)
1386 goto last_with_slashes; 1460 goto last_component;
1387 1461
1388 /* 1462 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1389 * "." and ".." are special - ".." especially so because it has 1463 if (err < 0)
1390 * to be able to know about the current root directory and 1464 return err;
1391 * parent relationships.
1392 */
1393 if (this.name[0] == '.') switch (this.len) {
1394 default:
1395 break;
1396 case 2:
1397 if (this.name[1] != '.')
1398 break;
1399 if (nd->flags & LOOKUP_RCU) {
1400 if (follow_dotdot_rcu(nd))
1401 return -ECHILD;
1402 } else
1403 follow_dotdot(nd);
1404 /* fallthrough */
1405 case 1:
1406 continue;
1407 }
1408 /* This does the actual lookups.. */
1409 err = do_lookup(nd, &this, &next, &inode);
1410 if (err)
1411 break;
1412 err = -ENOENT;
1413 if (!inode)
1414 goto out_dput;
1415 1465
1416 if (inode->i_op->follow_link) { 1466 if (err) {
1417 err = do_follow_link(inode, &next, nd); 1467 err = nested_symlink(&next, nd);
1418 if (err) 1468 if (err)
1419 goto return_err; 1469 return err;
1420 nd->inode = nd->path.dentry->d_inode;
1421 err = -ENOENT;
1422 if (!nd->inode)
1423 break;
1424 } else {
1425 path_to_nameidata(&next, nd);
1426 nd->inode = inode;
1427 } 1470 }
1428 err = -ENOTDIR; 1471 err = -ENOTDIR;
1429 if (!nd->inode->i_op->lookup) 1472 if (!nd->inode->i_op->lookup)
@@ -1431,210 +1474,109 @@ exec_again:
1431 continue; 1474 continue;
1432 /* here ends the main loop */ 1475 /* here ends the main loop */
1433 1476
1434last_with_slashes:
1435 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1436last_component: 1477last_component:
1437 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 1478 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1438 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 1479 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1439 if (lookup_flags & LOOKUP_PARENT)
1440 goto lookup_parent;
1441 if (this.name[0] == '.') switch (this.len) {
1442 default:
1443 break;
1444 case 2:
1445 if (this.name[1] != '.')
1446 break;
1447 if (nd->flags & LOOKUP_RCU) {
1448 if (follow_dotdot_rcu(nd))
1449 return -ECHILD;
1450 } else
1451 follow_dotdot(nd);
1452 /* fallthrough */
1453 case 1:
1454 goto return_reval;
1455 }
1456 err = do_lookup(nd, &this, &next, &inode);
1457 if (err)
1458 break;
1459 if (inode && unlikely(inode->i_op->follow_link) &&
1460 (lookup_flags & LOOKUP_FOLLOW)) {
1461 err = do_follow_link(inode, &next, nd);
1462 if (err)
1463 goto return_err;
1464 nd->inode = nd->path.dentry->d_inode;
1465 } else {
1466 path_to_nameidata(&next, nd);
1467 nd->inode = inode;
1468 }
1469 err = -ENOENT;
1470 if (!nd->inode)
1471 break;
1472 if (lookup_flags & LOOKUP_DIRECTORY) {
1473 err = -ENOTDIR;
1474 if (!nd->inode->i_op->lookup)
1475 break;
1476 }
1477 goto return_base;
1478lookup_parent:
1479 nd->last = this; 1480 nd->last = this;
1480 nd->last_type = LAST_NORM; 1481 nd->last_type = type;
1481 if (this.name[0] != '.')
1482 goto return_base;
1483 if (this.len == 1)
1484 nd->last_type = LAST_DOT;
1485 else if (this.len == 2 && this.name[1] == '.')
1486 nd->last_type = LAST_DOTDOT;
1487 else
1488 goto return_base;
1489return_reval:
1490 /*
1491 * We bypassed the ordinary revalidation routines.
1492 * We may need to check the cached dentry for staleness.
1493 */
1494 if (need_reval_dot(nd->path.dentry)) {
1495 if (nameidata_drop_rcu_last_maybe(nd))
1496 return -ECHILD;
1497 /* Note: we do not d_invalidate() */
1498 err = d_revalidate(nd->path.dentry, nd);
1499 if (!err)
1500 err = -ESTALE;
1501 if (err < 0)
1502 break;
1503 return 0;
1504 }
1505return_base:
1506 if (nameidata_drop_rcu_last_maybe(nd))
1507 return -ECHILD;
1508 return 0; 1482 return 0;
1509out_dput:
1510 if (!(nd->flags & LOOKUP_RCU))
1511 path_put_conditional(&next, nd);
1512 break;
1513 } 1483 }
1514 if (!(nd->flags & LOOKUP_RCU)) 1484 terminate_walk(nd);
1515 path_put(&nd->path);
1516return_err:
1517 return err; 1485 return err;
1518} 1486}
1519 1487
1520static inline int path_walk_rcu(const char *name, struct nameidata *nd) 1488static int path_init(int dfd, const char *name, unsigned int flags,
1521{ 1489 struct nameidata *nd, struct file **fp)
1522 current->total_link_count = 0;
1523
1524 return link_path_walk(name, nd);
1525}
1526
1527static inline int path_walk_simple(const char *name, struct nameidata *nd)
1528{
1529 current->total_link_count = 0;
1530
1531 return link_path_walk(name, nd);
1532}
1533
1534static int path_walk(const char *name, struct nameidata *nd)
1535{
1536 struct path save = nd->path;
1537 int result;
1538
1539 current->total_link_count = 0;
1540
1541 /* make sure the stuff we saved doesn't go away */
1542 path_get(&save);
1543
1544 result = link_path_walk(name, nd);
1545 if (result == -ESTALE) {
1546 /* nd->path had been dropped */
1547 current->total_link_count = 0;
1548 nd->path = save;
1549 nd->inode = save.dentry->d_inode;
1550 path_get(&nd->path);
1551 nd->flags |= LOOKUP_REVAL;
1552 result = link_path_walk(name, nd);
1553 }
1554
1555 path_put(&save);
1556
1557 return result;
1558}
1559
1560static void path_finish_rcu(struct nameidata *nd)
1561{
1562 if (nd->flags & LOOKUP_RCU) {
1563 /* RCU dangling. Cancel it. */
1564 nd->flags &= ~LOOKUP_RCU;
1565 nd->root.mnt = NULL;
1566 rcu_read_unlock();
1567 br_read_unlock(vfsmount_lock);
1568 }
1569 if (nd->file)
1570 fput(nd->file);
1571}
1572
1573static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1574{ 1490{
1575 int retval = 0; 1491 int retval = 0;
1576 int fput_needed; 1492 int fput_needed;
1577 struct file *file; 1493 struct file *file;
1578 1494
1579 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1495 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1580 nd->flags = flags | LOOKUP_RCU; 1496 nd->flags = flags | LOOKUP_JUMPED;
1581 nd->depth = 0; 1497 nd->depth = 0;
1498 if (flags & LOOKUP_ROOT) {
1499 struct inode *inode = nd->root.dentry->d_inode;
1500 if (*name) {
1501 if (!inode->i_op->lookup)
1502 return -ENOTDIR;
1503 retval = inode_permission(inode, MAY_EXEC);
1504 if (retval)
1505 return retval;
1506 }
1507 nd->path = nd->root;
1508 nd->inode = inode;
1509 if (flags & LOOKUP_RCU) {
1510 br_read_lock(vfsmount_lock);
1511 rcu_read_lock();
1512 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1513 } else {
1514 path_get(&nd->path);
1515 }
1516 return 0;
1517 }
1518
1582 nd->root.mnt = NULL; 1519 nd->root.mnt = NULL;
1583 nd->file = NULL;
1584 1520
1585 if (*name=='/') { 1521 if (*name=='/') {
1586 struct fs_struct *fs = current->fs; 1522 if (flags & LOOKUP_RCU) {
1587 unsigned seq; 1523 br_read_lock(vfsmount_lock);
1588 1524 rcu_read_lock();
1589 br_read_lock(vfsmount_lock); 1525 set_root_rcu(nd);
1590 rcu_read_lock(); 1526 } else {
1591 1527 set_root(nd);
1592 do { 1528 path_get(&nd->root);
1593 seq = read_seqcount_begin(&fs->seq); 1529 }
1594 nd->root = fs->root; 1530 nd->path = nd->root;
1595 nd->path = nd->root;
1596 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1597 } while (read_seqcount_retry(&fs->seq, seq));
1598
1599 } else if (dfd == AT_FDCWD) { 1531 } else if (dfd == AT_FDCWD) {
1600 struct fs_struct *fs = current->fs; 1532 if (flags & LOOKUP_RCU) {
1601 unsigned seq; 1533 struct fs_struct *fs = current->fs;
1602 1534 unsigned seq;
1603 br_read_lock(vfsmount_lock);
1604 rcu_read_lock();
1605 1535
1606 do { 1536 br_read_lock(vfsmount_lock);
1607 seq = read_seqcount_begin(&fs->seq); 1537 rcu_read_lock();
1608 nd->path = fs->pwd;
1609 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1610 } while (read_seqcount_retry(&fs->seq, seq));
1611 1538
1539 do {
1540 seq = read_seqcount_begin(&fs->seq);
1541 nd->path = fs->pwd;
1542 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1543 } while (read_seqcount_retry(&fs->seq, seq));
1544 } else {
1545 get_fs_pwd(current->fs, &nd->path);
1546 }
1612 } else { 1547 } else {
1613 struct dentry *dentry; 1548 struct dentry *dentry;
1614 1549
1615 file = fget_light(dfd, &fput_needed); 1550 file = fget_raw_light(dfd, &fput_needed);
1616 retval = -EBADF; 1551 retval = -EBADF;
1617 if (!file) 1552 if (!file)
1618 goto out_fail; 1553 goto out_fail;
1619 1554
1620 dentry = file->f_path.dentry; 1555 dentry = file->f_path.dentry;
1621 1556
1622 retval = -ENOTDIR; 1557 if (*name) {
1623 if (!S_ISDIR(dentry->d_inode->i_mode)) 1558 retval = -ENOTDIR;
1624 goto fput_fail; 1559 if (!S_ISDIR(dentry->d_inode->i_mode))
1560 goto fput_fail;
1625 1561
1626 retval = file_permission(file, MAY_EXEC); 1562 retval = file_permission(file, MAY_EXEC);
1627 if (retval) 1563 if (retval)
1628 goto fput_fail; 1564 goto fput_fail;
1565 }
1629 1566
1630 nd->path = file->f_path; 1567 nd->path = file->f_path;
1631 if (fput_needed) 1568 if (flags & LOOKUP_RCU) {
1632 nd->file = file; 1569 if (fput_needed)
1633 1570 *fp = file;
1634 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1571 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1635 br_read_lock(vfsmount_lock); 1572 br_read_lock(vfsmount_lock);
1636 rcu_read_lock(); 1573 rcu_read_lock();
1574 } else {
1575 path_get(&file->f_path);
1576 fput_light(file, fput_needed);
1577 }
1637 } 1578 }
1579
1638 nd->inode = nd->path.dentry->d_inode; 1580 nd->inode = nd->path.dentry->d_inode;
1639 return 0; 1581 return 0;
1640 1582
@@ -1644,60 +1586,23 @@ out_fail:
1644 return retval; 1586 return retval;
1645} 1587}
1646 1588
1647static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1589static inline int lookup_last(struct nameidata *nd, struct path *path)
1648{ 1590{
1649 int retval = 0; 1591 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
1650 int fput_needed; 1592 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1651 struct file *file;
1652
1653 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1654 nd->flags = flags;
1655 nd->depth = 0;
1656 nd->root.mnt = NULL;
1657
1658 if (*name=='/') {
1659 set_root(nd);
1660 nd->path = nd->root;
1661 path_get(&nd->root);
1662 } else if (dfd == AT_FDCWD) {
1663 get_fs_pwd(current->fs, &nd->path);
1664 } else {
1665 struct dentry *dentry;
1666
1667 file = fget_light(dfd, &fput_needed);
1668 retval = -EBADF;
1669 if (!file)
1670 goto out_fail;
1671
1672 dentry = file->f_path.dentry;
1673
1674 retval = -ENOTDIR;
1675 if (!S_ISDIR(dentry->d_inode->i_mode))
1676 goto fput_fail;
1677
1678 retval = file_permission(file, MAY_EXEC);
1679 if (retval)
1680 goto fput_fail;
1681
1682 nd->path = file->f_path;
1683 path_get(&file->f_path);
1684 1593
1685 fput_light(file, fput_needed); 1594 nd->flags &= ~LOOKUP_PARENT;
1686 } 1595 return walk_component(nd, path, &nd->last, nd->last_type,
1687 nd->inode = nd->path.dentry->d_inode; 1596 nd->flags & LOOKUP_FOLLOW);
1688 return 0;
1689
1690fput_fail:
1691 fput_light(file, fput_needed);
1692out_fail:
1693 return retval;
1694} 1597}
1695 1598
1696/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1599/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1697static int do_path_lookup(int dfd, const char *name, 1600static int path_lookupat(int dfd, const char *name,
1698 unsigned int flags, struct nameidata *nd) 1601 unsigned int flags, struct nameidata *nd)
1699{ 1602{
1700 int retval; 1603 struct file *base = NULL;
1604 struct path path;
1605 int err;
1701 1606
1702 /* 1607 /*
1703 * Path walking is largely split up into 2 different synchronisation 1608 * Path walking is largely split up into 2 different synchronisation
@@ -1713,44 +1618,75 @@ static int do_path_lookup(int dfd, const char *name,
1713 * be handled by restarting a traditional ref-walk (which will always 1618 * be handled by restarting a traditional ref-walk (which will always
1714 * be able to complete). 1619 * be able to complete).
1715 */ 1620 */
1716 retval = path_init_rcu(dfd, name, flags, nd); 1621 err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
1717 if (unlikely(retval)) 1622
1718 return retval; 1623 if (unlikely(err))
1719 retval = path_walk_rcu(name, nd); 1624 return err;
1720 path_finish_rcu(nd); 1625
1721 if (nd->root.mnt) { 1626 current->total_link_count = 0;
1722 path_put(&nd->root); 1627 err = link_path_walk(name, nd);
1723 nd->root.mnt = NULL; 1628
1629 if (!err && !(flags & LOOKUP_PARENT)) {
1630 err = lookup_last(nd, &path);
1631 while (err > 0) {
1632 void *cookie;
1633 struct path link = path;
1634 nd->flags |= LOOKUP_PARENT;
1635 err = follow_link(&link, nd, &cookie);
1636 if (!err)
1637 err = lookup_last(nd, &path);
1638 put_link(nd, &link, cookie);
1639 }
1724 } 1640 }
1725 1641
1726 if (unlikely(retval == -ECHILD || retval == -ESTALE)) { 1642 if (nd->flags & LOOKUP_RCU) {
1727 /* slower, locked walk */ 1643 /* went all way through without dropping RCU */
1728 if (retval == -ESTALE) 1644 BUG_ON(err);
1729 flags |= LOOKUP_REVAL; 1645 if (nameidata_drop_rcu_last(nd))
1730 retval = path_init(dfd, name, flags, nd); 1646 err = -ECHILD;
1731 if (unlikely(retval)) 1647 }
1732 return retval; 1648
1733 retval = path_walk(name, nd); 1649 if (!err)
1734 if (nd->root.mnt) { 1650 err = handle_reval_path(nd);
1735 path_put(&nd->root); 1651
1736 nd->root.mnt = NULL; 1652 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1653 if (!nd->inode->i_op->lookup) {
1654 path_put(&nd->path);
1655 return -ENOTDIR;
1737 } 1656 }
1738 } 1657 }
1739 1658
1659 if (base)
1660 fput(base);
1661
1662 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
1663 path_put(&nd->root);
1664 nd->root.mnt = NULL;
1665 }
1666 return err;
1667}
1668
1669static int do_path_lookup(int dfd, const char *name,
1670 unsigned int flags, struct nameidata *nd)
1671{
1672 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
1673 if (unlikely(retval == -ECHILD))
1674 retval = path_lookupat(dfd, name, flags, nd);
1675 if (unlikely(retval == -ESTALE))
1676 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
1677
1740 if (likely(!retval)) { 1678 if (likely(!retval)) {
1741 if (unlikely(!audit_dummy_context())) { 1679 if (unlikely(!audit_dummy_context())) {
1742 if (nd->path.dentry && nd->inode) 1680 if (nd->path.dentry && nd->inode)
1743 audit_inode(name, nd->path.dentry); 1681 audit_inode(name, nd->path.dentry);
1744 } 1682 }
1745 } 1683 }
1746
1747 return retval; 1684 return retval;
1748} 1685}
1749 1686
1750int path_lookup(const char *name, unsigned int flags, 1687int kern_path_parent(const char *name, struct nameidata *nd)
1751 struct nameidata *nd)
1752{ 1688{
1753 return do_path_lookup(AT_FDCWD, name, flags, nd); 1689 return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
1754} 1690}
1755 1691
1756int kern_path(const char *name, unsigned int flags, struct path *path) 1692int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1774,29 +1710,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1774 const char *name, unsigned int flags, 1710 const char *name, unsigned int flags,
1775 struct nameidata *nd) 1711 struct nameidata *nd)
1776{ 1712{
1777 int retval; 1713 nd->root.dentry = dentry;
1778 1714 nd->root.mnt = mnt;
1779 /* same as do_path_lookup */ 1715 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1780 nd->last_type = LAST_ROOT; 1716 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
1781 nd->flags = flags;
1782 nd->depth = 0;
1783
1784 nd->path.dentry = dentry;
1785 nd->path.mnt = mnt;
1786 path_get(&nd->path);
1787 nd->root = nd->path;
1788 path_get(&nd->root);
1789 nd->inode = nd->path.dentry->d_inode;
1790
1791 retval = path_walk(name, nd);
1792 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1793 nd->inode))
1794 audit_inode(name, nd->path.dentry);
1795
1796 path_put(&nd->root);
1797 nd->root.mnt = NULL;
1798
1799 return retval;
1800} 1717}
1801 1718
1802static struct dentry *__lookup_hash(struct qstr *name, 1719static struct dentry *__lookup_hash(struct qstr *name,
@@ -1811,17 +1728,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
1811 return ERR_PTR(err); 1728 return ERR_PTR(err);
1812 1729
1813 /* 1730 /*
1814 * See if the low-level filesystem might want
1815 * to use its own hash..
1816 */
1817 if (base->d_flags & DCACHE_OP_HASH) {
1818 err = base->d_op->d_hash(base, inode, name);
1819 dentry = ERR_PTR(err);
1820 if (err < 0)
1821 goto out;
1822 }
1823
1824 /*
1825 * Don't bother with __d_lookup: callers are for creat as 1731 * Don't bother with __d_lookup: callers are for creat as
1826 * well as unlink, so a lot of the time it would cost 1732 * well as unlink, so a lot of the time it would cost
1827 * a double lookup. 1733 * a double lookup.
@@ -1833,7 +1739,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1833 1739
1834 if (!dentry) 1740 if (!dentry)
1835 dentry = d_alloc_and_lookup(base, name, nd); 1741 dentry = d_alloc_and_lookup(base, name, nd);
1836out: 1742
1837 return dentry; 1743 return dentry;
1838} 1744}
1839 1745
@@ -1847,28 +1753,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1847 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1753 return __lookup_hash(&nd->last, nd->path.dentry, nd);
1848} 1754}
1849 1755
1850static int __lookup_one_len(const char *name, struct qstr *this,
1851 struct dentry *base, int len)
1852{
1853 unsigned long hash;
1854 unsigned int c;
1855
1856 this->name = name;
1857 this->len = len;
1858 if (!len)
1859 return -EACCES;
1860
1861 hash = init_name_hash();
1862 while (len--) {
1863 c = *(const unsigned char *)name++;
1864 if (c == '/' || c == '\0')
1865 return -EACCES;
1866 hash = partial_name_hash(c, hash);
1867 }
1868 this->hash = end_name_hash(hash);
1869 return 0;
1870}
1871
1872/** 1756/**
1873 * lookup_one_len - filesystem helper to lookup single pathname component 1757 * lookup_one_len - filesystem helper to lookup single pathname component
1874 * @name: pathname component to lookup 1758 * @name: pathname component to lookup
@@ -1882,14 +1766,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
1882 */ 1766 */
1883struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1767struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1884{ 1768{
1885 int err;
1886 struct qstr this; 1769 struct qstr this;
1770 unsigned long hash;
1771 unsigned int c;
1887 1772
1888 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1773 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1889 1774
1890 err = __lookup_one_len(name, &this, base, len); 1775 this.name = name;
1891 if (err) 1776 this.len = len;
1892 return ERR_PTR(err); 1777 if (!len)
1778 return ERR_PTR(-EACCES);
1779
1780 hash = init_name_hash();
1781 while (len--) {
1782 c = *(const unsigned char *)name++;
1783 if (c == '/' || c == '\0')
1784 return ERR_PTR(-EACCES);
1785 hash = partial_name_hash(c, hash);
1786 }
1787 this.hash = end_name_hash(hash);
1788 /*
1789 * See if the low-level filesystem might want
1790 * to use its own hash..
1791 */
1792 if (base->d_flags & DCACHE_OP_HASH) {
1793 int err = base->d_op->d_hash(base, base->d_inode, &this);
1794 if (err < 0)
1795 return ERR_PTR(err);
1796 }
1893 1797
1894 return __lookup_hash(&this, base, NULL); 1798 return __lookup_hash(&this, base, NULL);
1895} 1799}
@@ -1898,7 +1802,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1898 struct path *path) 1802 struct path *path)
1899{ 1803{
1900 struct nameidata nd; 1804 struct nameidata nd;
1901 char *tmp = getname(name); 1805 char *tmp = getname_flags(name, flags);
1902 int err = PTR_ERR(tmp); 1806 int err = PTR_ERR(tmp);
1903 if (!IS_ERR(tmp)) { 1807 if (!IS_ERR(tmp)) {
1904 1808
@@ -2078,12 +1982,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
2078 return error; 1982 return error;
2079} 1983}
2080 1984
2081int may_open(struct path *path, int acc_mode, int flag) 1985static int may_open(struct path *path, int acc_mode, int flag)
2082{ 1986{
2083 struct dentry *dentry = path->dentry; 1987 struct dentry *dentry = path->dentry;
2084 struct inode *inode = dentry->d_inode; 1988 struct inode *inode = dentry->d_inode;
2085 int error; 1989 int error;
2086 1990
1991 /* O_PATH? */
1992 if (!acc_mode)
1993 return 0;
1994
2087 if (!inode) 1995 if (!inode)
2088 return -ENOENT; 1996 return -ENOENT;
2089 1997
@@ -2152,34 +2060,6 @@ static int handle_truncate(struct file *filp)
2152} 2060}
2153 2061
2154/* 2062/*
2155 * Be careful about ever adding any more callers of this
2156 * function. Its flags must be in the namei format, not
2157 * what get passed to sys_open().
2158 */
2159static int __open_namei_create(struct nameidata *nd, struct path *path,
2160 int open_flag, int mode)
2161{
2162 int error;
2163 struct dentry *dir = nd->path.dentry;
2164
2165 if (!IS_POSIXACL(dir->d_inode))
2166 mode &= ~current_umask();
2167 error = security_path_mknod(&nd->path, path->dentry, mode, 0);
2168 if (error)
2169 goto out_unlock;
2170 error = vfs_create(dir->d_inode, path->dentry, mode, nd);
2171out_unlock:
2172 mutex_unlock(&dir->d_inode->i_mutex);
2173 dput(nd->path.dentry);
2174 nd->path.dentry = path->dentry;
2175
2176 if (error)
2177 return error;
2178 /* Don't check for write permission, don't truncate */
2179 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
2180}
2181
2182/*
2183 * Note that while the flag value (low two bits) for sys_open means: 2063 * Note that while the flag value (low two bits) for sys_open means:
2184 * 00 - read-only 2064 * 00 - read-only
2185 * 01 - write-only 2065 * 01 - write-only
@@ -2203,126 +2083,115 @@ static inline int open_to_namei_flags(int flag)
2203 return flag; 2083 return flag;
2204} 2084}
2205 2085
2206static int open_will_truncate(int flag, struct inode *inode)
2207{
2208 /*
2209 * We'll never write to the fs underlying
2210 * a device file.
2211 */
2212 if (special_file(inode->i_mode))
2213 return 0;
2214 return (flag & O_TRUNC);
2215}
2216
2217static struct file *finish_open(struct nameidata *nd,
2218 int open_flag, int acc_mode)
2219{
2220 struct file *filp;
2221 int will_truncate;
2222 int error;
2223
2224 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
2225 if (will_truncate) {
2226 error = mnt_want_write(nd->path.mnt);
2227 if (error)
2228 goto exit;
2229 }
2230 error = may_open(&nd->path, acc_mode, open_flag);
2231 if (error) {
2232 if (will_truncate)
2233 mnt_drop_write(nd->path.mnt);
2234 goto exit;
2235 }
2236 filp = nameidata_to_filp(nd);
2237 if (!IS_ERR(filp)) {
2238 error = ima_file_check(filp, acc_mode);
2239 if (error) {
2240 fput(filp);
2241 filp = ERR_PTR(error);
2242 }
2243 }
2244 if (!IS_ERR(filp)) {
2245 if (will_truncate) {
2246 error = handle_truncate(filp);
2247 if (error) {
2248 fput(filp);
2249 filp = ERR_PTR(error);
2250 }
2251 }
2252 }
2253 /*
2254 * It is now safe to drop the mnt write
2255 * because the filp has had a write taken
2256 * on its behalf.
2257 */
2258 if (will_truncate)
2259 mnt_drop_write(nd->path.mnt);
2260 path_put(&nd->path);
2261 return filp;
2262
2263exit:
2264 path_put(&nd->path);
2265 return ERR_PTR(error);
2266}
2267
2268/* 2086/*
2269 * Handle O_CREAT case for do_filp_open 2087 * Handle the last step of open()
2270 */ 2088 */
2271static struct file *do_last(struct nameidata *nd, struct path *path, 2089static struct file *do_last(struct nameidata *nd, struct path *path,
2272 int open_flag, int acc_mode, 2090 const struct open_flags *op, const char *pathname)
2273 int mode, const char *pathname)
2274{ 2091{
2275 struct dentry *dir = nd->path.dentry; 2092 struct dentry *dir = nd->path.dentry;
2093 struct dentry *dentry;
2094 int open_flag = op->open_flag;
2095 int will_truncate = open_flag & O_TRUNC;
2096 int want_write = 0;
2097 int acc_mode = op->acc_mode;
2276 struct file *filp; 2098 struct file *filp;
2277 int error = -EISDIR; 2099 int error;
2100
2101 nd->flags &= ~LOOKUP_PARENT;
2102 nd->flags |= op->intent;
2278 2103
2279 switch (nd->last_type) { 2104 switch (nd->last_type) {
2280 case LAST_DOTDOT: 2105 case LAST_DOTDOT:
2281 follow_dotdot(nd);
2282 dir = nd->path.dentry;
2283 case LAST_DOT: 2106 case LAST_DOT:
2284 if (need_reval_dot(dir)) { 2107 error = handle_dots(nd, nd->last_type);
2285 int status = d_revalidate(nd->path.dentry, nd); 2108 if (error)
2286 if (!status) 2109 return ERR_PTR(error);
2287 status = -ESTALE;
2288 if (status < 0) {
2289 error = status;
2290 goto exit;
2291 }
2292 }
2293 /* fallthrough */ 2110 /* fallthrough */
2294 case LAST_ROOT: 2111 case LAST_ROOT:
2295 goto exit; 2112 if (nd->flags & LOOKUP_RCU) {
2113 if (nameidata_drop_rcu_last(nd))
2114 return ERR_PTR(-ECHILD);
2115 }
2116 error = handle_reval_path(nd);
2117 if (error)
2118 goto exit;
2119 audit_inode(pathname, nd->path.dentry);
2120 if (open_flag & O_CREAT) {
2121 error = -EISDIR;
2122 goto exit;
2123 }
2124 goto ok;
2296 case LAST_BIND: 2125 case LAST_BIND:
2126 /* can't be RCU mode here */
2127 error = handle_reval_path(nd);
2128 if (error)
2129 goto exit;
2297 audit_inode(pathname, dir); 2130 audit_inode(pathname, dir);
2298 goto ok; 2131 goto ok;
2299 } 2132 }
2300 2133
2134 if (!(open_flag & O_CREAT)) {
2135 int symlink_ok = 0;
2136 if (nd->last.name[nd->last.len])
2137 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2138 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2139 symlink_ok = 1;
2140 /* we _can_ be in RCU mode here */
2141 error = walk_component(nd, path, &nd->last, LAST_NORM,
2142 !symlink_ok);
2143 if (error < 0)
2144 return ERR_PTR(error);
2145 if (error) /* symlink */
2146 return NULL;
2147 /* sayonara */
2148 if (nd->flags & LOOKUP_RCU) {
2149 if (nameidata_drop_rcu_last(nd))
2150 return ERR_PTR(-ECHILD);
2151 }
2152
2153 error = -ENOTDIR;
2154 if (nd->flags & LOOKUP_DIRECTORY) {
2155 if (!nd->inode->i_op->lookup)
2156 goto exit;
2157 }
2158 audit_inode(pathname, nd->path.dentry);
2159 goto ok;
2160 }
2161
2162 /* create side of things */
2163
2164 if (nd->flags & LOOKUP_RCU) {
2165 if (nameidata_drop_rcu_last(nd))
2166 return ERR_PTR(-ECHILD);
2167 }
2168
2169 audit_inode(pathname, dir);
2170 error = -EISDIR;
2301 /* trailing slashes? */ 2171 /* trailing slashes? */
2302 if (nd->last.name[nd->last.len]) 2172 if (nd->last.name[nd->last.len])
2303 goto exit; 2173 goto exit;
2304 2174
2305 mutex_lock(&dir->d_inode->i_mutex); 2175 mutex_lock(&dir->d_inode->i_mutex);
2306 2176
2307 path->dentry = lookup_hash(nd); 2177 dentry = lookup_hash(nd);
2308 path->mnt = nd->path.mnt; 2178 error = PTR_ERR(dentry);
2309 2179 if (IS_ERR(dentry)) {
2310 error = PTR_ERR(path->dentry);
2311 if (IS_ERR(path->dentry)) {
2312 mutex_unlock(&dir->d_inode->i_mutex); 2180 mutex_unlock(&dir->d_inode->i_mutex);
2313 goto exit; 2181 goto exit;
2314 } 2182 }
2315 2183
2316 if (IS_ERR(nd->intent.open.file)) { 2184 path->dentry = dentry;
2317 error = PTR_ERR(nd->intent.open.file); 2185 path->mnt = nd->path.mnt;
2318 goto exit_mutex_unlock;
2319 }
2320 2186
2321 /* Negative dentry, just create the file */ 2187 /* Negative dentry, just create the file */
2322 if (!path->dentry->d_inode) { 2188 if (!dentry->d_inode) {
2189 int mode = op->mode;
2190 if (!IS_POSIXACL(dir->d_inode))
2191 mode &= ~current_umask();
2323 /* 2192 /*
2324 * This write is needed to ensure that a 2193 * This write is needed to ensure that a
2325 * ro->rw transition does not occur between 2194 * rw->ro transition does not occur between
2326 * the time when the file is created and when 2195 * the time when the file is created and when
2327 * a permanent write count is taken through 2196 * a permanent write count is taken through
2328 * the 'struct file' in nameidata_to_filp(). 2197 * the 'struct file' in nameidata_to_filp().
@@ -2330,22 +2199,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2330 error = mnt_want_write(nd->path.mnt); 2199 error = mnt_want_write(nd->path.mnt);
2331 if (error) 2200 if (error)
2332 goto exit_mutex_unlock; 2201 goto exit_mutex_unlock;
2333 error = __open_namei_create(nd, path, open_flag, mode); 2202 want_write = 1;
2334 if (error) { 2203 /* Don't check for write permission, don't truncate */
2335 mnt_drop_write(nd->path.mnt); 2204 open_flag &= ~O_TRUNC;
2336 goto exit; 2205 will_truncate = 0;
2337 } 2206 acc_mode = MAY_OPEN;
2338 filp = nameidata_to_filp(nd); 2207 error = security_path_mknod(&nd->path, dentry, mode, 0);
2339 mnt_drop_write(nd->path.mnt); 2208 if (error)
2340 path_put(&nd->path); 2209 goto exit_mutex_unlock;
2341 if (!IS_ERR(filp)) { 2210 error = vfs_create(dir->d_inode, dentry, mode, nd);
2342 error = ima_file_check(filp, acc_mode); 2211 if (error)
2343 if (error) { 2212 goto exit_mutex_unlock;
2344 fput(filp); 2213 mutex_unlock(&dir->d_inode->i_mutex);
2345 filp = ERR_PTR(error); 2214 dput(nd->path.dentry);
2346 } 2215 nd->path.dentry = dentry;
2347 } 2216 goto common;
2348 return filp;
2349 } 2217 }
2350 2218
2351 /* 2219 /*
@@ -2375,7 +2243,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2375 if (S_ISDIR(nd->inode->i_mode)) 2243 if (S_ISDIR(nd->inode->i_mode))
2376 goto exit; 2244 goto exit;
2377ok: 2245ok:
2378 filp = finish_open(nd, open_flag, acc_mode); 2246 if (!S_ISREG(nd->inode->i_mode))
2247 will_truncate = 0;
2248
2249 if (will_truncate) {
2250 error = mnt_want_write(nd->path.mnt);
2251 if (error)
2252 goto exit;
2253 want_write = 1;
2254 }
2255common:
2256 error = may_open(&nd->path, acc_mode, open_flag);
2257 if (error)
2258 goto exit;
2259 filp = nameidata_to_filp(nd);
2260 if (!IS_ERR(filp)) {
2261 error = ima_file_check(filp, op->acc_mode);
2262 if (error) {
2263 fput(filp);
2264 filp = ERR_PTR(error);
2265 }
2266 }
2267 if (!IS_ERR(filp)) {
2268 if (will_truncate) {
2269 error = handle_truncate(filp);
2270 if (error) {
2271 fput(filp);
2272 filp = ERR_PTR(error);
2273 }
2274 }
2275 }
2276out:
2277 if (want_write)
2278 mnt_drop_write(nd->path.mnt);
2279 path_put(&nd->path);
2379 return filp; 2280 return filp;
2380 2281
2381exit_mutex_unlock: 2282exit_mutex_unlock:
@@ -2383,204 +2284,103 @@ exit_mutex_unlock:
2383exit_dput: 2284exit_dput:
2384 path_put_conditional(path, nd); 2285 path_put_conditional(path, nd);
2385exit: 2286exit:
2386 path_put(&nd->path); 2287 filp = ERR_PTR(error);
2387 return ERR_PTR(error); 2288 goto out;
2388} 2289}
2389 2290
2390/* 2291static struct file *path_openat(int dfd, const char *pathname,
2391 * Note that the low bits of the passed in "open_flag" 2292 struct nameidata *nd, const struct open_flags *op, int flags)
2392 * are not the same as in the local variable "flag". See
2393 * open_to_namei_flags() for more details.
2394 */
2395struct file *do_filp_open(int dfd, const char *pathname,
2396 int open_flag, int mode, int acc_mode)
2397{ 2293{
2294 struct file *base = NULL;
2398 struct file *filp; 2295 struct file *filp;
2399 struct nameidata nd;
2400 int error;
2401 struct path path; 2296 struct path path;
2402 int count = 0; 2297 int error;
2403 int flag = open_to_namei_flags(open_flag);
2404 int flags;
2405
2406 if (!(open_flag & O_CREAT))
2407 mode = 0;
2408
2409 /* Must never be set by userspace */
2410 open_flag &= ~FMODE_NONOTIFY;
2411
2412 /*
2413 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
2414 * check for O_DSYNC if the need any syncing at all we enforce it's
2415 * always set instead of having to deal with possibly weird behaviour
2416 * for malicious applications setting only __O_SYNC.
2417 */
2418 if (open_flag & __O_SYNC)
2419 open_flag |= O_DSYNC;
2420
2421 if (!acc_mode)
2422 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
2423
2424 /* O_TRUNC implies we need access checks for write permissions */
2425 if (open_flag & O_TRUNC)
2426 acc_mode |= MAY_WRITE;
2427
2428 /* Allow the LSM permission hook to distinguish append
2429 access from general write access. */
2430 if (open_flag & O_APPEND)
2431 acc_mode |= MAY_APPEND;
2432
2433 flags = LOOKUP_OPEN;
2434 if (open_flag & O_CREAT) {
2435 flags |= LOOKUP_CREATE;
2436 if (open_flag & O_EXCL)
2437 flags |= LOOKUP_EXCL;
2438 }
2439 if (open_flag & O_DIRECTORY)
2440 flags |= LOOKUP_DIRECTORY;
2441 if (!(open_flag & O_NOFOLLOW))
2442 flags |= LOOKUP_FOLLOW;
2443 2298
2444 filp = get_empty_filp(); 2299 filp = get_empty_filp();
2445 if (!filp) 2300 if (!filp)
2446 return ERR_PTR(-ENFILE); 2301 return ERR_PTR(-ENFILE);
2447 2302
2448 filp->f_flags = open_flag; 2303 filp->f_flags = op->open_flag;
2449 nd.intent.open.file = filp; 2304 nd->intent.open.file = filp;
2450 nd.intent.open.flags = flag; 2305 nd->intent.open.flags = open_to_namei_flags(op->open_flag);
2451 nd.intent.open.create_mode = mode; 2306 nd->intent.open.create_mode = op->mode;
2452 2307
2453 if (open_flag & O_CREAT) 2308 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
2454 goto creat;
2455
2456 /* !O_CREAT, simple open */
2457 error = do_path_lookup(dfd, pathname, flags, &nd);
2458 if (unlikely(error)) 2309 if (unlikely(error))
2459 goto out_filp2;
2460 error = -ELOOP;
2461 if (!(nd.flags & LOOKUP_FOLLOW)) {
2462 if (nd.inode->i_op->follow_link)
2463 goto out_path2;
2464 }
2465 error = -ENOTDIR;
2466 if (nd.flags & LOOKUP_DIRECTORY) {
2467 if (!nd.inode->i_op->lookup)
2468 goto out_path2;
2469 }
2470 audit_inode(pathname, nd.path.dentry);
2471 filp = finish_open(&nd, open_flag, acc_mode);
2472out2:
2473 release_open_intent(&nd);
2474 return filp;
2475
2476out_path2:
2477 path_put(&nd.path);
2478out_filp2:
2479 filp = ERR_PTR(error);
2480 goto out2;
2481
2482creat:
2483 /* OK, have to create the file. Find the parent. */
2484 error = path_init_rcu(dfd, pathname,
2485 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2486 if (error)
2487 goto out_filp; 2310 goto out_filp;
2488 error = path_walk_rcu(pathname, &nd);
2489 path_finish_rcu(&nd);
2490 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2491 /* slower, locked walk */
2492 if (error == -ESTALE) {
2493reval:
2494 flags |= LOOKUP_REVAL;
2495 }
2496 error = path_init(dfd, pathname,
2497 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2498 if (error)
2499 goto out_filp;
2500 2311
2501 error = path_walk_simple(pathname, &nd); 2312 current->total_link_count = 0;
2502 } 2313 error = link_path_walk(pathname, nd);
2503 if (unlikely(error)) 2314 if (unlikely(error))
2504 goto out_filp; 2315 goto out_filp;
2505 if (unlikely(!audit_dummy_context()))
2506 audit_inode(pathname, nd.path.dentry);
2507 2316
2508 /* 2317 filp = do_last(nd, &path, op, pathname);
2509 * We have the parent and last component.
2510 */
2511 nd.flags = flags;
2512 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
2513 while (unlikely(!filp)) { /* trailing symlink */ 2318 while (unlikely(!filp)) { /* trailing symlink */
2514 struct path link = path; 2319 struct path link = path;
2515 struct inode *linki = link.dentry->d_inode;
2516 void *cookie; 2320 void *cookie;
2517 error = -ELOOP; 2321 if (!(nd->flags & LOOKUP_FOLLOW)) {
2518 if (!(nd.flags & LOOKUP_FOLLOW)) 2322 path_put_conditional(&path, nd);
2519 goto exit_dput; 2323 path_put(&nd->path);
2520 if (count++ == 32) 2324 filp = ERR_PTR(-ELOOP);
2521 goto exit_dput; 2325 break;
2522 /*
2523 * This is subtle. Instead of calling do_follow_link() we do
2524 * the thing by hands. The reason is that this way we have zero
2525 * link_count and path_walk() (called from ->follow_link)
2526 * honoring LOOKUP_PARENT. After that we have the parent and
2527 * last component, i.e. we are in the same situation as after
2528 * the first path_walk(). Well, almost - if the last component
2529 * is normal we get its copy stored in nd->last.name and we will
2530 * have to putname() it when we are done. Procfs-like symlinks
2531 * just set LAST_BIND.
2532 */
2533 nd.flags |= LOOKUP_PARENT;
2534 error = security_inode_follow_link(link.dentry, &nd);
2535 if (error)
2536 goto exit_dput;
2537 error = __do_follow_link(&link, &nd, &cookie);
2538 if (unlikely(error)) {
2539 if (!IS_ERR(cookie) && linki->i_op->put_link)
2540 linki->i_op->put_link(link.dentry, &nd, cookie);
2541 /* nd.path had been dropped */
2542 nd.path = link;
2543 goto out_path;
2544 } 2326 }
2545 nd.flags &= ~LOOKUP_PARENT; 2327 nd->flags |= LOOKUP_PARENT;
2546 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2328 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2547 if (linki->i_op->put_link) 2329 error = follow_link(&link, nd, &cookie);
2548 linki->i_op->put_link(link.dentry, &nd, cookie); 2330 if (unlikely(error))
2549 path_put(&link); 2331 filp = ERR_PTR(error);
2332 else
2333 filp = do_last(nd, &path, op, pathname);
2334 put_link(nd, &link, cookie);
2550 } 2335 }
2551out: 2336out:
2552 if (nd.root.mnt) 2337 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
2553 path_put(&nd.root); 2338 path_put(&nd->root);
2554 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2339 if (base)
2555 goto reval; 2340 fput(base);
2556 release_open_intent(&nd); 2341 release_open_intent(nd);
2557 return filp; 2342 return filp;
2558 2343
2559exit_dput:
2560 path_put_conditional(&path, &nd);
2561out_path:
2562 path_put(&nd.path);
2563out_filp: 2344out_filp:
2564 filp = ERR_PTR(error); 2345 filp = ERR_PTR(error);
2565 goto out; 2346 goto out;
2566} 2347}
2567 2348
2568/** 2349struct file *do_filp_open(int dfd, const char *pathname,
2569 * filp_open - open file and return file pointer 2350 const struct open_flags *op, int flags)
2570 * 2351{
2571 * @filename: path to open 2352 struct nameidata nd;
2572 * @flags: open flags as per the open(2) second argument 2353 struct file *filp;
2573 * @mode: mode for the new file if O_CREAT is set, else ignored 2354
2574 * 2355 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
2575 * This is the helper to open a file from kernelspace if you really 2356 if (unlikely(filp == ERR_PTR(-ECHILD)))
2576 * have to. But in generally you should not do this, so please move 2357 filp = path_openat(dfd, pathname, &nd, op, flags);
2577 * along, nothing to see here.. 2358 if (unlikely(filp == ERR_PTR(-ESTALE)))
2578 */ 2359 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
2579struct file *filp_open(const char *filename, int flags, int mode) 2360 return filp;
2361}
2362
2363struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2364 const char *name, const struct open_flags *op, int flags)
2580{ 2365{
2581 return do_filp_open(AT_FDCWD, filename, flags, mode, 0); 2366 struct nameidata nd;
2367 struct file *file;
2368
2369 nd.root.mnt = mnt;
2370 nd.root.dentry = dentry;
2371
2372 flags |= LOOKUP_ROOT;
2373
2374 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
2375 return ERR_PTR(-ELOOP);
2376
2377 file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
2378 if (unlikely(file == ERR_PTR(-ECHILD)))
2379 file = path_openat(-1, name, &nd, op, flags);
2380 if (unlikely(file == ERR_PTR(-ESTALE)))
2381 file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
2382 return file;
2582} 2383}
2583EXPORT_SYMBOL(filp_open);
2584 2384
2585/** 2385/**
2586 * lookup_create - lookup a dentry, creating it if it doesn't exist 2386 * lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -3119,7 +2919,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
3119 return error; 2919 return error;
3120 2920
3121 mutex_lock(&inode->i_mutex); 2921 mutex_lock(&inode->i_mutex);
3122 error = dir->i_op->link(old_dentry, dir, new_dentry); 2922 /* Make sure we don't allow creating hardlink to an unlinked file */
2923 if (inode->i_nlink == 0)
2924 error = -ENOENT;
2925 else
2926 error = dir->i_op->link(old_dentry, dir, new_dentry);
3123 mutex_unlock(&inode->i_mutex); 2927 mutex_unlock(&inode->i_mutex);
3124 if (!error) 2928 if (!error)
3125 fsnotify_link(dir, inode, new_dentry); 2929 fsnotify_link(dir, inode, new_dentry);
@@ -3141,15 +2945,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3141 struct dentry *new_dentry; 2945 struct dentry *new_dentry;
3142 struct nameidata nd; 2946 struct nameidata nd;
3143 struct path old_path; 2947 struct path old_path;
2948 int how = 0;
3144 int error; 2949 int error;
3145 char *to; 2950 char *to;
3146 2951
3147 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2952 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
3148 return -EINVAL; 2953 return -EINVAL;
2954 /*
2955 * To use null names we require CAP_DAC_READ_SEARCH
2956 * This ensures that not everyone will be able to create
2957 * handlink using the passed filedescriptor.
2958 */
2959 if (flags & AT_EMPTY_PATH) {
2960 if (!capable(CAP_DAC_READ_SEARCH))
2961 return -ENOENT;
2962 how = LOOKUP_EMPTY;
2963 }
2964
2965 if (flags & AT_SYMLINK_FOLLOW)
2966 how |= LOOKUP_FOLLOW;
3149 2967
3150 error = user_path_at(olddfd, oldname, 2968 error = user_path_at(olddfd, oldname, how, &old_path);
3151 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
3152 &old_path);
3153 if (error) 2969 if (error)
3154 return error; 2970 return error;
3155 2971
@@ -3586,7 +3402,7 @@ EXPORT_SYMBOL(page_readlink);
3586EXPORT_SYMBOL(__page_symlink); 3402EXPORT_SYMBOL(__page_symlink);
3587EXPORT_SYMBOL(page_symlink); 3403EXPORT_SYMBOL(page_symlink);
3588EXPORT_SYMBOL(page_symlink_inode_operations); 3404EXPORT_SYMBOL(page_symlink_inode_operations);
3589EXPORT_SYMBOL(path_lookup); 3405EXPORT_SYMBOL(kern_path_parent);
3590EXPORT_SYMBOL(kern_path); 3406EXPORT_SYMBOL(kern_path);
3591EXPORT_SYMBOL(vfs_path_lookup); 3407EXPORT_SYMBOL(vfs_path_lookup);
3592EXPORT_SYMBOL(inode_permission); 3408EXPORT_SYMBOL(inode_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index d1edf26025dc..dffe6f49ab93 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1002,6 +1002,18 @@ const struct seq_operations mounts_op = {
1002 .show = show_vfsmnt 1002 .show = show_vfsmnt
1003}; 1003};
1004 1004
1005static int uuid_is_nil(u8 *uuid)
1006{
1007 int i;
1008 u8 *cp = (u8 *)uuid;
1009
1010 for (i = 0; i < 16; i++) {
1011 if (*cp++)
1012 return 0;
1013 }
1014 return 1;
1015}
1016
1005static int show_mountinfo(struct seq_file *m, void *v) 1017static int show_mountinfo(struct seq_file *m, void *v)
1006{ 1018{
1007 struct proc_mounts *p = m->private; 1019 struct proc_mounts *p = m->private;
@@ -1040,6 +1052,10 @@ static int show_mountinfo(struct seq_file *m, void *v)
1040 if (IS_MNT_UNBINDABLE(mnt)) 1052 if (IS_MNT_UNBINDABLE(mnt))
1041 seq_puts(m, " unbindable"); 1053 seq_puts(m, " unbindable");
1042 1054
1055 if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
1056 /* print the uuid */
1057 seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
1058
1043 /* Filesystem specific data */ 1059 /* Filesystem specific data */
1044 seq_puts(m, " - "); 1060 seq_puts(m, " - ");
1045 show_type(m, sb); 1061 show_type(m, sb);
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index bf9cbd242ddd..124e8fcb0dd6 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -22,30 +22,17 @@
22 22
23static struct file *do_open(char *name, int flags) 23static struct file *do_open(char *name, int flags)
24{ 24{
25 struct nameidata nd;
26 struct vfsmount *mnt; 25 struct vfsmount *mnt;
27 int error; 26 struct file *file;
28 27
29 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); 28 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
30 if (IS_ERR(mnt)) 29 if (IS_ERR(mnt))
31 return (struct file *)mnt; 30 return (struct file *)mnt;
32 31
33 error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd); 32 file = file_open_root(mnt->mnt_root, mnt, name, flags);
34 mntput(mnt); /* drop do_kern_mount reference */
35 if (error)
36 return ERR_PTR(error);
37
38 if (flags == O_RDWR)
39 error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
40 else
41 error = may_open(&nd.path, MAY_WRITE, flags);
42 33
43 if (!error) 34 mntput(mnt); /* drop do_kern_mount reference */
44 return dentry_open(nd.path.dentry, nd.path.mnt, flags, 35 return file;
45 current_cred());
46
47 path_put(&nd.path);
48 return ERR_PTR(error);
49} 36}
50 37
51static struct { 38static struct {
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5dbc3062b4fd..254652a9b542 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -197,8 +197,12 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
197 dentry->d_name.len, dentry->d_name.name, 197 dentry->d_name.len, dentry->d_name.name,
198 fh, len, connectable); 198 fh, len, connectable);
199 199
200 if (len < 3 || (connectable && len < 6)) { 200 if (connectable && (len < 6)) {
201 mlog(ML_ERROR, "fh buffer is too small for encoding\n"); 201 *max_len = 6;
202 type = 255;
203 goto bail;
204 } else if (len < 3) {
205 *max_len = 3;
202 type = 255; 206 type = 255;
203 goto bail; 207 goto bail;
204 } 208 }
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 19ebc5aad391..29623da133cc 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4379,7 +4379,7 @@ static int ocfs2_user_path_parent(const char __user *path,
4379 if (IS_ERR(s)) 4379 if (IS_ERR(s))
4380 return PTR_ERR(s); 4380 return PTR_ERR(s);
4381 4381
4382 error = path_lookup(s, LOOKUP_PARENT, nd); 4382 error = kern_path_parent(s, nd);
4383 if (error) 4383 if (error)
4384 putname(s); 4384 putname(s);
4385 else 4385 else
diff --git a/fs/open.c b/fs/open.c
index b47aab39c057..3cac0bda46df 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -573,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
573{ 573{
574 struct path path; 574 struct path path;
575 int error = -EINVAL; 575 int error = -EINVAL;
576 int follow; 576 int lookup_flags;
577 577
578 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 578 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
579 goto out; 579 goto out;
580 580
581 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 581 lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
582 error = user_path_at(dfd, filename, follow, &path); 582 if (flag & AT_EMPTY_PATH)
583 lookup_flags |= LOOKUP_EMPTY;
584 error = user_path_at(dfd, filename, lookup_flags, &path);
583 if (error) 585 if (error)
584 goto out; 586 goto out;
585 error = mnt_want_write(path.mnt); 587 error = mnt_want_write(path.mnt);
@@ -669,11 +671,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
669 int (*open)(struct inode *, struct file *), 671 int (*open)(struct inode *, struct file *),
670 const struct cred *cred) 672 const struct cred *cred)
671{ 673{
674 static const struct file_operations empty_fops = {};
672 struct inode *inode; 675 struct inode *inode;
673 int error; 676 int error;
674 677
675 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | 678 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
676 FMODE_PREAD | FMODE_PWRITE; 679 FMODE_PREAD | FMODE_PWRITE;
680
681 if (unlikely(f->f_flags & O_PATH))
682 f->f_mode = FMODE_PATH;
683
677 inode = dentry->d_inode; 684 inode = dentry->d_inode;
678 if (f->f_mode & FMODE_WRITE) { 685 if (f->f_mode & FMODE_WRITE) {
679 error = __get_file_write_access(inode, mnt); 686 error = __get_file_write_access(inode, mnt);
@@ -687,9 +694,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
687 f->f_path.dentry = dentry; 694 f->f_path.dentry = dentry;
688 f->f_path.mnt = mnt; 695 f->f_path.mnt = mnt;
689 f->f_pos = 0; 696 f->f_pos = 0;
690 f->f_op = fops_get(inode->i_fop);
691 file_sb_list_add(f, inode->i_sb); 697 file_sb_list_add(f, inode->i_sb);
692 698
699 if (unlikely(f->f_mode & FMODE_PATH)) {
700 f->f_op = &empty_fops;
701 return f;
702 }
703
704 f->f_op = fops_get(inode->i_fop);
705
693 error = security_dentry_open(f, cred); 706 error = security_dentry_open(f, cred);
694 if (error) 707 if (error)
695 goto cleanup_all; 708 goto cleanup_all;
@@ -890,15 +903,110 @@ void fd_install(unsigned int fd, struct file *file)
890 903
891EXPORT_SYMBOL(fd_install); 904EXPORT_SYMBOL(fd_install);
892 905
906static inline int build_open_flags(int flags, int mode, struct open_flags *op)
907{
908 int lookup_flags = 0;
909 int acc_mode;
910
911 if (!(flags & O_CREAT))
912 mode = 0;
913 op->mode = mode;
914
915 /* Must never be set by userspace */
916 flags &= ~FMODE_NONOTIFY;
917
918 /*
919 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
920 * check for O_DSYNC if the need any syncing at all we enforce it's
921 * always set instead of having to deal with possibly weird behaviour
922 * for malicious applications setting only __O_SYNC.
923 */
924 if (flags & __O_SYNC)
925 flags |= O_DSYNC;
926
927 /*
928 * If we have O_PATH in the open flag. Then we
929 * cannot have anything other than the below set of flags
930 */
931 if (flags & O_PATH) {
932 flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
933 acc_mode = 0;
934 } else {
935 acc_mode = MAY_OPEN | ACC_MODE(flags);
936 }
937
938 op->open_flag = flags;
939
940 /* O_TRUNC implies we need access checks for write permissions */
941 if (flags & O_TRUNC)
942 acc_mode |= MAY_WRITE;
943
944 /* Allow the LSM permission hook to distinguish append
945 access from general write access. */
946 if (flags & O_APPEND)
947 acc_mode |= MAY_APPEND;
948
949 op->acc_mode = acc_mode;
950
951 op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
952
953 if (flags & O_CREAT) {
954 op->intent |= LOOKUP_CREATE;
955 if (flags & O_EXCL)
956 op->intent |= LOOKUP_EXCL;
957 }
958
959 if (flags & O_DIRECTORY)
960 lookup_flags |= LOOKUP_DIRECTORY;
961 if (!(flags & O_NOFOLLOW))
962 lookup_flags |= LOOKUP_FOLLOW;
963 return lookup_flags;
964}
965
966/**
967 * filp_open - open file and return file pointer
968 *
969 * @filename: path to open
970 * @flags: open flags as per the open(2) second argument
971 * @mode: mode for the new file if O_CREAT is set, else ignored
972 *
973 * This is the helper to open a file from kernelspace if you really
974 * have to. But in generally you should not do this, so please move
975 * along, nothing to see here..
976 */
977struct file *filp_open(const char *filename, int flags, int mode)
978{
979 struct open_flags op;
980 int lookup = build_open_flags(flags, mode, &op);
981 return do_filp_open(AT_FDCWD, filename, &op, lookup);
982}
983EXPORT_SYMBOL(filp_open);
984
985struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
986 const char *filename, int flags)
987{
988 struct open_flags op;
989 int lookup = build_open_flags(flags, 0, &op);
990 if (flags & O_CREAT)
991 return ERR_PTR(-EINVAL);
992 if (!filename && (flags & O_DIRECTORY))
993 if (!dentry->d_inode->i_op->lookup)
994 return ERR_PTR(-ENOTDIR);
995 return do_file_open_root(dentry, mnt, filename, &op, lookup);
996}
997EXPORT_SYMBOL(file_open_root);
998
893long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 999long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
894{ 1000{
1001 struct open_flags op;
1002 int lookup = build_open_flags(flags, mode, &op);
895 char *tmp = getname(filename); 1003 char *tmp = getname(filename);
896 int fd = PTR_ERR(tmp); 1004 int fd = PTR_ERR(tmp);
897 1005
898 if (!IS_ERR(tmp)) { 1006 if (!IS_ERR(tmp)) {
899 fd = get_unused_fd_flags(flags); 1007 fd = get_unused_fd_flags(flags);
900 if (fd >= 0) { 1008 if (fd >= 0) {
901 struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); 1009 struct file *f = do_filp_open(dfd, tmp, &op, lookup);
902 if (IS_ERR(f)) { 1010 if (IS_ERR(f)) {
903 put_unused_fd(fd); 1011 put_unused_fd(fd);
904 fd = PTR_ERR(f); 1012 fd = PTR_ERR(f);
@@ -968,8 +1076,10 @@ int filp_close(struct file *filp, fl_owner_t id)
968 if (filp->f_op && filp->f_op->flush) 1076 if (filp->f_op && filp->f_op->flush)
969 retval = filp->f_op->flush(filp, id); 1077 retval = filp->f_op->flush(filp, id);
970 1078
971 dnotify_flush(filp, id); 1079 if (likely(!(filp->f_mode & FMODE_PATH))) {
972 locks_remove_posix(filp, id); 1080 dnotify_flush(filp, id);
1081 locks_remove_posix(filp, id);
1082 }
973 fput(filp); 1083 fput(filp);
974 return retval; 1084 return retval;
975} 1085}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0bae036831e2..1bba24bad820 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1593,8 +1593,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
1593 struct inode *inode = dentry->d_inode; 1593 struct inode *inode = dentry->d_inode;
1594 int maxlen = *lenp; 1594 int maxlen = *lenp;
1595 1595
1596 if (maxlen < 3) 1596 if (need_parent && (maxlen < 5)) {
1597 *lenp = 5;
1597 return 255; 1598 return 255;
1599 } else if (maxlen < 3) {
1600 *lenp = 3;
1601 return 255;
1602 }
1598 1603
1599 data[0] = inode->i_ino; 1604 data[0] = inode->i_ino;
1600 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1605 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 68fdf45cc6c9..4b2eb564fdad 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1122,10 +1122,6 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1122 reiserfs_write_unlock(dir->i_sb); 1122 reiserfs_write_unlock(dir->i_sb);
1123 return -EMLINK; 1123 return -EMLINK;
1124 } 1124 }
1125 if (inode->i_nlink == 0) {
1126 reiserfs_write_unlock(dir->i_sb);
1127 return -ENOENT;
1128 }
1129 1125
1130 /* inc before scheduling so reiserfs_unlink knows we are here */ 1126 /* inc before scheduling so reiserfs_unlink knows we are here */
1131 inc_nlink(inode); 1127 inc_nlink(inode);
diff --git a/fs/stat.c b/fs/stat.c
index d5c61cf2b703..961039121cb8 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
75 int error = -EINVAL; 75 int error = -EINVAL;
76 int lookup_flags = 0; 76 int lookup_flags = 0;
77 77
78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0) 78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
79 AT_EMPTY_PATH)) != 0)
79 goto out; 80 goto out;
80 81
81 if (!(flag & AT_SYMLINK_NOFOLLOW)) 82 if (!(flag & AT_SYMLINK_NOFOLLOW))
82 lookup_flags |= LOOKUP_FOLLOW; 83 lookup_flags |= LOOKUP_FOLLOW;
83 if (flag & AT_NO_AUTOMOUNT) 84 if (flag & AT_NO_AUTOMOUNT)
84 lookup_flags |= LOOKUP_NO_AUTOMOUNT; 85 lookup_flags |= LOOKUP_NO_AUTOMOUNT;
86 if (flag & AT_EMPTY_PATH)
87 lookup_flags |= LOOKUP_EMPTY;
85 88
86 error = user_path_at(dfd, filename, lookup_flags, &path); 89 error = user_path_at(dfd, filename, lookup_flags, &path);
87 if (error) 90 if (error)
@@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
297 if (bufsiz <= 0) 300 if (bufsiz <= 0)
298 return -EINVAL; 301 return -EINVAL;
299 302
300 error = user_path_at(dfd, pathname, 0, &path); 303 error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
301 if (!error) { 304 if (!error) {
302 struct inode *inode = path.dentry->d_inode; 305 struct inode *inode = path.dentry->d_inode;
303 306
diff --git a/fs/statfs.c b/fs/statfs.c
index 30ea8c8a996b..8244924dec55 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -73,149 +73,135 @@ int vfs_statfs(struct path *path, struct kstatfs *buf)
73} 73}
74EXPORT_SYMBOL(vfs_statfs); 74EXPORT_SYMBOL(vfs_statfs);
75 75
76static int do_statfs_native(struct path *path, struct statfs *buf) 76int user_statfs(const char __user *pathname, struct kstatfs *st)
77{ 77{
78 struct kstatfs st; 78 struct path path;
79 int retval; 79 int error = user_path(pathname, &path);
80 if (!error) {
81 error = vfs_statfs(&path, st);
82 path_put(&path);
83 }
84 return error;
85}
80 86
81 retval = vfs_statfs(path, &st); 87int fd_statfs(int fd, struct kstatfs *st)
82 if (retval) 88{
83 return retval; 89 struct file *file = fget(fd);
90 int error = -EBADF;
91 if (file) {
92 error = vfs_statfs(&file->f_path, st);
93 fput(file);
94 }
95 return error;
96}
84 97
85 if (sizeof(*buf) == sizeof(st)) 98static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
86 memcpy(buf, &st, sizeof(st)); 99{
100 struct statfs buf;
101
102 if (sizeof(buf) == sizeof(*st))
103 memcpy(&buf, st, sizeof(*st));
87 else { 104 else {
88 if (sizeof buf->f_blocks == 4) { 105 if (sizeof buf.f_blocks == 4) {
89 if ((st.f_blocks | st.f_bfree | st.f_bavail | 106 if ((st->f_blocks | st->f_bfree | st->f_bavail |
90 st.f_bsize | st.f_frsize) & 107 st->f_bsize | st->f_frsize) &
91 0xffffffff00000000ULL) 108 0xffffffff00000000ULL)
92 return -EOVERFLOW; 109 return -EOVERFLOW;
93 /* 110 /*
94 * f_files and f_ffree may be -1; it's okay to stuff 111 * f_files and f_ffree may be -1; it's okay to stuff
95 * that into 32 bits 112 * that into 32 bits
96 */ 113 */
97 if (st.f_files != -1 && 114 if (st->f_files != -1 &&
98 (st.f_files & 0xffffffff00000000ULL)) 115 (st->f_files & 0xffffffff00000000ULL))
99 return -EOVERFLOW; 116 return -EOVERFLOW;
100 if (st.f_ffree != -1 && 117 if (st->f_ffree != -1 &&
101 (st.f_ffree & 0xffffffff00000000ULL)) 118 (st->f_ffree & 0xffffffff00000000ULL))
102 return -EOVERFLOW; 119 return -EOVERFLOW;
103 } 120 }
104 121
105 buf->f_type = st.f_type; 122 buf.f_type = st->f_type;
106 buf->f_bsize = st.f_bsize; 123 buf.f_bsize = st->f_bsize;
107 buf->f_blocks = st.f_blocks; 124 buf.f_blocks = st->f_blocks;
108 buf->f_bfree = st.f_bfree; 125 buf.f_bfree = st->f_bfree;
109 buf->f_bavail = st.f_bavail; 126 buf.f_bavail = st->f_bavail;
110 buf->f_files = st.f_files; 127 buf.f_files = st->f_files;
111 buf->f_ffree = st.f_ffree; 128 buf.f_ffree = st->f_ffree;
112 buf->f_fsid = st.f_fsid; 129 buf.f_fsid = st->f_fsid;
113 buf->f_namelen = st.f_namelen; 130 buf.f_namelen = st->f_namelen;
114 buf->f_frsize = st.f_frsize; 131 buf.f_frsize = st->f_frsize;
115 buf->f_flags = st.f_flags; 132 buf.f_flags = st->f_flags;
116 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 133 memset(buf.f_spare, 0, sizeof(buf.f_spare));
117 } 134 }
135 if (copy_to_user(p, &buf, sizeof(buf)))
136 return -EFAULT;
118 return 0; 137 return 0;
119} 138}
120 139
121static int do_statfs64(struct path *path, struct statfs64 *buf) 140static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
122{ 141{
123 struct kstatfs st; 142 struct statfs64 buf;
124 int retval; 143 if (sizeof(buf) == sizeof(*st))
125 144 memcpy(&buf, st, sizeof(*st));
126 retval = vfs_statfs(path, &st);
127 if (retval)
128 return retval;
129
130 if (sizeof(*buf) == sizeof(st))
131 memcpy(buf, &st, sizeof(st));
132 else { 145 else {
133 buf->f_type = st.f_type; 146 buf.f_type = st->f_type;
134 buf->f_bsize = st.f_bsize; 147 buf.f_bsize = st->f_bsize;
135 buf->f_blocks = st.f_blocks; 148 buf.f_blocks = st->f_blocks;
136 buf->f_bfree = st.f_bfree; 149 buf.f_bfree = st->f_bfree;
137 buf->f_bavail = st.f_bavail; 150 buf.f_bavail = st->f_bavail;
138 buf->f_files = st.f_files; 151 buf.f_files = st->f_files;
139 buf->f_ffree = st.f_ffree; 152 buf.f_ffree = st->f_ffree;
140 buf->f_fsid = st.f_fsid; 153 buf.f_fsid = st->f_fsid;
141 buf->f_namelen = st.f_namelen; 154 buf.f_namelen = st->f_namelen;
142 buf->f_frsize = st.f_frsize; 155 buf.f_frsize = st->f_frsize;
143 buf->f_flags = st.f_flags; 156 buf.f_flags = st->f_flags;
144 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 157 memset(buf.f_spare, 0, sizeof(buf.f_spare));
145 } 158 }
159 if (copy_to_user(p, &buf, sizeof(buf)))
160 return -EFAULT;
146 return 0; 161 return 0;
147} 162}
148 163
149SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) 164SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
150{ 165{
151 struct path path; 166 struct kstatfs st;
152 int error; 167 int error = user_statfs(pathname, &st);
153 168 if (!error)
154 error = user_path(pathname, &path); 169 error = do_statfs_native(&st, buf);
155 if (!error) {
156 struct statfs tmp;
157 error = do_statfs_native(&path, &tmp);
158 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
159 error = -EFAULT;
160 path_put(&path);
161 }
162 return error; 170 return error;
163} 171}
164 172
165SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) 173SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
166{ 174{
167 struct path path; 175 struct kstatfs st;
168 long error; 176 int error;
169
170 if (sz != sizeof(*buf)) 177 if (sz != sizeof(*buf))
171 return -EINVAL; 178 return -EINVAL;
172 error = user_path(pathname, &path); 179 error = user_statfs(pathname, &st);
173 if (!error) { 180 if (!error)
174 struct statfs64 tmp; 181 error = do_statfs64(&st, buf);
175 error = do_statfs64(&path, &tmp);
176 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
177 error = -EFAULT;
178 path_put(&path);
179 }
180 return error; 182 return error;
181} 183}
182 184
183SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) 185SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
184{ 186{
185 struct file *file; 187 struct kstatfs st;
186 struct statfs tmp; 188 int error = fd_statfs(fd, &st);
187 int error; 189 if (!error)
188 190 error = do_statfs_native(&st, buf);
189 error = -EBADF;
190 file = fget(fd);
191 if (!file)
192 goto out;
193 error = do_statfs_native(&file->f_path, &tmp);
194 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
195 error = -EFAULT;
196 fput(file);
197out:
198 return error; 191 return error;
199} 192}
200 193
201SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) 194SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
202{ 195{
203 struct file *file; 196 struct kstatfs st;
204 struct statfs64 tmp;
205 int error; 197 int error;
206 198
207 if (sz != sizeof(*buf)) 199 if (sz != sizeof(*buf))
208 return -EINVAL; 200 return -EINVAL;
209 201
210 error = -EBADF; 202 error = fd_statfs(fd, &st);
211 file = fget(fd); 203 if (!error)
212 if (!file) 204 error = do_statfs64(&st, buf);
213 goto out;
214 error = do_statfs64(&file->f_path, &tmp);
215 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
216 error = -EFAULT;
217 fput(file);
218out:
219 return error; 205 return error;
220} 206}
221 207
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 14f64b689d7f..7217d67a80a6 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
522 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 522 ubifs_assert(mutex_is_locked(&dir->i_mutex));
523 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 523 ubifs_assert(mutex_is_locked(&inode->i_mutex));
524 524
525 /*
526 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
527 * otherwise has the potential to corrupt the orphan inode list.
528 *
529 * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
530 * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
531 * lock 'dirA->i_mutex', so this is possible. Both of the functions
532 * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
533 * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
534 * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
535 * to the list of orphans. After this, 'vfs_link()' will link
536 * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
537 * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
538 * to the list of orphans.
539 */
540 if (inode->i_nlink == 0)
541 return -ENOENT;
542
543 err = dbg_check_synced_i_size(inode); 525 err = dbg_check_synced_i_size(inode);
544 if (err) 526 if (err)
545 return err; 527 return err;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index b7c338d5e9df..f1dce848ef96 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1286,8 +1286,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
1286 struct fid *fid = (struct fid *)fh; 1286 struct fid *fid = (struct fid *)fh;
1287 int type = FILEID_UDF_WITHOUT_PARENT; 1287 int type = FILEID_UDF_WITHOUT_PARENT;
1288 1288
1289 if (len < 3 || (connectable && len < 5)) 1289 if (connectable && (len < 5)) {
1290 *lenp = 5;
1291 return 255;
1292 } else if (len < 3) {
1293 *lenp = 3;
1290 return 255; 1294 return 255;
1295 }
1291 1296
1292 *lenp = 3; 1297 *lenp = 3;
1293 fid->udf.block = location.logicalBlockNum; 1298 fid->udf.block = location.logicalBlockNum;
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index fc0114da7fdd..f4f878fc0083 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -89,8 +89,10 @@ xfs_fs_encode_fh(
89 * seven combinations work. The real answer is "don't use v2". 89 * seven combinations work. The real answer is "don't use v2".
90 */ 90 */
91 len = xfs_fileid_length(fileid_type); 91 len = xfs_fileid_length(fileid_type);
92 if (*max_len < len) 92 if (*max_len < len) {
93 *max_len = len;
93 return 255; 94 return 255;
95 }
94 *max_len = len; 96 *max_len = len;
95 97
96 switch (fileid_type) { 98 switch (fileid_type) {
diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h
index 0fc16e3f0bfc..84793c7025e2 100644
--- a/include/asm-generic/fcntl.h
+++ b/include/asm-generic/fcntl.h
@@ -80,6 +80,10 @@
80#define O_SYNC (__O_SYNC|O_DSYNC) 80#define O_SYNC (__O_SYNC|O_DSYNC)
81#endif 81#endif
82 82
83#ifndef O_PATH
84#define O_PATH 010000000
85#endif
86
83#ifndef O_NDELAY 87#ifndef O_NDELAY
84#define O_NDELAY O_NONBLOCK 88#define O_NDELAY O_NONBLOCK
85#endif 89#endif
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index b969770196c2..57af0338d270 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -646,9 +646,13 @@ __SYSCALL(__NR_prlimit64, sys_prlimit64)
646__SYSCALL(__NR_fanotify_init, sys_fanotify_init) 646__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
647#define __NR_fanotify_mark 263 647#define __NR_fanotify_mark 263
648__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) 648__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
649#define __NR_name_to_handle_at 264
650__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
651#define __NR_open_by_handle_at 265
652__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
649 653
650#undef __NR_syscalls 654#undef __NR_syscalls
651#define __NR_syscalls 264 655#define __NR_syscalls 266
652 656
653/* 657/*
654 * All syscalls below here should go away really, 658 * All syscalls below here should go away really,
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 28028988c862..33a42f24b275 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -8,6 +8,9 @@ struct inode;
8struct super_block; 8struct super_block;
9struct vfsmount; 9struct vfsmount;
10 10
11/* limit the handle size to NFSv4 handle size now */
12#define MAX_HANDLE_SZ 128
13
11/* 14/*
12 * The fileid_type identifies how the file within the filesystem is encoded. 15 * The fileid_type identifies how the file within the filesystem is encoded.
13 * In theory this is freely set and parsed by the filesystem, but we try to 16 * In theory this is freely set and parsed by the filesystem, but we try to
@@ -121,8 +124,10 @@ struct fid {
121 * set, the encode_fh() should store sufficient information so that a good 124 * set, the encode_fh() should store sufficient information so that a good
122 * attempt can be made to find not only the file but also it's place in the 125 * attempt can be made to find not only the file but also it's place in the
123 * filesystem. This typically means storing a reference to de->d_parent in 126 * filesystem. This typically means storing a reference to de->d_parent in
124 * the filehandle fragment. encode_fh() should return the number of bytes 127 * the filehandle fragment. encode_fh() should return the fileid_type on
125 * stored or a negative error code such as %-ENOSPC 128 * success and on error returns 255 (if the space needed to encode fh is
129 * greater than @max_len*4 bytes). On error @max_len contains the minimum
130 * size(in 4 byte unit) needed to encode the file handle.
126 * 131 *
127 * fh_to_dentry: 132 * fh_to_dentry:
128 * @fh_to_dentry is given a &struct super_block (@sb) and a file handle 133 * @fh_to_dentry is given a &struct super_block (@sb) and a file handle
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index a562fa5fb4e3..f550f894ba15 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -46,6 +46,7 @@
46 unlinking file. */ 46 unlinking file. */
47#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ 47#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
48#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ 48#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
49#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
49 50
50#ifdef __KERNEL__ 51#ifdef __KERNEL__
51 52
diff --git a/include/linux/file.h b/include/linux/file.h
index e85baebf6279..21a79958541c 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -29,6 +29,8 @@ static inline void fput_light(struct file *file, int fput_needed)
29 29
30extern struct file *fget(unsigned int fd); 30extern struct file *fget(unsigned int fd);
31extern struct file *fget_light(unsigned int fd, int *fput_needed); 31extern struct file *fget_light(unsigned int fd, int *fput_needed);
32extern struct file *fget_raw(unsigned int fd);
33extern struct file *fget_raw_light(unsigned int fd, int *fput_needed);
32extern void set_close_on_exec(unsigned int fd, int flag); 34extern void set_close_on_exec(unsigned int fd, int flag);
33extern void put_filp(struct file *); 35extern void put_filp(struct file *);
34extern int alloc_fd(unsigned start, unsigned flags); 36extern int alloc_fd(unsigned start, unsigned flags);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e38b50a4b9d2..13df14e2c42e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -102,6 +102,9 @@ struct inodes_stat_t {
102/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ 102/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
103#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) 103#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
104 104
105/* File is opened with O_PATH; almost nothing can be done with it */
106#define FMODE_PATH ((__force fmode_t)0x4000)
107
105/* File was opened by fanotify and shouldn't generate fanotify events */ 108/* File was opened by fanotify and shouldn't generate fanotify events */
106#define FMODE_NONOTIFY ((__force fmode_t)0x1000000) 109#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
107 110
@@ -978,6 +981,13 @@ struct file {
978#endif 981#endif
979}; 982};
980 983
984struct file_handle {
985 __u32 handle_bytes;
986 int handle_type;
987 /* file identifier */
988 unsigned char f_handle[0];
989};
990
981#define get_file(x) atomic_long_inc(&(x)->f_count) 991#define get_file(x) atomic_long_inc(&(x)->f_count)
982#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) 992#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
983#define file_count(x) atomic_long_read(&(x)->f_count) 993#define file_count(x) atomic_long_read(&(x)->f_count)
@@ -1401,6 +1411,7 @@ struct super_block {
1401 wait_queue_head_t s_wait_unfrozen; 1411 wait_queue_head_t s_wait_unfrozen;
1402 1412
1403 char s_id[32]; /* Informational name */ 1413 char s_id[32]; /* Informational name */
1414 u8 s_uuid[16]; /* UUID */
1404 1415
1405 void *s_fs_info; /* Filesystem private info */ 1416 void *s_fs_info; /* Filesystem private info */
1406 fmode_t s_mode; 1417 fmode_t s_mode;
@@ -1874,6 +1885,8 @@ extern void drop_collected_mounts(struct vfsmount *);
1874extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, 1885extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
1875 struct vfsmount *); 1886 struct vfsmount *);
1876extern int vfs_statfs(struct path *, struct kstatfs *); 1887extern int vfs_statfs(struct path *, struct kstatfs *);
1888extern int user_statfs(const char __user *, struct kstatfs *);
1889extern int fd_statfs(int, struct kstatfs *);
1877extern int statfs_by_dentry(struct dentry *, struct kstatfs *); 1890extern int statfs_by_dentry(struct dentry *, struct kstatfs *);
1878extern int freeze_super(struct super_block *super); 1891extern int freeze_super(struct super_block *super);
1879extern int thaw_super(struct super_block *super); 1892extern int thaw_super(struct super_block *super);
@@ -1990,6 +2003,8 @@ extern int do_fallocate(struct file *file, int mode, loff_t offset,
1990extern long do_sys_open(int dfd, const char __user *filename, int flags, 2003extern long do_sys_open(int dfd, const char __user *filename, int flags,
1991 int mode); 2004 int mode);
1992extern struct file *filp_open(const char *, int, int); 2005extern struct file *filp_open(const char *, int, int);
2006extern struct file *file_open_root(struct dentry *, struct vfsmount *,
2007 const char *, int);
1993extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, 2008extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
1994 const struct cred *); 2009 const struct cred *);
1995extern int filp_close(struct file *, fl_owner_t id); 2010extern int filp_close(struct file *, fl_owner_t id);
@@ -2205,10 +2220,6 @@ extern struct file *create_read_pipe(struct file *f, int flags);
2205extern struct file *create_write_pipe(int flags); 2220extern struct file *create_write_pipe(int flags);
2206extern void free_write_pipe(struct file *); 2221extern void free_write_pipe(struct file *);
2207 2222
2208extern struct file *do_filp_open(int dfd, const char *pathname,
2209 int open_flag, int mode, int acc_mode);
2210extern int may_open(struct path *, int, int);
2211
2212extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2223extern int kernel_read(struct file *, loff_t, char *, unsigned long);
2213extern struct file * open_exec(const char *); 2224extern struct file * open_exec(const char *);
2214 2225
diff --git a/include/linux/namei.h b/include/linux/namei.h
index f276d4fa01fc..9c8603872c36 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -19,7 +19,6 @@ struct nameidata {
19 struct path path; 19 struct path path;
20 struct qstr last; 20 struct qstr last;
21 struct path root; 21 struct path root;
22 struct file *file;
23 struct inode *inode; /* path.dentry.d_inode */ 22 struct inode *inode; /* path.dentry.d_inode */
24 unsigned int flags; 23 unsigned int flags;
25 unsigned seq; 24 unsigned seq;
@@ -63,6 +62,10 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
63#define LOOKUP_EXCL 0x0400 62#define LOOKUP_EXCL 0x0400
64#define LOOKUP_RENAME_TARGET 0x0800 63#define LOOKUP_RENAME_TARGET 0x0800
65 64
65#define LOOKUP_JUMPED 0x1000
66#define LOOKUP_ROOT 0x2000
67#define LOOKUP_EMPTY 0x4000
68
66extern int user_path_at(int, const char __user *, unsigned, struct path *); 69extern int user_path_at(int, const char __user *, unsigned, struct path *);
67 70
68#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path) 71#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path)
@@ -72,7 +75,7 @@ extern int user_path_at(int, const char __user *, unsigned, struct path *);
72 75
73extern int kern_path(const char *, unsigned, struct path *); 76extern int kern_path(const char *, unsigned, struct path *);
74 77
75extern int path_lookup(const char *, unsigned, struct nameidata *); 78extern int kern_path_parent(const char *, struct nameidata *);
76extern int vfs_path_lookup(struct dentry *, struct vfsmount *, 79extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
77 const char *, unsigned int, struct nameidata *); 80 const char *, unsigned int, struct nameidata *);
78 81
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 98664db1be47..2d9b79c0f224 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -62,6 +62,7 @@ struct robust_list_head;
62struct getcpu_cache; 62struct getcpu_cache;
63struct old_linux_dirent; 63struct old_linux_dirent;
64struct perf_event_attr; 64struct perf_event_attr;
65struct file_handle;
65 66
66#include <linux/types.h> 67#include <linux/types.h>
67#include <linux/aio_abi.h> 68#include <linux/aio_abi.h>
@@ -832,5 +833,10 @@ asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
832 unsigned long prot, unsigned long flags, 833 unsigned long prot, unsigned long flags,
833 unsigned long fd, unsigned long pgoff); 834 unsigned long fd, unsigned long pgoff);
834asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); 835asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
835 836asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
837 struct file_handle __user *handle,
838 int __user *mnt_id, int flag);
839asmlinkage long sys_open_by_handle_at(int mountdirfd,
840 struct file_handle __user *handle,
841 int flags);
836#endif 842#endif
diff --git a/init/Kconfig b/init/Kconfig
index be788c0957d4..e72fa17fe559 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -287,6 +287,18 @@ config BSD_PROCESS_ACCT_V3
287 for processing it. A preliminary version of these tools is available 287 for processing it. A preliminary version of these tools is available
288 at <http://www.gnu.org/software/acct/>. 288 at <http://www.gnu.org/software/acct/>.
289 289
290config FHANDLE
291 bool "open by fhandle syscalls"
292 select EXPORTFS
293 help
294 If you say Y here, a user level program will be able to map
295 file names to handle and then later use the handle for
296 different file system operations. This is useful in implementing
297 userspace file servers, which now track files using handles instead
298 of names. The handle would remain the same even if file names
299 get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2)
300 syscalls.
301
290config TASKSTATS 302config TASKSTATS
291 bool "Export task/process statistics through netlink (EXPERIMENTAL)" 303 bool "Export task/process statistics through netlink (EXPERIMENTAL)"
292 depends on NET 304 depends on NET
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index d2e3c7866460..e683869365d9 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -144,9 +144,9 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
144} 144}
145 145
146/* Initialize a parent watch entry. */ 146/* Initialize a parent watch entry. */
147static struct audit_parent *audit_init_parent(struct nameidata *ndp) 147static struct audit_parent *audit_init_parent(struct path *path)
148{ 148{
149 struct inode *inode = ndp->path.dentry->d_inode; 149 struct inode *inode = path->dentry->d_inode;
150 struct audit_parent *parent; 150 struct audit_parent *parent;
151 int ret; 151 int ret;
152 152
@@ -353,53 +353,40 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
353} 353}
354 354
355/* Get path information necessary for adding watches. */ 355/* Get path information necessary for adding watches. */
356static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw) 356static int audit_get_nd(struct audit_watch *watch, struct path *parent)
357{ 357{
358 struct nameidata *ndparent, *ndwatch; 358 struct nameidata nd;
359 struct dentry *d;
359 int err; 360 int err;
360 361
361 ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL); 362 err = kern_path_parent(watch->path, &nd);
362 if (unlikely(!ndparent)) 363 if (err)
363 return -ENOMEM; 364 return err;
364 365
365 ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL); 366 if (nd.last_type != LAST_NORM) {
366 if (unlikely(!ndwatch)) { 367 path_put(&nd.path);
367 kfree(ndparent); 368 return -EINVAL;
368 return -ENOMEM;
369 } 369 }
370 370
371 err = path_lookup(path, LOOKUP_PARENT, ndparent); 371 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
372 if (err) { 372 d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
373 kfree(ndparent); 373 if (IS_ERR(d)) {
374 kfree(ndwatch); 374 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
375 return err; 375 path_put(&nd.path);
376 return PTR_ERR(d);
376 } 377 }
377 378 if (d->d_inode) {
378 err = path_lookup(path, 0, ndwatch); 379 /* update watch filter fields */
379 if (err) { 380 watch->dev = d->d_inode->i_sb->s_dev;
380 kfree(ndwatch); 381 watch->ino = d->d_inode->i_ino;
381 ndwatch = NULL;
382 } 382 }
383 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
383 384
384 *ndp = ndparent; 385 *parent = nd.path;
385 *ndw = ndwatch; 386 dput(d);
386
387 return 0; 387 return 0;
388} 388}
389 389
390/* Release resources used for watch path information. */
391static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
392{
393 if (ndp) {
394 path_put(&ndp->path);
395 kfree(ndp);
396 }
397 if (ndw) {
398 path_put(&ndw->path);
399 kfree(ndw);
400 }
401}
402
403/* Associate the given rule with an existing parent. 390/* Associate the given rule with an existing parent.
404 * Caller must hold audit_filter_mutex. */ 391 * Caller must hold audit_filter_mutex. */
405static void audit_add_to_parent(struct audit_krule *krule, 392static void audit_add_to_parent(struct audit_krule *krule,
@@ -440,31 +427,24 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
440{ 427{
441 struct audit_watch *watch = krule->watch; 428 struct audit_watch *watch = krule->watch;
442 struct audit_parent *parent; 429 struct audit_parent *parent;
443 struct nameidata *ndp = NULL, *ndw = NULL; 430 struct path parent_path;
444 int h, ret = 0; 431 int h, ret = 0;
445 432
446 mutex_unlock(&audit_filter_mutex); 433 mutex_unlock(&audit_filter_mutex);
447 434
448 /* Avoid calling path_lookup under audit_filter_mutex. */ 435 /* Avoid calling path_lookup under audit_filter_mutex. */
449 ret = audit_get_nd(watch->path, &ndp, &ndw); 436 ret = audit_get_nd(watch, &parent_path);
450 if (ret) {
451 /* caller expects mutex locked */
452 mutex_lock(&audit_filter_mutex);
453 goto error;
454 }
455 437
438 /* caller expects mutex locked */
456 mutex_lock(&audit_filter_mutex); 439 mutex_lock(&audit_filter_mutex);
457 440
458 /* update watch filter fields */ 441 if (ret)
459 if (ndw) { 442 return ret;
460 watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
461 watch->ino = ndw->path.dentry->d_inode->i_ino;
462 }
463 443
464 /* either find an old parent or attach a new one */ 444 /* either find an old parent or attach a new one */
465 parent = audit_find_parent(ndp->path.dentry->d_inode); 445 parent = audit_find_parent(parent_path.dentry->d_inode);
466 if (!parent) { 446 if (!parent) {
467 parent = audit_init_parent(ndp); 447 parent = audit_init_parent(&parent_path);
468 if (IS_ERR(parent)) { 448 if (IS_ERR(parent)) {
469 ret = PTR_ERR(parent); 449 ret = PTR_ERR(parent);
470 goto error; 450 goto error;
@@ -479,9 +459,8 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
479 h = audit_hash_ino((u32)watch->ino); 459 h = audit_hash_ino((u32)watch->ino);
480 *list = &audit_inode_hash[h]; 460 *list = &audit_inode_hash[h];
481error: 461error:
482 audit_put_nd(ndp, ndw); /* NULL args OK */ 462 path_put(&parent_path);
483 return ret; 463 return ret;
484
485} 464}
486 465
487void audit_remove_watch_rule(struct audit_krule *krule) 466void audit_remove_watch_rule(struct audit_krule *krule)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c782fe9924c7..25cc41cd8f33 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -186,3 +186,8 @@ cond_syscall(sys_perf_event_open);
186/* fanotify! */ 186/* fanotify! */
187cond_syscall(sys_fanotify_init); 187cond_syscall(sys_fanotify_init);
188cond_syscall(sys_fanotify_mark); 188cond_syscall(sys_fanotify_mark);
189
190/* open by handle */
191cond_syscall(sys_name_to_handle_at);
192cond_syscall(sys_open_by_handle_at);
193cond_syscall(compat_sys_open_by_handle_at);
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index b875bedf7c9a..3b8e028b9601 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1321,13 +1321,11 @@ static ssize_t binary_sysctl(const int *name, int nlen,
1321 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) 1321 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
1322{ 1322{
1323 const struct bin_table *table = NULL; 1323 const struct bin_table *table = NULL;
1324 struct nameidata nd;
1325 struct vfsmount *mnt; 1324 struct vfsmount *mnt;
1326 struct file *file; 1325 struct file *file;
1327 ssize_t result; 1326 ssize_t result;
1328 char *pathname; 1327 char *pathname;
1329 int flags; 1328 int flags;
1330 int acc_mode;
1331 1329
1332 pathname = sysctl_getname(name, nlen, &table); 1330 pathname = sysctl_getname(name, nlen, &table);
1333 result = PTR_ERR(pathname); 1331 result = PTR_ERR(pathname);
@@ -1337,28 +1335,17 @@ static ssize_t binary_sysctl(const int *name, int nlen,
1337 /* How should the sysctl be accessed? */ 1335 /* How should the sysctl be accessed? */
1338 if (oldval && oldlen && newval && newlen) { 1336 if (oldval && oldlen && newval && newlen) {
1339 flags = O_RDWR; 1337 flags = O_RDWR;
1340 acc_mode = MAY_READ | MAY_WRITE;
1341 } else if (newval && newlen) { 1338 } else if (newval && newlen) {
1342 flags = O_WRONLY; 1339 flags = O_WRONLY;
1343 acc_mode = MAY_WRITE;
1344 } else if (oldval && oldlen) { 1340 } else if (oldval && oldlen) {
1345 flags = O_RDONLY; 1341 flags = O_RDONLY;
1346 acc_mode = MAY_READ;
1347 } else { 1342 } else {
1348 result = 0; 1343 result = 0;
1349 goto out_putname; 1344 goto out_putname;
1350 } 1345 }
1351 1346
1352 mnt = current->nsproxy->pid_ns->proc_mnt; 1347 mnt = current->nsproxy->pid_ns->proc_mnt;
1353 result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd); 1348 file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
1354 if (result)
1355 goto out_putname;
1356
1357 result = may_open(&nd.path, acc_mode, flags);
1358 if (result)
1359 goto out_putpath;
1360
1361 file = dentry_open(nd.path.dentry, nd.path.mnt, flags, current_cred());
1362 result = PTR_ERR(file); 1349 result = PTR_ERR(file);
1363 if (IS_ERR(file)) 1350 if (IS_ERR(file))
1364 goto out_putname; 1351 goto out_putname;
@@ -1370,10 +1357,6 @@ out_putname:
1370 putname(pathname); 1357 putname(pathname);
1371out: 1358out:
1372 return result; 1359 return result;
1373
1374out_putpath:
1375 path_put(&nd.path);
1376 goto out_putname;
1377} 1360}
1378 1361
1379 1362
diff --git a/mm/shmem.c b/mm/shmem.c
index 5ee67c990602..3437b65d6d6e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2144,8 +2144,10 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
2144{ 2144{
2145 struct inode *inode = dentry->d_inode; 2145 struct inode *inode = dentry->d_inode;
2146 2146
2147 if (*len < 3) 2147 if (*len < 3) {
2148 *len = 3;
2148 return 255; 2149 return 255;
2150 }
2149 2151
2150 if (inode_unhashed(inode)) { 2152 if (inode_unhashed(inode)) {
2151 /* Unfortunately insert_inode_hash is not idempotent, 2153 /* Unfortunately insert_inode_hash is not idempotent,
diff --git a/net/core/scm.c b/net/core/scm.c
index bbe454450801..4c1ef026d695 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -95,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
95 int fd = fdp[i]; 95 int fd = fdp[i];
96 struct file *file; 96 struct file *file;
97 97
98 if (fd < 0 || !(file = fget(fd))) 98 if (fd < 0 || !(file = fget_raw(fd)))
99 return -EBADF; 99 return -EBADF;
100 *fpp++ = file; 100 *fpp++ = file;
101 fpl->count++; 101 fpl->count++;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 437a99e560e1..ba5b8c208498 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
850 * Get the parent directory, calculate the hash for last 850 * Get the parent directory, calculate the hash for last
851 * component. 851 * component.
852 */ 852 */
853 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); 853 err = kern_path_parent(sunaddr->sun_path, &nd);
854 if (err) 854 if (err)
855 goto out_mknod_parent; 855 goto out_mknod_parent;
856 856
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index f89f83bf828e..b6f4b994eb35 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -104,7 +104,7 @@ struct sock *unix_get_socket(struct file *filp)
104 /* 104 /*
105 * Socket ? 105 * Socket ?
106 */ 106 */
107 if (S_ISSOCK(inode->i_mode)) { 107 if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
108 struct socket *sock = SOCKET_I(inode); 108 struct socket *sock = SOCKET_I(inode);
109 struct sock *s = sock->sk; 109 struct sock *s = sock->sk;
110 110