aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2011-03-15 18:48:13 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-03-15 18:48:13 -0400
commit 422e6c4bc4b48c15b3cb57a1ca71431abfc57e54 (patch)
tree 81ceb21f84a79e796ee33b5d17e17406f096abcb /fs
parent c83ce989cb5ff86575821992ea82c4df5c388ebc (diff)
parent 574197e0de46a8a4db5c54ef7b65e43ffa8873a7 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6: (57 commits)
  tidy the trailing symlinks traversal up
  Turn resolution of trailing symlinks iterative everywhere
  simplify link_path_walk() tail
  Make trailing symlink resolution in path_lookupat() iterative
  update nd->inode in __do_follow_link() instead of after do_follow_link()
  pull handling of one pathname component into a helper
  fs: allow AT_EMPTY_PATH in linkat(), limit that to CAP_DAC_READ_SEARCH
  Allow passing O_PATH descriptors via SCM_RIGHTS datagrams
  readlinkat(), fchownat() and fstatat() with empty relative pathnames
  Allow O_PATH for symlinks
  New kind of open files - "location only".
  ext4: Copy fs UUID to superblock
  ext3: Copy fs UUID to superblock.
  vfs: Export file system uuid via /proc/<pid>/mountinfo
  unistd.h: Add new syscalls numbers to asm-generic
  x86: Add new syscalls for x86_64
  x86: Add new syscalls for x86_32
  fs: Remove i_nlink check from file system link callback
  fs: Don't allow to create hardlink for deleted file
  vfs: Add open by file handle support
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/Kconfig 2
-rw-r--r-- fs/Makefile 2
-rw-r--r-- fs/btrfs/export.c 8
-rw-r--r-- fs/btrfs/inode.c 3
-rw-r--r-- fs/compat.c 61
-rw-r--r-- fs/exec.c 18
-rw-r--r-- fs/exportfs/expfs.c 11
-rw-r--r-- fs/ext3/namei.c 7
-rw-r--r-- fs/ext3/super.c 1
-rw-r--r-- fs/ext4/namei.c 7
-rw-r--r-- fs/ext4/super.c 2
-rw-r--r-- fs/fat/inode.c 4
-rw-r--r-- fs/fcntl.c 37
-rw-r--r-- fs/fhandle.c 265
-rw-r--r-- fs/file_table.c 55
-rw-r--r-- fs/fuse/inode.c 4
-rw-r--r-- fs/gfs2/export.c 8
-rw-r--r-- fs/internal.h 13
-rw-r--r-- fs/isofs/export.c 8
-rw-r--r-- fs/jfs/namei.c 3
-rw-r--r-- fs/namei.c 1498
-rw-r--r-- fs/namespace.c 16
-rw-r--r-- fs/nfsctl.c 21
-rw-r--r-- fs/ocfs2/export.c 8
-rw-r--r-- fs/ocfs2/refcounttree.c 2
-rw-r--r-- fs/open.c 126
-rw-r--r-- fs/reiserfs/inode.c 7
-rw-r--r-- fs/reiserfs/namei.c 4
-rw-r--r-- fs/stat.c 7
-rw-r--r-- fs/statfs.c 176
-rw-r--r-- fs/ubifs/dir.c 18
-rw-r--r-- fs/udf/namei.c 7
-rw-r--r-- fs/xfs/linux-2.6/xfs_export.c 4
33 files changed, 1337 insertions, 1076 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 3db9caa57edc..7cb53aafac1e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
47 def_bool n 47 def_bool n
48 48
49config EXPORTFS 49config EXPORTFS
50 tristate 50 bool
51 51
52config FILE_LOCKING 52config FILE_LOCKING
53 bool "Enable POSIX file locking API" if EXPERT 53 bool "Enable POSIX file locking API" if EXPERT
diff --git a/fs/Makefile b/fs/Makefile
index a7f7cef0c0c8..ba01202844c5 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
48obj-$(CONFIG_NFS_COMMON) += nfs_common/ 48obj-$(CONFIG_NFS_COMMON) += nfs_common/
49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o 49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
50 50
51obj-$(CONFIG_FHANDLE) += fhandle.o
52
51obj-y += quota/ 53obj-y += quota/
52 54
53obj-$(CONFIG_PROC_FS) += proc/ 55obj-$(CONFIG_PROC_FS) += proc/
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ff27d7a477b2..b4ffad859adb 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
21 int len = *max_len; 21 int len = *max_len;
22 int type; 22 int type;
23 23
24 if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || 24 if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
25 (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) 25 *max_len = BTRFS_FID_SIZE_CONNECTABLE;
26 return 255; 26 return 255;
27 } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
28 *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 return 255;
30 }
27 31
28 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 32 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 type = FILEID_BTRFS_WITHOUT_PARENT; 33 type = FILEID_BTRFS_WITHOUT_PARENT;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9007bbd01dbf..4a0107e18747 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4806,9 +4806,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4806 int err; 4806 int err;
4807 int drop_inode = 0; 4807 int drop_inode = 0;
4808 4808
4809 if (inode->i_nlink == 0)
4810 return -ENOENT;
4811
4812 /* do not allow sys_link's with other subvols of the same device */ 4809 /* do not allow sys_link's with other subvols of the same device */
4813 if (root->objectid != BTRFS_I(inode)->root->objectid) 4810 if (root->objectid != BTRFS_I(inode)->root->objectid)
4814 return -EPERM; 4811 return -EPERM;
diff --git a/fs/compat.c b/fs/compat.c
index 691c3fd8ce1d..c6d31a3bab88 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -262,35 +262,19 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
262 */ 262 */
263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) 263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
264{ 264{
265 struct path path; 265 struct kstatfs tmp;
266 int error; 266 int error = user_statfs(pathname, &tmp);
267 267 if (!error)
268 error = user_path(pathname, &path); 268 error = put_compat_statfs(buf, &tmp);
269 if (!error) {
270 struct kstatfs tmp;
271 error = vfs_statfs(&path, &tmp);
272 if (!error)
273 error = put_compat_statfs(buf, &tmp);
274 path_put(&path);
275 }
276 return error; 269 return error;
277} 270}
278 271
279asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) 272asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf)
280{ 273{
281 struct file * file;
282 struct kstatfs tmp; 274 struct kstatfs tmp;
283 int error; 275 int error = fd_statfs(fd, &tmp);
284
285 error = -EBADF;
286 file = fget(fd);
287 if (!file)
288 goto out;
289 error = vfs_statfs(&file->f_path, &tmp);
290 if (!error) 276 if (!error)
291 error = put_compat_statfs(buf, &tmp); 277 error = put_compat_statfs(buf, &tmp);
292 fput(file);
293out:
294 return error; 278 return error;
295} 279}
296 280
@@ -329,41 +313,29 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
329 313
330asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) 314asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
331{ 315{
332 struct path path; 316 struct kstatfs tmp;
333 int error; 317 int error;
334 318
335 if (sz != sizeof(*buf)) 319 if (sz != sizeof(*buf))
336 return -EINVAL; 320 return -EINVAL;
337 321
338 error = user_path(pathname, &path); 322 error = user_statfs(pathname, &tmp);
339 if (!error) { 323 if (!error)
340 struct kstatfs tmp; 324 error = put_compat_statfs64(buf, &tmp);
341 error = vfs_statfs(&path, &tmp);
342 if (!error)
343 error = put_compat_statfs64(buf, &tmp);
344 path_put(&path);
345 }
346 return error; 325 return error;
347} 326}
348 327
349asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) 328asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf)
350{ 329{
351 struct file * file;
352 struct kstatfs tmp; 330 struct kstatfs tmp;
353 int error; 331 int error;
354 332
355 if (sz != sizeof(*buf)) 333 if (sz != sizeof(*buf))
356 return -EINVAL; 334 return -EINVAL;
357 335
358 error = -EBADF; 336 error = fd_statfs(fd, &tmp);
359 file = fget(fd);
360 if (!file)
361 goto out;
362 error = vfs_statfs(&file->f_path, &tmp);
363 if (!error) 337 if (!error)
364 error = put_compat_statfs64(buf, &tmp); 338 error = put_compat_statfs64(buf, &tmp);
365 fput(file);
366out:
367 return error; 339 return error;
368} 340}
369 341
@@ -2312,3 +2284,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
2312} 2284}
2313 2285
2314#endif /* CONFIG_TIMERFD */ 2286#endif /* CONFIG_TIMERFD */
2287
2288#ifdef CONFIG_FHANDLE
2289/*
2290 * Exactly like fs/fhandle.c:sys_open_by_handle_at(), except that it
2291 * doesn't set the O_LARGEFILE flag.
2292 */
2293asmlinkage long
2294compat_sys_open_by_handle_at(int mountdirfd,
2295 struct file_handle __user *handle, int flags)
2296{
2297 return do_handle_open(mountdirfd, handle, flags);
2298}
2299#endif
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6ab..ba99e1abb1aa 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
115 struct file *file; 115 struct file *file;
116 char *tmp = getname(library); 116 char *tmp = getname(library);
117 int error = PTR_ERR(tmp); 117 int error = PTR_ERR(tmp);
118 static const struct open_flags uselib_flags = {
119 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
120 .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
121 .intent = LOOKUP_OPEN
122 };
118 123
119 if (IS_ERR(tmp)) 124 if (IS_ERR(tmp))
120 goto out; 125 goto out;
121 126
122 file = do_filp_open(AT_FDCWD, tmp, 127 file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
123 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
124 MAY_READ | MAY_EXEC | MAY_OPEN);
125 putname(tmp); 128 putname(tmp);
126 error = PTR_ERR(file); 129 error = PTR_ERR(file);
127 if (IS_ERR(file)) 130 if (IS_ERR(file))
@@ -721,10 +724,13 @@ struct file *open_exec(const char *name)
721{ 724{
722 struct file *file; 725 struct file *file;
723 int err; 726 int err;
727 static const struct open_flags open_exec_flags = {
728 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
729 .acc_mode = MAY_EXEC | MAY_OPEN,
730 .intent = LOOKUP_OPEN
731 };
724 732
725 file = do_filp_open(AT_FDCWD, name, 733 file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
726 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
727 MAY_EXEC | MAY_OPEN);
728 if (IS_ERR(file)) 734 if (IS_ERR(file))
729 goto out; 735 goto out;
730 736
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 4b6825740dd5..b05acb796135 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -320,9 +320,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
320 struct inode * inode = dentry->d_inode; 320 struct inode * inode = dentry->d_inode;
321 int len = *max_len; 321 int len = *max_len;
322 int type = FILEID_INO32_GEN; 322 int type = FILEID_INO32_GEN;
323 323
324 if (len < 2 || (connectable && len < 4)) 324 if (connectable && (len < 4)) {
325 *max_len = 4;
326 return 255;
327 } else if (len < 2) {
328 *max_len = 2;
325 return 255; 329 return 255;
330 }
326 331
327 len = 2; 332 len = 2;
328 fid->i32.ino = inode->i_ino; 333 fid->i32.ino = inode->i_ino;
@@ -369,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
369 /* 374 /*
370 * Try to get any dentry for the given file handle from the filesystem. 375 * Try to get any dentry for the given file handle from the filesystem.
371 */ 376 */
377 if (!nop || !nop->fh_to_dentry)
378 return ERR_PTR(-ESTALE);
372 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); 379 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
373 if (!result) 380 if (!result)
374 result = ERR_PTR(-ESTALE); 381 result = ERR_PTR(-ESTALE);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b27ba71810ec..561f69256266 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2253,13 +2253,6 @@ static int ext3_link (struct dentry * old_dentry,
2253 2253
2254 dquot_initialize(dir); 2254 dquot_initialize(dir);
2255 2255
2256 /*
2257 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2258 * otherwise has the potential to corrupt the orphan inode list.
2259 */
2260 if (inode->i_nlink == 0)
2261 return -ENOENT;
2262
2263retry: 2256retry:
2264 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2257 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2265 EXT3_INDEX_EXTRA_TRANS_BLOCKS); 2258 EXT3_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 85c8cc8f2473..9cc19a1dea8e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1936,6 +1936,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1936 sb->s_qcop = &ext3_qctl_operations; 1936 sb->s_qcop = &ext3_qctl_operations;
1937 sb->dq_op = &ext3_quota_operations; 1937 sb->dq_op = &ext3_quota_operations;
1938#endif 1938#endif
1939 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
1939 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1940 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1940 mutex_init(&sbi->s_orphan_lock); 1941 mutex_init(&sbi->s_orphan_lock);
1941 mutex_init(&sbi->s_resize_lock); 1942 mutex_init(&sbi->s_resize_lock);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5485390d32c5..e781b7ea5630 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2304,13 +2304,6 @@ static int ext4_link(struct dentry *old_dentry,
2304 2304
2305 dquot_initialize(dir); 2305 dquot_initialize(dir);
2306 2306
2307 /*
2308 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2309 * otherwise has the potential to corrupt the orphan inode list.
2310 */
2311 if (inode->i_nlink == 0)
2312 return -ENOENT;
2313
2314retry: 2307retry:
2315 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2308 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2316 EXT4_INDEX_EXTRA_TRANS_BLOCKS); 2309 EXT4_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f6a318f836b2..5977b356a435 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3415,6 +3415,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3415 sb->s_qcop = &ext4_qctl_operations; 3415 sb->s_qcop = &ext4_qctl_operations;
3416 sb->dq_op = &ext4_quota_operations; 3416 sb->dq_op = &ext4_quota_operations;
3417#endif 3417#endif
3418 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3419
3418 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3420 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3419 mutex_init(&sbi->s_orphan_lock); 3421 mutex_init(&sbi->s_orphan_lock);
3420 mutex_init(&sbi->s_resize_lock); 3422 mutex_init(&sbi->s_resize_lock);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 86753fe10bd1..0e277ec4b612 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -757,8 +757,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
757 struct inode *inode = de->d_inode; 757 struct inode *inode = de->d_inode;
758 u32 ipos_h, ipos_m, ipos_l; 758 u32 ipos_h, ipos_m, ipos_l;
759 759
760 if (len < 5) 760 if (len < 5) {
761 *lenp = 5;
761 return 255; /* no room */ 762 return 255; /* no room */
763 }
762 764
763 ipos_h = MSDOS_I(inode)->i_pos >> 8; 765 ipos_h = MSDOS_I(inode)->i_pos >> 8;
764 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24; 766 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index cb1026181bdc..6c82e5bac039 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
131SYSCALL_DEFINE1(dup, unsigned int, fildes) 131SYSCALL_DEFINE1(dup, unsigned int, fildes)
132{ 132{
133 int ret = -EBADF; 133 int ret = -EBADF;
134 struct file *file = fget(fildes); 134 struct file *file = fget_raw(fildes);
135 135
136 if (file) { 136 if (file) {
137 ret = get_unused_fd(); 137 ret = get_unused_fd();
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
426 return err; 426 return err;
427} 427}
428 428
429static int check_fcntl_cmd(unsigned cmd)
430{
431 switch (cmd) {
432 case F_DUPFD:
433 case F_DUPFD_CLOEXEC:
434 case F_GETFD:
435 case F_SETFD:
436 case F_GETFL:
437 return 1;
438 }
439 return 0;
440}
441
429SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 442SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
430{ 443{
431 struct file *filp; 444 struct file *filp;
432 long err = -EBADF; 445 long err = -EBADF;
433 446
434 filp = fget(fd); 447 filp = fget_raw(fd);
435 if (!filp) 448 if (!filp)
436 goto out; 449 goto out;
437 450
451 if (unlikely(filp->f_mode & FMODE_PATH)) {
452 if (!check_fcntl_cmd(cmd)) {
453 fput(filp);
454 goto out;
455 }
456 }
457
438 err = security_file_fcntl(filp, cmd, arg); 458 err = security_file_fcntl(filp, cmd, arg);
439 if (err) { 459 if (err) {
440 fput(filp); 460 fput(filp);
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
456 long err; 476 long err;
457 477
458 err = -EBADF; 478 err = -EBADF;
459 filp = fget(fd); 479 filp = fget_raw(fd);
460 if (!filp) 480 if (!filp)
461 goto out; 481 goto out;
462 482
483 if (unlikely(filp->f_mode & FMODE_PATH)) {
484 if (!check_fcntl_cmd(cmd)) {
485 fput(filp);
486 goto out;
487 }
488 }
489
463 err = security_file_fcntl(filp, cmd, arg); 490 err = security_file_fcntl(filp, cmd, arg);
464 if (err) { 491 if (err) {
465 fput(filp); 492 fput(filp);
@@ -808,14 +835,14 @@ static int __init fcntl_init(void)
808 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 835 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
809 * is defined as O_NONBLOCK on some platforms and not on others. 836 * is defined as O_NONBLOCK on some platforms and not on others.
810 */ 837 */
811 BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 838 BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
812 O_RDONLY | O_WRONLY | O_RDWR | 839 O_RDONLY | O_WRONLY | O_RDWR |
813 O_CREAT | O_EXCL | O_NOCTTY | 840 O_CREAT | O_EXCL | O_NOCTTY |
814 O_TRUNC | O_APPEND | /* O_NONBLOCK | */ 841 O_TRUNC | O_APPEND | /* O_NONBLOCK | */
815 __O_SYNC | O_DSYNC | FASYNC | 842 __O_SYNC | O_DSYNC | FASYNC |
816 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 843 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 844 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
818 __FMODE_EXEC 845 __FMODE_EXEC | O_PATH
819 )); 846 ));
820 847
821 fasync_cache = kmem_cache_create("fasync_cache", 848 fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fhandle.c b/fs/fhandle.c
new file mode 100644
index 000000000000..bf93ad2bee07
--- /dev/null
+++ b/fs/fhandle.c
@@ -0,0 +1,265 @@
1#include <linux/syscalls.h>
2#include <linux/slab.h>
3#include <linux/fs.h>
4#include <linux/file.h>
5#include <linux/mount.h>
6#include <linux/namei.h>
7#include <linux/exportfs.h>
8#include <linux/fs_struct.h>
9#include <linux/fsnotify.h>
10#include <asm/uaccess.h>
11#include "internal.h"
12
13static long do_sys_name_to_handle(struct path *path,
14 struct file_handle __user *ufh,
15 int __user *mnt_id)
16{
17 long retval;
18 struct file_handle f_handle;
19 int handle_dwords, handle_bytes;
20 struct file_handle *handle = NULL;
21
22 /*
23 * We need to make sure whether the file system
24 * supports decoding of the file handle
25 */
26 if (!path->mnt->mnt_sb->s_export_op ||
27 !path->mnt->mnt_sb->s_export_op->fh_to_dentry)
28 return -EOPNOTSUPP;
29
30 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
31 return -EFAULT;
32
33 if (f_handle.handle_bytes > MAX_HANDLE_SZ)
34 return -EINVAL;
35
36 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
37 GFP_KERNEL);
38 if (!handle)
39 return -ENOMEM;
40
41 /* convert handle size to multiple of sizeof(u32) */
42 handle_dwords = f_handle.handle_bytes >> 2;
43
44 /* we ask for a non connected handle */
45 retval = exportfs_encode_fh(path->dentry,
46 (struct fid *)handle->f_handle,
47 &handle_dwords, 0);
48 handle->handle_type = retval;
49 /* convert handle size to bytes */
50 handle_bytes = handle_dwords * sizeof(u32);
51 handle->handle_bytes = handle_bytes;
52 if ((handle->handle_bytes > f_handle.handle_bytes) ||
53 (retval == 255) || (retval == -ENOSPC)) {
54 /* As per the old exportfs_encode_fh documentation,
55 * a file system could return ENOSPC to indicate overflow,
56 * but file systems have always returned 255. So handle
57 * both values.
58 */
59 /*
60 * Set the handle size to zero so we copy only
61 * the non-variable part of the file_handle.
62 */
63 handle_bytes = 0;
64 retval = -EOVERFLOW;
65 } else
66 retval = 0;
67 /* copy the mount id */
68 if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
69 copy_to_user(ufh, handle,
70 sizeof(struct file_handle) + handle_bytes))
71 retval = -EFAULT;
72 kfree(handle);
73 return retval;
74}
75
76/**
77 * sys_name_to_handle_at: convert name to handle
78 * @dfd: directory relative to which name is interpreted if not absolute
79 * @name: name that should be converted to handle.
80 * @handle: resulting file handle
81 * @mnt_id: mount id of the file system containing the file
82 * @flag: flag value to indicate whether to follow symlink or not
83 *
84 * @handle->handle_bytes indicates the space available to store the
85 * variable part of the file handle in bytes. If there is not
86 * enough space, the field is updated to return the minimum
87 * value required.
88 */
89SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
90 struct file_handle __user *, handle, int __user *, mnt_id,
91 int, flag)
92{
93 struct path path;
94 int lookup_flags;
95 int err;
96
97 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
98 return -EINVAL;
99
100 lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
101 if (flag & AT_EMPTY_PATH)
102 lookup_flags |= LOOKUP_EMPTY;
103 err = user_path_at(dfd, name, lookup_flags, &path);
104 if (!err) {
105 err = do_sys_name_to_handle(&path, handle, mnt_id);
106 path_put(&path);
107 }
108 return err;
109}
110
111static struct vfsmount *get_vfsmount_from_fd(int fd)
112{
113 struct path path;
114
115 if (fd == AT_FDCWD) {
116 struct fs_struct *fs = current->fs;
117 spin_lock(&fs->lock);
118 path = fs->pwd;
119 mntget(path.mnt);
120 spin_unlock(&fs->lock);
121 } else {
122 int fput_needed;
123 struct file *file = fget_light(fd, &fput_needed);
124 if (!file)
125 return ERR_PTR(-EBADF);
126 path = file->f_path;
127 mntget(path.mnt);
128 fput_light(file, fput_needed);
129 }
130 return path.mnt;
131}
132
133static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
134{
135 return 1;
136}
137
138static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
139 struct path *path)
140{
141 int retval = 0;
142 int handle_dwords;
143
144 path->mnt = get_vfsmount_from_fd(mountdirfd);
145 if (IS_ERR(path->mnt)) {
146 retval = PTR_ERR(path->mnt);
147 goto out_err;
148 }
149 /* change the handle size to multiple of sizeof(u32) */
150 handle_dwords = handle->handle_bytes >> 2;
151 path->dentry = exportfs_decode_fh(path->mnt,
152 (struct fid *)handle->f_handle,
153 handle_dwords, handle->handle_type,
154 vfs_dentry_acceptable, NULL);
155 if (IS_ERR(path->dentry)) {
156 retval = PTR_ERR(path->dentry);
157 goto out_mnt;
158 }
159 return 0;
160out_mnt:
161 mntput(path->mnt);
162out_err:
163 return retval;
164}
165
166static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
167 struct path *path)
168{
169 int retval = 0;
170 struct file_handle f_handle;
171 struct file_handle *handle = NULL;
172
173 /*
174 * With a handle we don't look at the execute bit on
175 * the directory. Ideally we would like CAP_DAC_SEARCH,
176 * but we don't have that.
177 */
178 if (!capable(CAP_DAC_READ_SEARCH)) {
179 retval = -EPERM;
180 goto out_err;
181 }
182 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
183 retval = -EFAULT;
184 goto out_err;
185 }
186 if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
187 (f_handle.handle_bytes == 0)) {
188 retval = -EINVAL;
189 goto out_err;
190 }
191 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
192 GFP_KERNEL);
193 if (!handle) {
194 retval = -ENOMEM;
195 goto out_err;
196 }
197 /* copy the full handle */
198 if (copy_from_user(handle, ufh,
199 sizeof(struct file_handle) +
200 f_handle.handle_bytes)) {
201 retval = -EFAULT;
202 goto out_handle;
203 }
204
205 retval = do_handle_to_path(mountdirfd, handle, path);
206
207out_handle:
208 kfree(handle);
209out_err:
210 return retval;
211}
212
213long do_handle_open(int mountdirfd,
214 struct file_handle __user *ufh, int open_flag)
215{
216 long retval = 0;
217 struct path path;
218 struct file *file;
219 int fd;
220
221 retval = handle_to_path(mountdirfd, ufh, &path);
222 if (retval)
223 return retval;
224
225 fd = get_unused_fd_flags(open_flag);
226 if (fd < 0) {
227 path_put(&path);
228 return fd;
229 }
230 file = file_open_root(path.dentry, path.mnt, "", open_flag);
231 if (IS_ERR(file)) {
232 put_unused_fd(fd);
233 retval = PTR_ERR(file);
234 } else {
235 retval = fd;
236 fsnotify_open(file);
237 fd_install(fd, file);
238 }
239 path_put(&path);
240 return retval;
241}
242
243/**
244 * sys_open_by_handle_at: Open the file handle
245 * @mountdirfd: directory file descriptor
246 * @handle: file handle to be opened
247 * @flags: open flags.
248 *
249 * @mountdirfd indicates the directory file descriptor
250 * of the mount point. The file handle is decoded relative
251 * to the vfsmount pointed to by @mountdirfd. The @flags
252 * value is the same as the open(2) flags.
253 */
254SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
255 struct file_handle __user *, handle,
256 int, flags)
257{
258 long ret;
259
260 if (force_o_largefile())
261 flags |= O_LARGEFILE;
262
263 ret = do_handle_open(mountdirfd, handle, flags);
264 return ret;
265}
diff --git a/fs/file_table.c b/fs/file_table.c
index eb36b6b17e26..74a9544ac770 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -276,11 +276,10 @@ struct file *fget(unsigned int fd)
276 rcu_read_lock(); 276 rcu_read_lock();
277 file = fcheck_files(files, fd); 277 file = fcheck_files(files, fd);
278 if (file) { 278 if (file) {
279 if (!atomic_long_inc_not_zero(&file->f_count)) { 279 /* File object ref couldn't be taken */
280 /* File object ref couldn't be taken */ 280 if (file->f_mode & FMODE_PATH ||
281 rcu_read_unlock(); 281 !atomic_long_inc_not_zero(&file->f_count))
282 return NULL; 282 file = NULL;
283 }
284 } 283 }
285 rcu_read_unlock(); 284 rcu_read_unlock();
286 285
@@ -289,6 +288,25 @@ struct file *fget(unsigned int fd)
289 288
290EXPORT_SYMBOL(fget); 289EXPORT_SYMBOL(fget);
291 290
291struct file *fget_raw(unsigned int fd)
292{
293 struct file *file;
294 struct files_struct *files = current->files;
295
296 rcu_read_lock();
297 file = fcheck_files(files, fd);
298 if (file) {
299 /* File object ref couldn't be taken */
300 if (!atomic_long_inc_not_zero(&file->f_count))
301 file = NULL;
302 }
303 rcu_read_unlock();
304
305 return file;
306}
307
308EXPORT_SYMBOL(fget_raw);
309
292/* 310/*
293 * Lightweight file lookup - no refcnt increment if fd table isn't shared. 311 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
294 * 312 *
@@ -313,6 +331,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
313 *fput_needed = 0; 331 *fput_needed = 0;
314 if (atomic_read(&files->count) == 1) { 332 if (atomic_read(&files->count) == 1) {
315 file = fcheck_files(files, fd); 333 file = fcheck_files(files, fd);
334 if (file && (file->f_mode & FMODE_PATH))
335 file = NULL;
336 } else {
337 rcu_read_lock();
338 file = fcheck_files(files, fd);
339 if (file) {
340 if (!(file->f_mode & FMODE_PATH) &&
341 atomic_long_inc_not_zero(&file->f_count))
342 *fput_needed = 1;
343 else
344 /* Didn't get the reference, someone's freed */
345 file = NULL;
346 }
347 rcu_read_unlock();
348 }
349
350 return file;
351}
352
353struct file *fget_raw_light(unsigned int fd, int *fput_needed)
354{
355 struct file *file;
356 struct files_struct *files = current->files;
357
358 *fput_needed = 0;
359 if (atomic_read(&files->count) == 1) {
360 file = fcheck_files(files, fd);
316 } else { 361 } else {
317 rcu_read_lock(); 362 rcu_read_lock();
318 file = fcheck_files(files, fd); 363 file = fcheck_files(files, fd);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9e3f68cc1bd1..051b1a084528 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -637,8 +637,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
637 u64 nodeid; 637 u64 nodeid;
638 u32 generation; 638 u32 generation;
639 639
640 if (*max_len < len) 640 if (*max_len < len) {
641 *max_len = len;
641 return 255; 642 return 255;
643 }
642 644
643 nodeid = get_fuse_inode(inode)->nodeid; 645 nodeid = get_fuse_inode(inode)->nodeid;
644 generation = inode->i_generation; 646 generation = inode->i_generation;
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9023db8184f9..b5a5e60df0d5 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
36 struct super_block *sb = inode->i_sb; 36 struct super_block *sb = inode->i_sb;
37 struct gfs2_inode *ip = GFS2_I(inode); 37 struct gfs2_inode *ip = GFS2_I(inode);
38 38
39 if (*len < GFS2_SMALL_FH_SIZE || 39 if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
40 (connectable && *len < GFS2_LARGE_FH_SIZE)) 40 *len = GFS2_LARGE_FH_SIZE;
41 return 255; 41 return 255;
42 } else if (*len < GFS2_SMALL_FH_SIZE) {
43 *len = GFS2_SMALL_FH_SIZE;
44 return 255;
45 }
42 46
43 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); 47 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
44 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); 48 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
diff --git a/fs/internal.h b/fs/internal.h
index 9b976b57d7fe..f3d15de44b15 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,6 +106,19 @@ extern void put_super(struct super_block *sb);
106struct nameidata; 106struct nameidata;
107extern struct file *nameidata_to_filp(struct nameidata *); 107extern struct file *nameidata_to_filp(struct nameidata *);
108extern void release_open_intent(struct nameidata *); 108extern void release_open_intent(struct nameidata *);
109struct open_flags {
110 int open_flag;
111 int mode;
112 int acc_mode;
113 int intent;
114};
115extern struct file *do_filp_open(int dfd, const char *pathname,
116 const struct open_flags *op, int lookup_flags);
117extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
118 const char *, const struct open_flags *, int lookup_flags);
119
120extern long do_handle_open(int mountdirfd,
121 struct file_handle __user *ufh, int open_flag);
109 122
110/* 123/*
111 * inode.c 124 * inode.c
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index ed752cb38474..dd4687ff30d0 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry,
124 * offset of the inode and the upper 16 bits of fh32[1] to 124 * offset of the inode and the upper 16 bits of fh32[1] to
125 * hold the offset of the parent. 125 * hold the offset of the parent.
126 */ 126 */
127 127 if (connectable && (len < 5)) {
128 if (len < 3 || (connectable && len < 5)) 128 *max_len = 5;
129 return 255;
130 } else if (len < 3) {
131 *max_len = 3;
129 return 255; 132 return 255;
133 }
130 134
131 len = 3; 135 len = 3;
132 fh32[0] = ei->i_iget5_block; 136 fh32[0] = ei->i_iget5_block;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 5a2b269428a6..3f04a1804931 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -809,9 +809,6 @@ static int jfs_link(struct dentry *old_dentry,
809 if (ip->i_nlink == JFS_LINK_MAX) 809 if (ip->i_nlink == JFS_LINK_MAX)
810 return -EMLINK; 810 return -EMLINK;
811 811
812 if (ip->i_nlink == 0)
813 return -ENOENT;
814
815 dquot_initialize(dir); 812 dquot_initialize(dir);
816 813
817 tid = txBegin(ip->i_sb, 0); 814 tid = txBegin(ip->i_sb, 0);
diff --git a/fs/namei.c b/fs/namei.c
index a4689eb2df28..0a601cae23de 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
136 return retval; 136 return retval;
137} 137}
138 138
139char * getname(const char __user * filename) 139static char *getname_flags(const char __user * filename, int flags)
140{ 140{
141 char *tmp, *result; 141 char *tmp, *result;
142 142
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
147 147
148 result = tmp; 148 result = tmp;
149 if (retval < 0) { 149 if (retval < 0) {
150 __putname(tmp); 150 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
151 result = ERR_PTR(retval); 151 __putname(tmp);
152 result = ERR_PTR(retval);
153 }
152 } 154 }
153 } 155 }
154 audit_getname(result); 156 audit_getname(result);
155 return result; 157 return result;
156} 158}
157 159
160char *getname(const char __user * filename)
161{
162 return getname_flags(filename, 0);
163}
164
158#ifdef CONFIG_AUDITSYSCALL 165#ifdef CONFIG_AUDITSYSCALL
159void putname(const char *name) 166void putname(const char *name)
160{ 167{
@@ -401,9 +408,11 @@ static int nameidata_drop_rcu(struct nameidata *nd)
401{ 408{
402 struct fs_struct *fs = current->fs; 409 struct fs_struct *fs = current->fs;
403 struct dentry *dentry = nd->path.dentry; 410 struct dentry *dentry = nd->path.dentry;
411 int want_root = 0;
404 412
405 BUG_ON(!(nd->flags & LOOKUP_RCU)); 413 BUG_ON(!(nd->flags & LOOKUP_RCU));
406 if (nd->root.mnt) { 414 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
415 want_root = 1;
407 spin_lock(&fs->lock); 416 spin_lock(&fs->lock);
408 if (nd->root.mnt != fs->root.mnt || 417 if (nd->root.mnt != fs->root.mnt ||
409 nd->root.dentry != fs->root.dentry) 418 nd->root.dentry != fs->root.dentry)
@@ -414,7 +423,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
414 goto err; 423 goto err;
415 BUG_ON(nd->inode != dentry->d_inode); 424 BUG_ON(nd->inode != dentry->d_inode);
416 spin_unlock(&dentry->d_lock); 425 spin_unlock(&dentry->d_lock);
417 if (nd->root.mnt) { 426 if (want_root) {
418 path_get(&nd->root); 427 path_get(&nd->root);
419 spin_unlock(&fs->lock); 428 spin_unlock(&fs->lock);
420 } 429 }
@@ -427,7 +436,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
427err: 436err:
428 spin_unlock(&dentry->d_lock); 437 spin_unlock(&dentry->d_lock);
429err_root: 438err_root:
430 if (nd->root.mnt) 439 if (want_root)
431 spin_unlock(&fs->lock); 440 spin_unlock(&fs->lock);
432 return -ECHILD; 441 return -ECHILD;
433} 442}
@@ -454,9 +463,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
454{ 463{
455 struct fs_struct *fs = current->fs; 464 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 465 struct dentry *parent = nd->path.dentry;
466 int want_root = 0;
457 467
458 BUG_ON(!(nd->flags & LOOKUP_RCU)); 468 BUG_ON(!(nd->flags & LOOKUP_RCU));
459 if (nd->root.mnt) { 469 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
470 want_root = 1;
460 spin_lock(&fs->lock); 471 spin_lock(&fs->lock);
461 if (nd->root.mnt != fs->root.mnt || 472 if (nd->root.mnt != fs->root.mnt ||
462 nd->root.dentry != fs->root.dentry) 473 nd->root.dentry != fs->root.dentry)
@@ -476,7 +487,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
476 parent->d_count++; 487 parent->d_count++;
477 spin_unlock(&dentry->d_lock); 488 spin_unlock(&dentry->d_lock);
478 spin_unlock(&parent->d_lock); 489 spin_unlock(&parent->d_lock);
479 if (nd->root.mnt) { 490 if (want_root) {
480 path_get(&nd->root); 491 path_get(&nd->root);
481 spin_unlock(&fs->lock); 492 spin_unlock(&fs->lock);
482 } 493 }
@@ -490,7 +501,7 @@ err:
490 spin_unlock(&dentry->d_lock); 501 spin_unlock(&dentry->d_lock);
491 spin_unlock(&parent->d_lock); 502 spin_unlock(&parent->d_lock);
492err_root: 503err_root:
493 if (nd->root.mnt) 504 if (want_root)
494 spin_unlock(&fs->lock); 505 spin_unlock(&fs->lock);
495 return -ECHILD; 506 return -ECHILD;
496} 507}
@@ -498,8 +509,16 @@ err_root:
498/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ 509/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
499static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) 510static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
500{ 511{
501 if (nd->flags & LOOKUP_RCU) 512 if (nd->flags & LOOKUP_RCU) {
502 return nameidata_dentry_drop_rcu(nd, dentry); 513 if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
514 nd->flags &= ~LOOKUP_RCU;
515 if (!(nd->flags & LOOKUP_ROOT))
516 nd->root.mnt = NULL;
517 rcu_read_unlock();
518 br_read_unlock(vfsmount_lock);
519 return -ECHILD;
520 }
521 }
503 return 0; 522 return 0;
504} 523}
505 524
@@ -518,7 +537,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd)
518 537
519 BUG_ON(!(nd->flags & LOOKUP_RCU)); 538 BUG_ON(!(nd->flags & LOOKUP_RCU));
520 nd->flags &= ~LOOKUP_RCU; 539 nd->flags &= ~LOOKUP_RCU;
521 nd->root.mnt = NULL; 540 if (!(nd->flags & LOOKUP_ROOT))
541 nd->root.mnt = NULL;
522 spin_lock(&dentry->d_lock); 542 spin_lock(&dentry->d_lock);
523 if (!__d_rcu_to_refcount(dentry, nd->seq)) 543 if (!__d_rcu_to_refcount(dentry, nd->seq))
524 goto err_unlock; 544 goto err_unlock;
@@ -539,14 +559,6 @@ err_unlock:
539 return -ECHILD; 559 return -ECHILD;
540} 560}
541 561
542/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
543static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
544{
545 if (likely(nd->flags & LOOKUP_RCU))
546 return nameidata_drop_rcu_last(nd);
547 return 0;
548}
549
550/** 562/**
551 * release_open_intent - free up open intent resources 563 * release_open_intent - free up open intent resources
552 * @nd: pointer to nameidata 564 * @nd: pointer to nameidata
@@ -590,42 +602,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
590 return dentry; 602 return dentry;
591} 603}
592 604
593static inline struct dentry *
594do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
595{
596 int status = d_revalidate(dentry, nd);
597 if (likely(status > 0))
598 return dentry;
599 if (status == -ECHILD) {
600 if (nameidata_dentry_drop_rcu(nd, dentry))
601 return ERR_PTR(-ECHILD);
602 return do_revalidate(dentry, nd);
603 }
604 if (status < 0)
605 return ERR_PTR(status);
606 /* Don't d_invalidate in rcu-walk mode */
607 if (nameidata_dentry_drop_rcu(nd, dentry))
608 return ERR_PTR(-ECHILD);
609 if (!d_invalidate(dentry)) {
610 dput(dentry);
611 dentry = NULL;
612 }
613 return dentry;
614}
615
616static inline int need_reval_dot(struct dentry *dentry)
617{
618 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
619 return 0;
620
621 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
622 return 0;
623
624 return 1;
625}
626
627/* 605/*
628 * force_reval_path - force revalidation of a dentry 606 * handle_reval_path - force revalidation of a dentry
629 * 607 *
630 * In some situations the path walking code will trust dentries without 608 * In some situations the path walking code will trust dentries without
631 * revalidating them. This causes problems for filesystems that depend on 609 * revalidating them. This causes problems for filesystems that depend on
@@ -639,27 +617,28 @@ static inline int need_reval_dot(struct dentry *dentry)
639 * invalidate the dentry. It's up to the caller to handle putting references 617 * invalidate the dentry. It's up to the caller to handle putting references
640 * to the path if necessary. 618 * to the path if necessary.
641 */ 619 */
642static int 620static inline int handle_reval_path(struct nameidata *nd)
643force_reval_path(struct path *path, struct nameidata *nd)
644{ 621{
622 struct dentry *dentry = nd->path.dentry;
645 int status; 623 int status;
646 struct dentry *dentry = path->dentry;
647 624
648 /* 625 if (likely(!(nd->flags & LOOKUP_JUMPED)))
649 * only check on filesystems where it's possible for the dentry to 626 return 0;
650 * become stale. 627
651 */ 628 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
652 if (!need_reval_dot(dentry)) 629 return 0;
630
631 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
653 return 0; 632 return 0;
654 633
634 /* Note: we do not d_invalidate() */
655 status = d_revalidate(dentry, nd); 635 status = d_revalidate(dentry, nd);
656 if (status > 0) 636 if (status > 0)
657 return 0; 637 return 0;
658 638
659 if (!status) { 639 if (!status)
660 d_invalidate(dentry);
661 status = -ESTALE; 640 status = -ESTALE;
662 } 641
663 return status; 642 return status;
664} 643}
665 644
@@ -728,6 +707,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
728 path_put(&nd->path); 707 path_put(&nd->path);
729 nd->path = nd->root; 708 nd->path = nd->root;
730 path_get(&nd->root); 709 path_get(&nd->root);
710 nd->flags |= LOOKUP_JUMPED;
731 } 711 }
732 nd->inode = nd->path.dentry->d_inode; 712 nd->inode = nd->path.dentry->d_inode;
733 713
@@ -757,20 +737,44 @@ static inline void path_to_nameidata(const struct path *path,
757 nd->path.dentry = path->dentry; 737 nd->path.dentry = path->dentry;
758} 738}
759 739
740static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
741{
742 struct inode *inode = link->dentry->d_inode;
743 if (!IS_ERR(cookie) && inode->i_op->put_link)
744 inode->i_op->put_link(link->dentry, nd, cookie);
745 path_put(link);
746}
747
760static __always_inline int 748static __always_inline int
761__do_follow_link(const struct path *link, struct nameidata *nd, void **p) 749follow_link(struct path *link, struct nameidata *nd, void **p)
762{ 750{
763 int error; 751 int error;
764 struct dentry *dentry = link->dentry; 752 struct dentry *dentry = link->dentry;
765 753
766 BUG_ON(nd->flags & LOOKUP_RCU); 754 BUG_ON(nd->flags & LOOKUP_RCU);
767 755
756 if (unlikely(current->total_link_count >= 40)) {
757 *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
758 path_put_conditional(link, nd);
759 path_put(&nd->path);
760 return -ELOOP;
761 }
762 cond_resched();
763 current->total_link_count++;
764
768 touch_atime(link->mnt, dentry); 765 touch_atime(link->mnt, dentry);
769 nd_set_link(nd, NULL); 766 nd_set_link(nd, NULL);
770 767
771 if (link->mnt == nd->path.mnt) 768 if (link->mnt == nd->path.mnt)
772 mntget(link->mnt); 769 mntget(link->mnt);
773 770
771 error = security_inode_follow_link(link->dentry, nd);
772 if (error) {
773 *p = ERR_PTR(error); /* no ->put_link(), please */
774 path_put(&nd->path);
775 return error;
776 }
777
774 nd->last_type = LAST_BIND; 778 nd->last_type = LAST_BIND;
775 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 779 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
776 error = PTR_ERR(*p); 780 error = PTR_ERR(*p);
@@ -780,56 +784,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
780 if (s) 784 if (s)
781 error = __vfs_follow_link(nd, s); 785 error = __vfs_follow_link(nd, s);
782 else if (nd->last_type == LAST_BIND) { 786 else if (nd->last_type == LAST_BIND) {
783 error = force_reval_path(&nd->path, nd); 787 nd->flags |= LOOKUP_JUMPED;
784 if (error) 788 nd->inode = nd->path.dentry->d_inode;
789 if (nd->inode->i_op->follow_link) {
790 /* stepped on a _really_ weird one */
785 path_put(&nd->path); 791 path_put(&nd->path);
792 error = -ELOOP;
793 }
786 } 794 }
787 } 795 }
788 return error; 796 return error;
789} 797}
790 798
791/*
792 * This limits recursive symlink follows to 8, while
793 * limiting consecutive symlinks to 40.
794 *
795 * Without that kind of total limit, nasty chains of consecutive
796 * symlinks can cause almost arbitrarily long lookups.
797 */
798static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
799{
800 void *cookie;
801 int err = -ELOOP;
802
803 /* We drop rcu-walk here */
804 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
805 return -ECHILD;
806 BUG_ON(inode != path->dentry->d_inode);
807
808 if (current->link_count >= MAX_NESTED_LINKS)
809 goto loop;
810 if (current->total_link_count >= 40)
811 goto loop;
812 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
813 cond_resched();
814 err = security_inode_follow_link(path->dentry, nd);
815 if (err)
816 goto loop;
817 current->link_count++;
818 current->total_link_count++;
819 nd->depth++;
820 err = __do_follow_link(path, nd, &cookie);
821 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
822 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
823 path_put(path);
824 current->link_count--;
825 nd->depth--;
826 return err;
827loop:
828 path_put_conditional(path, nd);
829 path_put(&nd->path);
830 return err;
831}
832
833static int follow_up_rcu(struct path *path) 799static int follow_up_rcu(struct path *path)
834{ 800{
835 struct vfsmount *parent; 801 struct vfsmount *parent;
@@ -1068,7 +1034,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1068 1034
1069 seq = read_seqcount_begin(&parent->d_seq); 1035 seq = read_seqcount_begin(&parent->d_seq);
1070 if (read_seqcount_retry(&old->d_seq, nd->seq)) 1036 if (read_seqcount_retry(&old->d_seq, nd->seq))
1071 return -ECHILD; 1037 goto failed;
1072 inode = parent->d_inode; 1038 inode = parent->d_inode;
1073 nd->path.dentry = parent; 1039 nd->path.dentry = parent;
1074 nd->seq = seq; 1040 nd->seq = seq;
@@ -1081,8 +1047,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1081 } 1047 }
1082 __follow_mount_rcu(nd, &nd->path, &inode, true); 1048 __follow_mount_rcu(nd, &nd->path, &inode, true);
1083 nd->inode = inode; 1049 nd->inode = inode;
1084
1085 return 0; 1050 return 0;
1051
1052failed:
1053 nd->flags &= ~LOOKUP_RCU;
1054 if (!(nd->flags & LOOKUP_ROOT))
1055 nd->root.mnt = NULL;
1056 rcu_read_unlock();
1057 br_read_unlock(vfsmount_lock);
1058 return -ECHILD;
1086} 1059}
1087 1060
1088/* 1061/*
@@ -1216,68 +1189,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1216{ 1189{
1217 struct vfsmount *mnt = nd->path.mnt; 1190 struct vfsmount *mnt = nd->path.mnt;
1218 struct dentry *dentry, *parent = nd->path.dentry; 1191 struct dentry *dentry, *parent = nd->path.dentry;
1219 struct inode *dir; 1192 int need_reval = 1;
1193 int status = 1;
1220 int err; 1194 int err;
1221 1195
1222 /* 1196 /*
1223 * See if the low-level filesystem might want
1224 * to use its own hash..
1225 */
1226 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1227 err = parent->d_op->d_hash(parent, nd->inode, name);
1228 if (err < 0)
1229 return err;
1230 }
1231
1232 /*
1233 * Rename seqlock is not required here because in the off chance 1197 * Rename seqlock is not required here because in the off chance
1234 * of a false negative due to a concurrent rename, we're going to 1198 * of a false negative due to a concurrent rename, we're going to
1235 * do the non-racy lookup, below. 1199 * do the non-racy lookup, below.
1236 */ 1200 */
1237 if (nd->flags & LOOKUP_RCU) { 1201 if (nd->flags & LOOKUP_RCU) {
1238 unsigned seq; 1202 unsigned seq;
1239
1240 *inode = nd->inode; 1203 *inode = nd->inode;
1241 dentry = __d_lookup_rcu(parent, name, &seq, inode); 1204 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1242 if (!dentry) { 1205 if (!dentry)
1243 if (nameidata_drop_rcu(nd)) 1206 goto unlazy;
1244 return -ECHILD; 1207
1245 goto need_lookup;
1246 }
1247 /* Memory barrier in read_seqcount_begin of child is enough */ 1208 /* Memory barrier in read_seqcount_begin of child is enough */
1248 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1209 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1249 return -ECHILD; 1210 return -ECHILD;
1250
1251 nd->seq = seq; 1211 nd->seq = seq;
1212
1252 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1213 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1253 dentry = do_revalidate_rcu(dentry, nd); 1214 status = d_revalidate(dentry, nd);
1254 if (!dentry) 1215 if (unlikely(status <= 0)) {
1255 goto need_lookup; 1216 if (status != -ECHILD)
1256 if (IS_ERR(dentry)) 1217 need_reval = 0;
1257 goto fail; 1218 goto unlazy;
1258 if (!(nd->flags & LOOKUP_RCU)) 1219 }
1259 goto done;
1260 } 1220 }
1261 path->mnt = mnt; 1221 path->mnt = mnt;
1262 path->dentry = dentry; 1222 path->dentry = dentry;
1263 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1223 if (likely(__follow_mount_rcu(nd, path, inode, false)))
1264 return 0; 1224 return 0;
1265 if (nameidata_drop_rcu(nd)) 1225unlazy:
1266 return -ECHILD; 1226 if (dentry) {
1267 /* fallthru */ 1227 if (nameidata_dentry_drop_rcu(nd, dentry))
1228 return -ECHILD;
1229 } else {
1230 if (nameidata_drop_rcu(nd))
1231 return -ECHILD;
1232 }
1233 } else {
1234 dentry = __d_lookup(parent, name);
1268 } 1235 }
1269 dentry = __d_lookup(parent, name); 1236
1270 if (!dentry) 1237retry:
1271 goto need_lookup; 1238 if (unlikely(!dentry)) {
1272found: 1239 struct inode *dir = parent->d_inode;
1273 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1240 BUG_ON(nd->inode != dir);
1274 dentry = do_revalidate(dentry, nd); 1241
1275 if (!dentry) 1242 mutex_lock(&dir->i_mutex);
1276 goto need_lookup; 1243 dentry = d_lookup(parent, name);
1277 if (IS_ERR(dentry)) 1244 if (likely(!dentry)) {
1278 goto fail; 1245 dentry = d_alloc_and_lookup(parent, name, nd);
1246 if (IS_ERR(dentry)) {
1247 mutex_unlock(&dir->i_mutex);
1248 return PTR_ERR(dentry);
1249 }
1250 /* known good */
1251 need_reval = 0;
1252 status = 1;
1253 }
1254 mutex_unlock(&dir->i_mutex);
1279 } 1255 }
1280done: 1256 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1257 status = d_revalidate(dentry, nd);
1258 if (unlikely(status <= 0)) {
1259 if (status < 0) {
1260 dput(dentry);
1261 return status;
1262 }
1263 if (!d_invalidate(dentry)) {
1264 dput(dentry);
1265 dentry = NULL;
1266 need_reval = 1;
1267 goto retry;
1268 }
1269 }
1270
1281 path->mnt = mnt; 1271 path->mnt = mnt;
1282 path->dentry = dentry; 1272 path->dentry = dentry;
1283 err = follow_managed(path, nd->flags); 1273 err = follow_managed(path, nd->flags);
@@ -1287,39 +1277,113 @@ done:
1287 } 1277 }
1288 *inode = path->dentry->d_inode; 1278 *inode = path->dentry->d_inode;
1289 return 0; 1279 return 0;
1280}
1290 1281
1291need_lookup: 1282static inline int may_lookup(struct nameidata *nd)
1292 dir = parent->d_inode; 1283{
1293 BUG_ON(nd->inode != dir); 1284 if (nd->flags & LOOKUP_RCU) {
1285 int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1286 if (err != -ECHILD)
1287 return err;
1288 if (nameidata_drop_rcu(nd))
1289 return -ECHILD;
1290 }
1291 return exec_permission(nd->inode, 0);
1292}
1294 1293
1295 mutex_lock(&dir->i_mutex); 1294static inline int handle_dots(struct nameidata *nd, int type)
1296 /* 1295{
1297 * First re-do the cached lookup just in case it was created 1296 if (type == LAST_DOTDOT) {
1298 * while we waited for the directory semaphore, or the first 1297 if (nd->flags & LOOKUP_RCU) {
1299 * lookup failed due to an unrelated rename. 1298 if (follow_dotdot_rcu(nd))
1300 * 1299 return -ECHILD;
1301 * This could use version numbering or similar to avoid unnecessary 1300 } else
1302 * cache lookups, but then we'd have to do the first lookup in the 1301 follow_dotdot(nd);
1303 * non-racy way. However in the common case here, everything should 1302 }
1304 * be hot in cache, so would it be a big win? 1303 return 0;
1305 */ 1304}
1306 dentry = d_lookup(parent, name); 1305
1307 if (likely(!dentry)) { 1306static void terminate_walk(struct nameidata *nd)
1308 dentry = d_alloc_and_lookup(parent, name, nd); 1307{
1309 mutex_unlock(&dir->i_mutex); 1308 if (!(nd->flags & LOOKUP_RCU)) {
1310 if (IS_ERR(dentry)) 1309 path_put(&nd->path);
1311 goto fail; 1310 } else {
1312 goto done; 1311 nd->flags &= ~LOOKUP_RCU;
1312 if (!(nd->flags & LOOKUP_ROOT))
1313 nd->root.mnt = NULL;
1314 rcu_read_unlock();
1315 br_read_unlock(vfsmount_lock);
1313 } 1316 }
1317}
1318
1319static inline int walk_component(struct nameidata *nd, struct path *path,
1320 struct qstr *name, int type, int follow)
1321{
1322 struct inode *inode;
1323 int err;
1314 /* 1324 /*
1315 * Uhhuh! Nasty case: the cache was re-populated while 1325 * "." and ".." are special - ".." especially so because it has
1316 * we waited on the semaphore. Need to revalidate. 1326 * to be able to know about the current root directory and
1327 * parent relationships.
1317 */ 1328 */
1318 mutex_unlock(&dir->i_mutex); 1329 if (unlikely(type != LAST_NORM))
1319 goto found; 1330 return handle_dots(nd, type);
1331 err = do_lookup(nd, name, path, &inode);
1332 if (unlikely(err)) {
1333 terminate_walk(nd);
1334 return err;
1335 }
1336 if (!inode) {
1337 path_to_nameidata(path, nd);
1338 terminate_walk(nd);
1339 return -ENOENT;
1340 }
1341 if (unlikely(inode->i_op->follow_link) && follow) {
1342 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
1343 return -ECHILD;
1344 BUG_ON(inode != path->dentry->d_inode);
1345 return 1;
1346 }
1347 path_to_nameidata(path, nd);
1348 nd->inode = inode;
1349 return 0;
1350}
1320 1351
1321fail: 1352/*
1322 return PTR_ERR(dentry); 1353 * This limits recursive symlink follows to 8, while
1354 * limiting consecutive symlinks to 40.
1355 *
1356 * Without that kind of total limit, nasty chains of consecutive
1357 * symlinks can cause almost arbitrarily long lookups.
1358 */
1359static inline int nested_symlink(struct path *path, struct nameidata *nd)
1360{
1361 int res;
1362
1363 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1364 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1365 path_put_conditional(path, nd);
1366 path_put(&nd->path);
1367 return -ELOOP;
1368 }
1369
1370 nd->depth++;
1371 current->link_count++;
1372
1373 do {
1374 struct path link = *path;
1375 void *cookie;
1376
1377 res = follow_link(&link, nd, &cookie);
1378 if (!res)
1379 res = walk_component(nd, path, &nd->last,
1380 nd->last_type, LOOKUP_FOLLOW);
1381 put_link(nd, &link, cookie);
1382 } while (res > 0);
1383
1384 current->link_count--;
1385 nd->depth--;
1386 return res;
1323} 1387}
1324 1388
1325/* 1389/*
@@ -1339,30 +1403,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1339 while (*name=='/') 1403 while (*name=='/')
1340 name++; 1404 name++;
1341 if (!*name) 1405 if (!*name)
1342 goto return_reval; 1406 return 0;
1343
1344 if (nd->depth)
1345 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
1346 1407
1347 /* At this point we know we have a real path component. */ 1408 /* At this point we know we have a real path component. */
1348 for(;;) { 1409 for(;;) {
1349 struct inode *inode;
1350 unsigned long hash; 1410 unsigned long hash;
1351 struct qstr this; 1411 struct qstr this;
1352 unsigned int c; 1412 unsigned int c;
1413 int type;
1353 1414
1354 nd->flags |= LOOKUP_CONTINUE; 1415 nd->flags |= LOOKUP_CONTINUE;
1355 if (nd->flags & LOOKUP_RCU) { 1416
1356 err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1417 err = may_lookup(nd);
1357 if (err == -ECHILD) {
1358 if (nameidata_drop_rcu(nd))
1359 return -ECHILD;
1360 goto exec_again;
1361 }
1362 } else {
1363exec_again:
1364 err = exec_permission(nd->inode, 0);
1365 }
1366 if (err) 1418 if (err)
1367 break; 1419 break;
1368 1420
@@ -1378,52 +1430,43 @@ exec_again:
1378 this.len = name - (const char *) this.name; 1430 this.len = name - (const char *) this.name;
1379 this.hash = end_name_hash(hash); 1431 this.hash = end_name_hash(hash);
1380 1432
1433 type = LAST_NORM;
1434 if (this.name[0] == '.') switch (this.len) {
1435 case 2:
1436 if (this.name[1] == '.') {
1437 type = LAST_DOTDOT;
1438 nd->flags |= LOOKUP_JUMPED;
1439 }
1440 break;
1441 case 1:
1442 type = LAST_DOT;
1443 }
1444 if (likely(type == LAST_NORM)) {
1445 struct dentry *parent = nd->path.dentry;
1446 nd->flags &= ~LOOKUP_JUMPED;
1447 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1448 err = parent->d_op->d_hash(parent, nd->inode,
1449 &this);
1450 if (err < 0)
1451 break;
1452 }
1453 }
1454
1381 /* remove trailing slashes? */ 1455 /* remove trailing slashes? */
1382 if (!c) 1456 if (!c)
1383 goto last_component; 1457 goto last_component;
1384 while (*++name == '/'); 1458 while (*++name == '/');
1385 if (!*name) 1459 if (!*name)
1386 goto last_with_slashes; 1460 goto last_component;
1387 1461
1388 /* 1462 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1389 * "." and ".." are special - ".." especially so because it has 1463 if (err < 0)
1390 * to be able to know about the current root directory and 1464 return err;
1391 * parent relationships.
1392 */
1393 if (this.name[0] == '.') switch (this.len) {
1394 default:
1395 break;
1396 case 2:
1397 if (this.name[1] != '.')
1398 break;
1399 if (nd->flags & LOOKUP_RCU) {
1400 if (follow_dotdot_rcu(nd))
1401 return -ECHILD;
1402 } else
1403 follow_dotdot(nd);
1404 /* fallthrough */
1405 case 1:
1406 continue;
1407 }
1408 /* This does the actual lookups.. */
1409 err = do_lookup(nd, &this, &next, &inode);
1410 if (err)
1411 break;
1412 err = -ENOENT;
1413 if (!inode)
1414 goto out_dput;
1415 1465
1416 if (inode->i_op->follow_link) { 1466 if (err) {
1417 err = do_follow_link(inode, &next, nd); 1467 err = nested_symlink(&next, nd);
1418 if (err) 1468 if (err)
1419 goto return_err; 1469 return err;
1420 nd->inode = nd->path.dentry->d_inode;
1421 err = -ENOENT;
1422 if (!nd->inode)
1423 break;
1424 } else {
1425 path_to_nameidata(&next, nd);
1426 nd->inode = inode;
1427 } 1470 }
1428 err = -ENOTDIR; 1471 err = -ENOTDIR;
1429 if (!nd->inode->i_op->lookup) 1472 if (!nd->inode->i_op->lookup)
@@ -1431,210 +1474,109 @@ exec_again:
1431 continue; 1474 continue;
1432 /* here ends the main loop */ 1475 /* here ends the main loop */
1433 1476
1434last_with_slashes:
1435 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1436last_component: 1477last_component:
1437 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 1478 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1438 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 1479 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1439 if (lookup_flags & LOOKUP_PARENT)
1440 goto lookup_parent;
1441 if (this.name[0] == '.') switch (this.len) {
1442 default:
1443 break;
1444 case 2:
1445 if (this.name[1] != '.')
1446 break;
1447 if (nd->flags & LOOKUP_RCU) {
1448 if (follow_dotdot_rcu(nd))
1449 return -ECHILD;
1450 } else
1451 follow_dotdot(nd);
1452 /* fallthrough */
1453 case 1:
1454 goto return_reval;
1455 }
1456 err = do_lookup(nd, &this, &next, &inode);
1457 if (err)
1458 break;
1459 if (inode && unlikely(inode->i_op->follow_link) &&
1460 (lookup_flags & LOOKUP_FOLLOW)) {
1461 err = do_follow_link(inode, &next, nd);
1462 if (err)
1463 goto return_err;
1464 nd->inode = nd->path.dentry->d_inode;
1465 } else {
1466 path_to_nameidata(&next, nd);
1467 nd->inode = inode;
1468 }
1469 err = -ENOENT;
1470 if (!nd->inode)
1471 break;
1472 if (lookup_flags & LOOKUP_DIRECTORY) {
1473 err = -ENOTDIR;
1474 if (!nd->inode->i_op->lookup)
1475 break;
1476 }
1477 goto return_base;
1478lookup_parent:
1479 nd->last = this; 1480 nd->last = this;
1480 nd->last_type = LAST_NORM; 1481 nd->last_type = type;
1481 if (this.name[0] != '.')
1482 goto return_base;
1483 if (this.len == 1)
1484 nd->last_type = LAST_DOT;
1485 else if (this.len == 2 && this.name[1] == '.')
1486 nd->last_type = LAST_DOTDOT;
1487 else
1488 goto return_base;
1489return_reval:
1490 /*
1491 * We bypassed the ordinary revalidation routines.
1492 * We may need to check the cached dentry for staleness.
1493 */
1494 if (need_reval_dot(nd->path.dentry)) {
1495 if (nameidata_drop_rcu_last_maybe(nd))
1496 return -ECHILD;
1497 /* Note: we do not d_invalidate() */
1498 err = d_revalidate(nd->path.dentry, nd);
1499 if (!err)
1500 err = -ESTALE;
1501 if (err < 0)
1502 break;
1503 return 0;
1504 }
1505return_base:
1506 if (nameidata_drop_rcu_last_maybe(nd))
1507 return -ECHILD;
1508 return 0; 1482 return 0;
1509out_dput:
1510 if (!(nd->flags & LOOKUP_RCU))
1511 path_put_conditional(&next, nd);
1512 break;
1513 } 1483 }
1514 if (!(nd->flags & LOOKUP_RCU)) 1484 terminate_walk(nd);
1515 path_put(&nd->path);
1516return_err:
1517 return err; 1485 return err;
1518} 1486}
1519 1487
1520static inline int path_walk_rcu(const char *name, struct nameidata *nd) 1488static int path_init(int dfd, const char *name, unsigned int flags,
1521{ 1489 struct nameidata *nd, struct file **fp)
1522 current->total_link_count = 0;
1523
1524 return link_path_walk(name, nd);
1525}
1526
1527static inline int path_walk_simple(const char *name, struct nameidata *nd)
1528{
1529 current->total_link_count = 0;
1530
1531 return link_path_walk(name, nd);
1532}
1533
1534static int path_walk(const char *name, struct nameidata *nd)
1535{
1536 struct path save = nd->path;
1537 int result;
1538
1539 current->total_link_count = 0;
1540
1541 /* make sure the stuff we saved doesn't go away */
1542 path_get(&save);
1543
1544 result = link_path_walk(name, nd);
1545 if (result == -ESTALE) {
1546 /* nd->path had been dropped */
1547 current->total_link_count = 0;
1548 nd->path = save;
1549 nd->inode = save.dentry->d_inode;
1550 path_get(&nd->path);
1551 nd->flags |= LOOKUP_REVAL;
1552 result = link_path_walk(name, nd);
1553 }
1554
1555 path_put(&save);
1556
1557 return result;
1558}
1559
1560static void path_finish_rcu(struct nameidata *nd)
1561{
1562 if (nd->flags & LOOKUP_RCU) {
1563 /* RCU dangling. Cancel it. */
1564 nd->flags &= ~LOOKUP_RCU;
1565 nd->root.mnt = NULL;
1566 rcu_read_unlock();
1567 br_read_unlock(vfsmount_lock);
1568 }
1569 if (nd->file)
1570 fput(nd->file);
1571}
1572
1573static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1574{ 1490{
1575 int retval = 0; 1491 int retval = 0;
1576 int fput_needed; 1492 int fput_needed;
1577 struct file *file; 1493 struct file *file;
1578 1494
1579 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1495 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1580 nd->flags = flags | LOOKUP_RCU; 1496 nd->flags = flags | LOOKUP_JUMPED;
1581 nd->depth = 0; 1497 nd->depth = 0;
1498 if (flags & LOOKUP_ROOT) {
1499 struct inode *inode = nd->root.dentry->d_inode;
1500 if (*name) {
1501 if (!inode->i_op->lookup)
1502 return -ENOTDIR;
1503 retval = inode_permission(inode, MAY_EXEC);
1504 if (retval)
1505 return retval;
1506 }
1507 nd->path = nd->root;
1508 nd->inode = inode;
1509 if (flags & LOOKUP_RCU) {
1510 br_read_lock(vfsmount_lock);
1511 rcu_read_lock();
1512 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1513 } else {
1514 path_get(&nd->path);
1515 }
1516 return 0;
1517 }
1518
1582 nd->root.mnt = NULL; 1519 nd->root.mnt = NULL;
1583 nd->file = NULL;
1584 1520
1585 if (*name=='/') { 1521 if (*name=='/') {
1586 struct fs_struct *fs = current->fs; 1522 if (flags & LOOKUP_RCU) {
1587 unsigned seq; 1523 br_read_lock(vfsmount_lock);
1588 1524 rcu_read_lock();
1589 br_read_lock(vfsmount_lock); 1525 set_root_rcu(nd);
1590 rcu_read_lock(); 1526 } else {
1591 1527 set_root(nd);
1592 do { 1528 path_get(&nd->root);
1593 seq = read_seqcount_begin(&fs->seq); 1529 }
1594 nd->root = fs->root; 1530 nd->path = nd->root;
1595 nd->path = nd->root;
1596 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1597 } while (read_seqcount_retry(&fs->seq, seq));
1598
1599 } else if (dfd == AT_FDCWD) { 1531 } else if (dfd == AT_FDCWD) {
1600 struct fs_struct *fs = current->fs; 1532 if (flags & LOOKUP_RCU) {
1601 unsigned seq; 1533 struct fs_struct *fs = current->fs;
1602 1534 unsigned seq;
1603 br_read_lock(vfsmount_lock);
1604 rcu_read_lock();
1605 1535
1606 do { 1536 br_read_lock(vfsmount_lock);
1607 seq = read_seqcount_begin(&fs->seq); 1537 rcu_read_lock();
1608 nd->path = fs->pwd;
1609 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1610 } while (read_seqcount_retry(&fs->seq, seq));
1611 1538
1539 do {
1540 seq = read_seqcount_begin(&fs->seq);
1541 nd->path = fs->pwd;
1542 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1543 } while (read_seqcount_retry(&fs->seq, seq));
1544 } else {
1545 get_fs_pwd(current->fs, &nd->path);
1546 }
1612 } else { 1547 } else {
1613 struct dentry *dentry; 1548 struct dentry *dentry;
1614 1549
1615 file = fget_light(dfd, &fput_needed); 1550 file = fget_raw_light(dfd, &fput_needed);
1616 retval = -EBADF; 1551 retval = -EBADF;
1617 if (!file) 1552 if (!file)
1618 goto out_fail; 1553 goto out_fail;
1619 1554
1620 dentry = file->f_path.dentry; 1555 dentry = file->f_path.dentry;
1621 1556
1622 retval = -ENOTDIR; 1557 if (*name) {
1623 if (!S_ISDIR(dentry->d_inode->i_mode)) 1558 retval = -ENOTDIR;
1624 goto fput_fail; 1559 if (!S_ISDIR(dentry->d_inode->i_mode))
1560 goto fput_fail;
1625 1561
1626 retval = file_permission(file, MAY_EXEC); 1562 retval = file_permission(file, MAY_EXEC);
1627 if (retval) 1563 if (retval)
1628 goto fput_fail; 1564 goto fput_fail;
1565 }
1629 1566
1630 nd->path = file->f_path; 1567 nd->path = file->f_path;
1631 if (fput_needed) 1568 if (flags & LOOKUP_RCU) {
1632 nd->file = file; 1569 if (fput_needed)
1633 1570 *fp = file;
1634 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1571 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1635 br_read_lock(vfsmount_lock); 1572 br_read_lock(vfsmount_lock);
1636 rcu_read_lock(); 1573 rcu_read_lock();
1574 } else {
1575 path_get(&file->f_path);
1576 fput_light(file, fput_needed);
1577 }
1637 } 1578 }
1579
1638 nd->inode = nd->path.dentry->d_inode; 1580 nd->inode = nd->path.dentry->d_inode;
1639 return 0; 1581 return 0;
1640 1582
@@ -1644,60 +1586,23 @@ out_fail:
1644 return retval; 1586 return retval;
1645} 1587}
1646 1588
1647static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1589static inline int lookup_last(struct nameidata *nd, struct path *path)
1648{ 1590{
1649 int retval = 0; 1591 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
1650 int fput_needed; 1592 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1651 struct file *file;
1652
1653 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1654 nd->flags = flags;
1655 nd->depth = 0;
1656 nd->root.mnt = NULL;
1657
1658 if (*name=='/') {
1659 set_root(nd);
1660 nd->path = nd->root;
1661 path_get(&nd->root);
1662 } else if (dfd == AT_FDCWD) {
1663 get_fs_pwd(current->fs, &nd->path);
1664 } else {
1665 struct dentry *dentry;
1666
1667 file = fget_light(dfd, &fput_needed);
1668 retval = -EBADF;
1669 if (!file)
1670 goto out_fail;
1671
1672 dentry = file->f_path.dentry;
1673
1674 retval = -ENOTDIR;
1675 if (!S_ISDIR(dentry->d_inode->i_mode))
1676 goto fput_fail;
1677
1678 retval = file_permission(file, MAY_EXEC);
1679 if (retval)
1680 goto fput_fail;
1681
1682 nd->path = file->f_path;
1683 path_get(&file->f_path);
1684 1593
1685 fput_light(file, fput_needed); 1594 nd->flags &= ~LOOKUP_PARENT;
1686 } 1595 return walk_component(nd, path, &nd->last, nd->last_type,
1687 nd->inode = nd->path.dentry->d_inode; 1596 nd->flags & LOOKUP_FOLLOW);
1688 return 0;
1689
1690fput_fail:
1691 fput_light(file, fput_needed);
1692out_fail:
1693 return retval;
1694} 1597}
1695 1598
1696/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1599/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1697static int do_path_lookup(int dfd, const char *name, 1600static int path_lookupat(int dfd, const char *name,
1698 unsigned int flags, struct nameidata *nd) 1601 unsigned int flags, struct nameidata *nd)
1699{ 1602{
1700 int retval; 1603 struct file *base = NULL;
1604 struct path path;
1605 int err;
1701 1606
1702 /* 1607 /*
1703 * Path walking is largely split up into 2 different synchronisation 1608 * Path walking is largely split up into 2 different synchronisation
@@ -1713,44 +1618,75 @@ static int do_path_lookup(int dfd, const char *name,
1713 * be handled by restarting a traditional ref-walk (which will always 1618 * be handled by restarting a traditional ref-walk (which will always
1714 * be able to complete). 1619 * be able to complete).
1715 */ 1620 */
1716 retval = path_init_rcu(dfd, name, flags, nd); 1621 err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
1717 if (unlikely(retval)) 1622
1718 return retval; 1623 if (unlikely(err))
1719 retval = path_walk_rcu(name, nd); 1624 return err;
1720 path_finish_rcu(nd); 1625
1721 if (nd->root.mnt) { 1626 current->total_link_count = 0;
1722 path_put(&nd->root); 1627 err = link_path_walk(name, nd);
1723 nd->root.mnt = NULL; 1628
1629 if (!err && !(flags & LOOKUP_PARENT)) {
1630 err = lookup_last(nd, &path);
1631 while (err > 0) {
1632 void *cookie;
1633 struct path link = path;
1634 nd->flags |= LOOKUP_PARENT;
1635 err = follow_link(&link, nd, &cookie);
1636 if (!err)
1637 err = lookup_last(nd, &path);
1638 put_link(nd, &link, cookie);
1639 }
1724 } 1640 }
1725 1641
1726 if (unlikely(retval == -ECHILD || retval == -ESTALE)) { 1642 if (nd->flags & LOOKUP_RCU) {
1727 /* slower, locked walk */ 1643 /* went all way through without dropping RCU */
1728 if (retval == -ESTALE) 1644 BUG_ON(err);
1729 flags |= LOOKUP_REVAL; 1645 if (nameidata_drop_rcu_last(nd))
1730 retval = path_init(dfd, name, flags, nd); 1646 err = -ECHILD;
1731 if (unlikely(retval)) 1647 }
1732 return retval; 1648
1733 retval = path_walk(name, nd); 1649 if (!err)
1734 if (nd->root.mnt) { 1650 err = handle_reval_path(nd);
1735 path_put(&nd->root); 1651
1736 nd->root.mnt = NULL; 1652 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1653 if (!nd->inode->i_op->lookup) {
1654 path_put(&nd->path);
1655 return -ENOTDIR;
1737 } 1656 }
1738 } 1657 }
1739 1658
1659 if (base)
1660 fput(base);
1661
1662 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
1663 path_put(&nd->root);
1664 nd->root.mnt = NULL;
1665 }
1666 return err;
1667}
1668
1669static int do_path_lookup(int dfd, const char *name,
1670 unsigned int flags, struct nameidata *nd)
1671{
1672 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
1673 if (unlikely(retval == -ECHILD))
1674 retval = path_lookupat(dfd, name, flags, nd);
1675 if (unlikely(retval == -ESTALE))
1676 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
1677
1740 if (likely(!retval)) { 1678 if (likely(!retval)) {
1741 if (unlikely(!audit_dummy_context())) { 1679 if (unlikely(!audit_dummy_context())) {
1742 if (nd->path.dentry && nd->inode) 1680 if (nd->path.dentry && nd->inode)
1743 audit_inode(name, nd->path.dentry); 1681 audit_inode(name, nd->path.dentry);
1744 } 1682 }
1745 } 1683 }
1746
1747 return retval; 1684 return retval;
1748} 1685}
1749 1686
1750int path_lookup(const char *name, unsigned int flags, 1687int kern_path_parent(const char *name, struct nameidata *nd)
1751 struct nameidata *nd)
1752{ 1688{
1753 return do_path_lookup(AT_FDCWD, name, flags, nd); 1689 return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
1754} 1690}
1755 1691
1756int kern_path(const char *name, unsigned int flags, struct path *path) 1692int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1774,29 +1710,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1774 const char *name, unsigned int flags, 1710 const char *name, unsigned int flags,
1775 struct nameidata *nd) 1711 struct nameidata *nd)
1776{ 1712{
1777 int retval; 1713 nd->root.dentry = dentry;
1778 1714 nd->root.mnt = mnt;
1779 /* same as do_path_lookup */ 1715 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1780 nd->last_type = LAST_ROOT; 1716 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
1781 nd->flags = flags;
1782 nd->depth = 0;
1783
1784 nd->path.dentry = dentry;
1785 nd->path.mnt = mnt;
1786 path_get(&nd->path);
1787 nd->root = nd->path;
1788 path_get(&nd->root);
1789 nd->inode = nd->path.dentry->d_inode;
1790
1791 retval = path_walk(name, nd);
1792 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1793 nd->inode))
1794 audit_inode(name, nd->path.dentry);
1795
1796 path_put(&nd->root);
1797 nd->root.mnt = NULL;
1798
1799 return retval;
1800} 1717}
1801 1718
1802static struct dentry *__lookup_hash(struct qstr *name, 1719static struct dentry *__lookup_hash(struct qstr *name,
@@ -1811,17 +1728,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
1811 return ERR_PTR(err); 1728 return ERR_PTR(err);
1812 1729
1813 /* 1730 /*
1814 * See if the low-level filesystem might want
1815 * to use its own hash..
1816 */
1817 if (base->d_flags & DCACHE_OP_HASH) {
1818 err = base->d_op->d_hash(base, inode, name);
1819 dentry = ERR_PTR(err);
1820 if (err < 0)
1821 goto out;
1822 }
1823
1824 /*
1825 * Don't bother with __d_lookup: callers are for creat as 1731 * Don't bother with __d_lookup: callers are for creat as
1826 * well as unlink, so a lot of the time it would cost 1732 * well as unlink, so a lot of the time it would cost
1827 * a double lookup. 1733 * a double lookup.
@@ -1833,7 +1739,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1833 1739
1834 if (!dentry) 1740 if (!dentry)
1835 dentry = d_alloc_and_lookup(base, name, nd); 1741 dentry = d_alloc_and_lookup(base, name, nd);
1836out: 1742
1837 return dentry; 1743 return dentry;
1838} 1744}
1839 1745
@@ -1847,28 +1753,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1847 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1753 return __lookup_hash(&nd->last, nd->path.dentry, nd);
1848} 1754}
1849 1755
1850static int __lookup_one_len(const char *name, struct qstr *this,
1851 struct dentry *base, int len)
1852{
1853 unsigned long hash;
1854 unsigned int c;
1855
1856 this->name = name;
1857 this->len = len;
1858 if (!len)
1859 return -EACCES;
1860
1861 hash = init_name_hash();
1862 while (len--) {
1863 c = *(const unsigned char *)name++;
1864 if (c == '/' || c == '\0')
1865 return -EACCES;
1866 hash = partial_name_hash(c, hash);
1867 }
1868 this->hash = end_name_hash(hash);
1869 return 0;
1870}
1871
1872/** 1756/**
1873 * lookup_one_len - filesystem helper to lookup single pathname component 1757 * lookup_one_len - filesystem helper to lookup single pathname component
1874 * @name: pathname component to lookup 1758 * @name: pathname component to lookup
@@ -1882,14 +1766,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
1882 */ 1766 */
1883struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1767struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1884{ 1768{
1885 int err;
1886 struct qstr this; 1769 struct qstr this;
1770 unsigned long hash;
1771 unsigned int c;
1887 1772
1888 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1773 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1889 1774
1890 err = __lookup_one_len(name, &this, base, len); 1775 this.name = name;
1891 if (err) 1776 this.len = len;
1892 return ERR_PTR(err); 1777 if (!len)
1778 return ERR_PTR(-EACCES);
1779
1780 hash = init_name_hash();
1781 while (len--) {
1782 c = *(const unsigned char *)name++;
1783 if (c == '/' || c == '\0')
1784 return ERR_PTR(-EACCES);
1785 hash = partial_name_hash(c, hash);
1786 }
1787 this.hash = end_name_hash(hash);
1788 /*
1789 * See if the low-level filesystem might want
1790 * to use its own hash..
1791 */
1792 if (base->d_flags & DCACHE_OP_HASH) {
1793 int err = base->d_op->d_hash(base, base->d_inode, &this);
1794 if (err < 0)
1795 return ERR_PTR(err);
1796 }
1893 1797
1894 return __lookup_hash(&this, base, NULL); 1798 return __lookup_hash(&this, base, NULL);
1895} 1799}
@@ -1898,7 +1802,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1898 struct path *path) 1802 struct path *path)
1899{ 1803{
1900 struct nameidata nd; 1804 struct nameidata nd;
1901 char *tmp = getname(name); 1805 char *tmp = getname_flags(name, flags);
1902 int err = PTR_ERR(tmp); 1806 int err = PTR_ERR(tmp);
1903 if (!IS_ERR(tmp)) { 1807 if (!IS_ERR(tmp)) {
1904 1808
@@ -2078,12 +1982,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
2078 return error; 1982 return error;
2079} 1983}
2080 1984
2081int may_open(struct path *path, int acc_mode, int flag) 1985static int may_open(struct path *path, int acc_mode, int flag)
2082{ 1986{
2083 struct dentry *dentry = path->dentry; 1987 struct dentry *dentry = path->dentry;
2084 struct inode *inode = dentry->d_inode; 1988 struct inode *inode = dentry->d_inode;
2085 int error; 1989 int error;
2086 1990
1991 /* O_PATH? */
1992 if (!acc_mode)
1993 return 0;
1994
2087 if (!inode) 1995 if (!inode)
2088 return -ENOENT; 1996 return -ENOENT;
2089 1997
@@ -2152,34 +2060,6 @@ static int handle_truncate(struct file *filp)
2152} 2060}
2153 2061
2154/* 2062/*
2155 * Be careful about ever adding any more callers of this
2156 * function. Its flags must be in the namei format, not
2157 * what get passed to sys_open().
2158 */
2159static int __open_namei_create(struct nameidata *nd, struct path *path,
2160 int open_flag, int mode)
2161{
2162 int error;
2163 struct dentry *dir = nd->path.dentry;
2164
2165 if (!IS_POSIXACL(dir->d_inode))
2166 mode &= ~current_umask();
2167 error = security_path_mknod(&nd->path, path->dentry, mode, 0);
2168 if (error)
2169 goto out_unlock;
2170 error = vfs_create(dir->d_inode, path->dentry, mode, nd);
2171out_unlock:
2172 mutex_unlock(&dir->d_inode->i_mutex);
2173 dput(nd->path.dentry);
2174 nd->path.dentry = path->dentry;
2175
2176 if (error)
2177 return error;
2178 /* Don't check for write permission, don't truncate */
2179 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
2180}
2181
2182/*
2183 * Note that while the flag value (low two bits) for sys_open means: 2063 * Note that while the flag value (low two bits) for sys_open means:
2184 * 00 - read-only 2064 * 00 - read-only
2185 * 01 - write-only 2065 * 01 - write-only
@@ -2203,126 +2083,115 @@ static inline int open_to_namei_flags(int flag)
2203 return flag; 2083 return flag;
2204} 2084}
2205 2085
2206static int open_will_truncate(int flag, struct inode *inode)
2207{
2208 /*
2209 * We'll never write to the fs underlying
2210 * a device file.
2211 */
2212 if (special_file(inode->i_mode))
2213 return 0;
2214 return (flag & O_TRUNC);
2215}
2216
2217static struct file *finish_open(struct nameidata *nd,
2218 int open_flag, int acc_mode)
2219{
2220 struct file *filp;
2221 int will_truncate;
2222 int error;
2223
2224 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
2225 if (will_truncate) {
2226 error = mnt_want_write(nd->path.mnt);
2227 if (error)
2228 goto exit;
2229 }
2230 error = may_open(&nd->path, acc_mode, open_flag);
2231 if (error) {
2232 if (will_truncate)
2233 mnt_drop_write(nd->path.mnt);
2234 goto exit;
2235 }
2236 filp = nameidata_to_filp(nd);
2237 if (!IS_ERR(filp)) {
2238 error = ima_file_check(filp, acc_mode);
2239 if (error) {
2240 fput(filp);
2241 filp = ERR_PTR(error);
2242 }
2243 }
2244 if (!IS_ERR(filp)) {
2245 if (will_truncate) {
2246 error = handle_truncate(filp);
2247 if (error) {
2248 fput(filp);
2249 filp = ERR_PTR(error);
2250 }
2251 }
2252 }
2253 /*
2254 * It is now safe to drop the mnt write
2255 * because the filp has had a write taken
2256 * on its behalf.
2257 */
2258 if (will_truncate)
2259 mnt_drop_write(nd->path.mnt);
2260 path_put(&nd->path);
2261 return filp;
2262
2263exit:
2264 path_put(&nd->path);
2265 return ERR_PTR(error);
2266}
2267
2268/* 2086/*
2269 * Handle O_CREAT case for do_filp_open 2087 * Handle the last step of open()
2270 */ 2088 */
2271static struct file *do_last(struct nameidata *nd, struct path *path, 2089static struct file *do_last(struct nameidata *nd, struct path *path,
2272 int open_flag, int acc_mode, 2090 const struct open_flags *op, const char *pathname)
2273 int mode, const char *pathname)
2274{ 2091{
2275 struct dentry *dir = nd->path.dentry; 2092 struct dentry *dir = nd->path.dentry;
2093 struct dentry *dentry;
2094 int open_flag = op->open_flag;
2095 int will_truncate = open_flag & O_TRUNC;
2096 int want_write = 0;
2097 int acc_mode = op->acc_mode;
2276 struct file *filp; 2098 struct file *filp;
2277 int error = -EISDIR; 2099 int error;
2100
2101 nd->flags &= ~LOOKUP_PARENT;
2102 nd->flags |= op->intent;
2278 2103
2279 switch (nd->last_type) { 2104 switch (nd->last_type) {
2280 case LAST_DOTDOT: 2105 case LAST_DOTDOT:
2281 follow_dotdot(nd);
2282 dir = nd->path.dentry;
2283 case LAST_DOT: 2106 case LAST_DOT:
2284 if (need_reval_dot(dir)) { 2107 error = handle_dots(nd, nd->last_type);
2285 int status = d_revalidate(nd->path.dentry, nd); 2108 if (error)
2286 if (!status) 2109 return ERR_PTR(error);
2287 status = -ESTALE;
2288 if (status < 0) {
2289 error = status;
2290 goto exit;
2291 }
2292 }
2293 /* fallthrough */ 2110 /* fallthrough */
2294 case LAST_ROOT: 2111 case LAST_ROOT:
2295 goto exit; 2112 if (nd->flags & LOOKUP_RCU) {
2113 if (nameidata_drop_rcu_last(nd))
2114 return ERR_PTR(-ECHILD);
2115 }
2116 error = handle_reval_path(nd);
2117 if (error)
2118 goto exit;
2119 audit_inode(pathname, nd->path.dentry);
2120 if (open_flag & O_CREAT) {
2121 error = -EISDIR;
2122 goto exit;
2123 }
2124 goto ok;
2296 case LAST_BIND: 2125 case LAST_BIND:
2126 /* can't be RCU mode here */
2127 error = handle_reval_path(nd);
2128 if (error)
2129 goto exit;
2297 audit_inode(pathname, dir); 2130 audit_inode(pathname, dir);
2298 goto ok; 2131 goto ok;
2299 } 2132 }
2300 2133
2134 if (!(open_flag & O_CREAT)) {
2135 int symlink_ok = 0;
2136 if (nd->last.name[nd->last.len])
2137 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2138 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2139 symlink_ok = 1;
2140 /* we _can_ be in RCU mode here */
2141 error = walk_component(nd, path, &nd->last, LAST_NORM,
2142 !symlink_ok);
2143 if (error < 0)
2144 return ERR_PTR(error);
2145 if (error) /* symlink */
2146 return NULL;
2147 /* sayonara */
2148 if (nd->flags & LOOKUP_RCU) {
2149 if (nameidata_drop_rcu_last(nd))
2150 return ERR_PTR(-ECHILD);
2151 }
2152
2153 error = -ENOTDIR;
2154 if (nd->flags & LOOKUP_DIRECTORY) {
2155 if (!nd->inode->i_op->lookup)
2156 goto exit;
2157 }
2158 audit_inode(pathname, nd->path.dentry);
2159 goto ok;
2160 }
2161
2162 /* create side of things */
2163
2164 if (nd->flags & LOOKUP_RCU) {
2165 if (nameidata_drop_rcu_last(nd))
2166 return ERR_PTR(-ECHILD);
2167 }
2168
2169 audit_inode(pathname, dir);
2170 error = -EISDIR;
2301 /* trailing slashes? */ 2171 /* trailing slashes? */
2302 if (nd->last.name[nd->last.len]) 2172 if (nd->last.name[nd->last.len])
2303 goto exit; 2173 goto exit;
2304 2174
2305 mutex_lock(&dir->d_inode->i_mutex); 2175 mutex_lock(&dir->d_inode->i_mutex);
2306 2176
2307 path->dentry = lookup_hash(nd); 2177 dentry = lookup_hash(nd);
2308 path->mnt = nd->path.mnt; 2178 error = PTR_ERR(dentry);
2309 2179 if (IS_ERR(dentry)) {
2310 error = PTR_ERR(path->dentry);
2311 if (IS_ERR(path->dentry)) {
2312 mutex_unlock(&dir->d_inode->i_mutex); 2180 mutex_unlock(&dir->d_inode->i_mutex);
2313 goto exit; 2181 goto exit;
2314 } 2182 }
2315 2183
2316 if (IS_ERR(nd->intent.open.file)) { 2184 path->dentry = dentry;
2317 error = PTR_ERR(nd->intent.open.file); 2185 path->mnt = nd->path.mnt;
2318 goto exit_mutex_unlock;
2319 }
2320 2186
2321 /* Negative dentry, just create the file */ 2187 /* Negative dentry, just create the file */
2322 if (!path->dentry->d_inode) { 2188 if (!dentry->d_inode) {
2189 int mode = op->mode;
2190 if (!IS_POSIXACL(dir->d_inode))
2191 mode &= ~current_umask();
2323 /* 2192 /*
2324 * This write is needed to ensure that a 2193 * This write is needed to ensure that a
2325 * ro->rw transition does not occur between 2194 * rw->ro transition does not occur between
2326 * the time when the file is created and when 2195 * the time when the file is created and when
2327 * a permanent write count is taken through 2196 * a permanent write count is taken through
2328 * the 'struct file' in nameidata_to_filp(). 2197 * the 'struct file' in nameidata_to_filp().
@@ -2330,22 +2199,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2330 error = mnt_want_write(nd->path.mnt); 2199 error = mnt_want_write(nd->path.mnt);
2331 if (error) 2200 if (error)
2332 goto exit_mutex_unlock; 2201 goto exit_mutex_unlock;
2333 error = __open_namei_create(nd, path, open_flag, mode); 2202 want_write = 1;
2334 if (error) { 2203 /* Don't check for write permission, don't truncate */
2335 mnt_drop_write(nd->path.mnt); 2204 open_flag &= ~O_TRUNC;
2336 goto exit; 2205 will_truncate = 0;
2337 } 2206 acc_mode = MAY_OPEN;
2338 filp = nameidata_to_filp(nd); 2207 error = security_path_mknod(&nd->path, dentry, mode, 0);
2339 mnt_drop_write(nd->path.mnt); 2208 if (error)
2340 path_put(&nd->path); 2209 goto exit_mutex_unlock;
2341 if (!IS_ERR(filp)) { 2210 error = vfs_create(dir->d_inode, dentry, mode, nd);
2342 error = ima_file_check(filp, acc_mode); 2211 if (error)
2343 if (error) { 2212 goto exit_mutex_unlock;
2344 fput(filp); 2213 mutex_unlock(&dir->d_inode->i_mutex);
2345 filp = ERR_PTR(error); 2214 dput(nd->path.dentry);
2346 } 2215 nd->path.dentry = dentry;
2347 } 2216 goto common;
2348 return filp;
2349 } 2217 }
2350 2218
2351 /* 2219 /*
@@ -2375,7 +2243,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2375 if (S_ISDIR(nd->inode->i_mode)) 2243 if (S_ISDIR(nd->inode->i_mode))
2376 goto exit; 2244 goto exit;
2377ok: 2245ok:
2378 filp = finish_open(nd, open_flag, acc_mode); 2246 if (!S_ISREG(nd->inode->i_mode))
2247 will_truncate = 0;
2248
2249 if (will_truncate) {
2250 error = mnt_want_write(nd->path.mnt);
2251 if (error)
2252 goto exit;
2253 want_write = 1;
2254 }
2255common:
2256 error = may_open(&nd->path, acc_mode, open_flag);
2257 if (error)
2258 goto exit;
2259 filp = nameidata_to_filp(nd);
2260 if (!IS_ERR(filp)) {
2261 error = ima_file_check(filp, op->acc_mode);
2262 if (error) {
2263 fput(filp);
2264 filp = ERR_PTR(error);
2265 }
2266 }
2267 if (!IS_ERR(filp)) {
2268 if (will_truncate) {
2269 error = handle_truncate(filp);
2270 if (error) {
2271 fput(filp);
2272 filp = ERR_PTR(error);
2273 }
2274 }
2275 }
2276out:
2277 if (want_write)
2278 mnt_drop_write(nd->path.mnt);
2279 path_put(&nd->path);
2379 return filp; 2280 return filp;
2380 2281
2381exit_mutex_unlock: 2282exit_mutex_unlock:
@@ -2383,204 +2284,103 @@ exit_mutex_unlock:
2383exit_dput: 2284exit_dput:
2384 path_put_conditional(path, nd); 2285 path_put_conditional(path, nd);
2385exit: 2286exit:
2386 path_put(&nd->path); 2287 filp = ERR_PTR(error);
2387 return ERR_PTR(error); 2288 goto out;
2388} 2289}
2389 2290
2390/* 2291static struct file *path_openat(int dfd, const char *pathname,
2391 * Note that the low bits of the passed in "open_flag" 2292 struct nameidata *nd, const struct open_flags *op, int flags)
2392 * are not the same as in the local variable "flag". See
2393 * open_to_namei_flags() for more details.
2394 */
2395struct file *do_filp_open(int dfd, const char *pathname,
2396 int open_flag, int mode, int acc_mode)
2397{ 2293{
2294 struct file *base = NULL;
2398 struct file *filp; 2295 struct file *filp;
2399 struct nameidata nd;
2400 int error;
2401 struct path path; 2296 struct path path;
2402 int count = 0; 2297 int error;
2403 int flag = open_to_namei_flags(open_flag);
2404 int flags;
2405
2406 if (!(open_flag & O_CREAT))
2407 mode = 0;
2408
2409 /* Must never be set by userspace */
2410 open_flag &= ~FMODE_NONOTIFY;
2411
2412 /*
2413 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
2414 * check for O_DSYNC if the need any syncing at all we enforce it's
2415 * always set instead of having to deal with possibly weird behaviour
2416 * for malicious applications setting only __O_SYNC.
2417 */
2418 if (open_flag & __O_SYNC)
2419 open_flag |= O_DSYNC;
2420
2421 if (!acc_mode)
2422 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
2423
2424 /* O_TRUNC implies we need access checks for write permissions */
2425 if (open_flag & O_TRUNC)
2426 acc_mode |= MAY_WRITE;
2427
2428 /* Allow the LSM permission hook to distinguish append
2429 access from general write access. */
2430 if (open_flag & O_APPEND)
2431 acc_mode |= MAY_APPEND;
2432
2433 flags = LOOKUP_OPEN;
2434 if (open_flag & O_CREAT) {
2435 flags |= LOOKUP_CREATE;
2436 if (open_flag & O_EXCL)
2437 flags |= LOOKUP_EXCL;
2438 }
2439 if (open_flag & O_DIRECTORY)
2440 flags |= LOOKUP_DIRECTORY;
2441 if (!(open_flag & O_NOFOLLOW))
2442 flags |= LOOKUP_FOLLOW;
2443 2298
2444 filp = get_empty_filp(); 2299 filp = get_empty_filp();
2445 if (!filp) 2300 if (!filp)
2446 return ERR_PTR(-ENFILE); 2301 return ERR_PTR(-ENFILE);
2447 2302
2448 filp->f_flags = open_flag; 2303 filp->f_flags = op->open_flag;
2449 nd.intent.open.file = filp; 2304 nd->intent.open.file = filp;
2450 nd.intent.open.flags = flag; 2305 nd->intent.open.flags = open_to_namei_flags(op->open_flag);
2451 nd.intent.open.create_mode = mode; 2306 nd->intent.open.create_mode = op->mode;
2452 2307
2453 if (open_flag & O_CREAT) 2308 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
2454 goto creat;
2455
2456 /* !O_CREAT, simple open */
2457 error = do_path_lookup(dfd, pathname, flags, &nd);
2458 if (unlikely(error)) 2309 if (unlikely(error))
2459 goto out_filp2;
2460 error = -ELOOP;
2461 if (!(nd.flags & LOOKUP_FOLLOW)) {
2462 if (nd.inode->i_op->follow_link)
2463 goto out_path2;
2464 }
2465 error = -ENOTDIR;
2466 if (nd.flags & LOOKUP_DIRECTORY) {
2467 if (!nd.inode->i_op->lookup)
2468 goto out_path2;
2469 }
2470 audit_inode(pathname, nd.path.dentry);
2471 filp = finish_open(&nd, open_flag, acc_mode);
2472out2:
2473 release_open_intent(&nd);
2474 return filp;
2475
2476out_path2:
2477 path_put(&nd.path);
2478out_filp2:
2479 filp = ERR_PTR(error);
2480 goto out2;
2481
2482creat:
2483 /* OK, have to create the file. Find the parent. */
2484 error = path_init_rcu(dfd, pathname,
2485 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2486 if (error)
2487 goto out_filp; 2310 goto out_filp;
2488 error = path_walk_rcu(pathname, &nd);
2489 path_finish_rcu(&nd);
2490 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2491 /* slower, locked walk */
2492 if (error == -ESTALE) {
2493reval:
2494 flags |= LOOKUP_REVAL;
2495 }
2496 error = path_init(dfd, pathname,
2497 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2498 if (error)
2499 goto out_filp;
2500 2311
2501 error = path_walk_simple(pathname, &nd); 2312 current->total_link_count = 0;
2502 } 2313 error = link_path_walk(pathname, nd);
2503 if (unlikely(error)) 2314 if (unlikely(error))
2504 goto out_filp; 2315 goto out_filp;
2505 if (unlikely(!audit_dummy_context()))
2506 audit_inode(pathname, nd.path.dentry);
2507 2316
2508 /* 2317 filp = do_last(nd, &path, op, pathname);
2509 * We have the parent and last component.
2510 */
2511 nd.flags = flags;
2512 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
2513 while (unlikely(!filp)) { /* trailing symlink */ 2318 while (unlikely(!filp)) { /* trailing symlink */
2514 struct path link = path; 2319 struct path link = path;
2515 struct inode *linki = link.dentry->d_inode;
2516 void *cookie; 2320 void *cookie;
2517 error = -ELOOP; 2321 if (!(nd->flags & LOOKUP_FOLLOW)) {
2518 if (!(nd.flags & LOOKUP_FOLLOW)) 2322 path_put_conditional(&path, nd);
2519 goto exit_dput; 2323 path_put(&nd->path);
2520 if (count++ == 32) 2324 filp = ERR_PTR(-ELOOP);
2521 goto exit_dput; 2325 break;
2522 /*
2523 * This is subtle. Instead of calling do_follow_link() we do
2524 * the thing by hands. The reason is that this way we have zero
2525 * link_count and path_walk() (called from ->follow_link)
2526 * honoring LOOKUP_PARENT. After that we have the parent and
2527 * last component, i.e. we are in the same situation as after
2528 * the first path_walk(). Well, almost - if the last component
2529 * is normal we get its copy stored in nd->last.name and we will
2530 * have to putname() it when we are done. Procfs-like symlinks
2531 * just set LAST_BIND.
2532 */
2533 nd.flags |= LOOKUP_PARENT;
2534 error = security_inode_follow_link(link.dentry, &nd);
2535 if (error)
2536 goto exit_dput;
2537 error = __do_follow_link(&link, &nd, &cookie);
2538 if (unlikely(error)) {
2539 if (!IS_ERR(cookie) && linki->i_op->put_link)
2540 linki->i_op->put_link(link.dentry, &nd, cookie);
2541 /* nd.path had been dropped */
2542 nd.path = link;
2543 goto out_path;
2544 } 2326 }
2545 nd.flags &= ~LOOKUP_PARENT; 2327 nd->flags |= LOOKUP_PARENT;
2546 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2328 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2547 if (linki->i_op->put_link) 2329 error = follow_link(&link, nd, &cookie);
2548 linki->i_op->put_link(link.dentry, &nd, cookie); 2330 if (unlikely(error))
2549 path_put(&link); 2331 filp = ERR_PTR(error);
2332 else
2333 filp = do_last(nd, &path, op, pathname);
2334 put_link(nd, &link, cookie);
2550 } 2335 }
2551out: 2336out:
2552 if (nd.root.mnt) 2337 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
2553 path_put(&nd.root); 2338 path_put(&nd->root);
2554 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2339 if (base)
2555 goto reval; 2340 fput(base);
2556 release_open_intent(&nd); 2341 release_open_intent(nd);
2557 return filp; 2342 return filp;
2558 2343
2559exit_dput:
2560 path_put_conditional(&path, &nd);
2561out_path:
2562 path_put(&nd.path);
2563out_filp: 2344out_filp:
2564 filp = ERR_PTR(error); 2345 filp = ERR_PTR(error);
2565 goto out; 2346 goto out;
2566} 2347}
2567 2348
2568/** 2349struct file *do_filp_open(int dfd, const char *pathname,
2569 * filp_open - open file and return file pointer 2350 const struct open_flags *op, int flags)
2570 * 2351{
2571 * @filename: path to open 2352 struct nameidata nd;
2572 * @flags: open flags as per the open(2) second argument 2353 struct file *filp;
2573 * @mode: mode for the new file if O_CREAT is set, else ignored 2354
2574 * 2355 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
2575 * This is the helper to open a file from kernelspace if you really 2356 if (unlikely(filp == ERR_PTR(-ECHILD)))
2576 * have to. But in generally you should not do this, so please move 2357 filp = path_openat(dfd, pathname, &nd, op, flags);
2577 * along, nothing to see here.. 2358 if (unlikely(filp == ERR_PTR(-ESTALE)))
2578 */ 2359 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
2579struct file *filp_open(const char *filename, int flags, int mode) 2360 return filp;
2361}
2362
2363struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2364 const char *name, const struct open_flags *op, int flags)
2580{ 2365{
2581 return do_filp_open(AT_FDCWD, filename, flags, mode, 0); 2366 struct nameidata nd;
2367 struct file *file;
2368
2369 nd.root.mnt = mnt;
2370 nd.root.dentry = dentry;
2371
2372 flags |= LOOKUP_ROOT;
2373
2374 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
2375 return ERR_PTR(-ELOOP);
2376
2377 file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
2378 if (unlikely(file == ERR_PTR(-ECHILD)))
2379 file = path_openat(-1, name, &nd, op, flags);
2380 if (unlikely(file == ERR_PTR(-ESTALE)))
2381 file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
2382 return file;
2582} 2383}
2583EXPORT_SYMBOL(filp_open);
2584 2384
2585/** 2385/**
2586 * lookup_create - lookup a dentry, creating it if it doesn't exist 2386 * lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -3119,7 +2919,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
3119 return error; 2919 return error;
3120 2920
3121 mutex_lock(&inode->i_mutex); 2921 mutex_lock(&inode->i_mutex);
3122 error = dir->i_op->link(old_dentry, dir, new_dentry); 2922 /* Make sure we don't allow creating hardlink to an unlinked file */
2923 if (inode->i_nlink == 0)
2924 error = -ENOENT;
2925 else
2926 error = dir->i_op->link(old_dentry, dir, new_dentry);
3123 mutex_unlock(&inode->i_mutex); 2927 mutex_unlock(&inode->i_mutex);
3124 if (!error) 2928 if (!error)
3125 fsnotify_link(dir, inode, new_dentry); 2929 fsnotify_link(dir, inode, new_dentry);
@@ -3141,15 +2945,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3141 struct dentry *new_dentry; 2945 struct dentry *new_dentry;
3142 struct nameidata nd; 2946 struct nameidata nd;
3143 struct path old_path; 2947 struct path old_path;
2948 int how = 0;
3144 int error; 2949 int error;
3145 char *to; 2950 char *to;
3146 2951
3147 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2952 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
3148 return -EINVAL; 2953 return -EINVAL;
2954 /*
2955 * To use null names we require CAP_DAC_READ_SEARCH
2956 * This ensures that not everyone will be able to create
2957 * handlink using the passed filedescriptor.
2958 */
2959 if (flags & AT_EMPTY_PATH) {
2960 if (!capable(CAP_DAC_READ_SEARCH))
2961 return -ENOENT;
2962 how = LOOKUP_EMPTY;
2963 }
2964
2965 if (flags & AT_SYMLINK_FOLLOW)
2966 how |= LOOKUP_FOLLOW;
3149 2967
3150 error = user_path_at(olddfd, oldname, 2968 error = user_path_at(olddfd, oldname, how, &old_path);
3151 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
3152 &old_path);
3153 if (error) 2969 if (error)
3154 return error; 2970 return error;
3155 2971
@@ -3586,7 +3402,7 @@ EXPORT_SYMBOL(page_readlink);
3586EXPORT_SYMBOL(__page_symlink); 3402EXPORT_SYMBOL(__page_symlink);
3587EXPORT_SYMBOL(page_symlink); 3403EXPORT_SYMBOL(page_symlink);
3588EXPORT_SYMBOL(page_symlink_inode_operations); 3404EXPORT_SYMBOL(page_symlink_inode_operations);
3589EXPORT_SYMBOL(path_lookup); 3405EXPORT_SYMBOL(kern_path_parent);
3590EXPORT_SYMBOL(kern_path); 3406EXPORT_SYMBOL(kern_path);
3591EXPORT_SYMBOL(vfs_path_lookup); 3407EXPORT_SYMBOL(vfs_path_lookup);
3592EXPORT_SYMBOL(inode_permission); 3408EXPORT_SYMBOL(inode_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index d1edf26025dc..dffe6f49ab93 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1002,6 +1002,18 @@ const struct seq_operations mounts_op = {
1002 .show = show_vfsmnt 1002 .show = show_vfsmnt
1003}; 1003};
1004 1004
1005static int uuid_is_nil(u8 *uuid)
1006{
1007 int i;
1008 u8 *cp = (u8 *)uuid;
1009
1010 for (i = 0; i < 16; i++) {
1011 if (*cp++)
1012 return 0;
1013 }
1014 return 1;
1015}
1016
1005static int show_mountinfo(struct seq_file *m, void *v) 1017static int show_mountinfo(struct seq_file *m, void *v)
1006{ 1018{
1007 struct proc_mounts *p = m->private; 1019 struct proc_mounts *p = m->private;
@@ -1040,6 +1052,10 @@ static int show_mountinfo(struct seq_file *m, void *v)
1040 if (IS_MNT_UNBINDABLE(mnt)) 1052 if (IS_MNT_UNBINDABLE(mnt))
1041 seq_puts(m, " unbindable"); 1053 seq_puts(m, " unbindable");
1042 1054
1055 if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
1056 /* print the uuid */
1057 seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
1058
1043 /* Filesystem specific data */ 1059 /* Filesystem specific data */
1044 seq_puts(m, " - "); 1060 seq_puts(m, " - ");
1045 show_type(m, sb); 1061 show_type(m, sb);
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index bf9cbd242ddd..124e8fcb0dd6 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -22,30 +22,17 @@
22 22
23static struct file *do_open(char *name, int flags) 23static struct file *do_open(char *name, int flags)
24{ 24{
25 struct nameidata nd;
26 struct vfsmount *mnt; 25 struct vfsmount *mnt;
27 int error; 26 struct file *file;
28 27
29 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); 28 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
30 if (IS_ERR(mnt)) 29 if (IS_ERR(mnt))
31 return (struct file *)mnt; 30 return (struct file *)mnt;
32 31
33 error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd); 32 file = file_open_root(mnt->mnt_root, mnt, name, flags);
34 mntput(mnt); /* drop do_kern_mount reference */
35 if (error)
36 return ERR_PTR(error);
37
38 if (flags == O_RDWR)
39 error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
40 else
41 error = may_open(&nd.path, MAY_WRITE, flags);
42 33
43 if (!error) 34 mntput(mnt); /* drop do_kern_mount reference */
44 return dentry_open(nd.path.dentry, nd.path.mnt, flags, 35 return file;
45 current_cred());
46
47 path_put(&nd.path);
48 return ERR_PTR(error);
49} 36}
50 37
51static struct { 38static struct {
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5dbc3062b4fd..254652a9b542 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -197,8 +197,12 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
197 dentry->d_name.len, dentry->d_name.name, 197 dentry->d_name.len, dentry->d_name.name,
198 fh, len, connectable); 198 fh, len, connectable);
199 199
200 if (len < 3 || (connectable && len < 6)) { 200 if (connectable && (len < 6)) {
201 mlog(ML_ERROR, "fh buffer is too small for encoding\n"); 201 *max_len = 6;
202 type = 255;
203 goto bail;
204 } else if (len < 3) {
205 *max_len = 3;
202 type = 255; 206 type = 255;
203 goto bail; 207 goto bail;
204 } 208 }
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 19ebc5aad391..29623da133cc 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4379,7 +4379,7 @@ static int ocfs2_user_path_parent(const char __user *path,
4379 if (IS_ERR(s)) 4379 if (IS_ERR(s))
4380 return PTR_ERR(s); 4380 return PTR_ERR(s);
4381 4381
4382 error = path_lookup(s, LOOKUP_PARENT, nd); 4382 error = kern_path_parent(s, nd);
4383 if (error) 4383 if (error)
4384 putname(s); 4384 putname(s);
4385 else 4385 else
diff --git a/fs/open.c b/fs/open.c
index b47aab39c057..3cac0bda46df 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -573,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
573{ 573{
574 struct path path; 574 struct path path;
575 int error = -EINVAL; 575 int error = -EINVAL;
576 int follow; 576 int lookup_flags;
577 577
578 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 578 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
579 goto out; 579 goto out;
580 580
581 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 581 lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
582 error = user_path_at(dfd, filename, follow, &path); 582 if (flag & AT_EMPTY_PATH)
583 lookup_flags |= LOOKUP_EMPTY;
584 error = user_path_at(dfd, filename, lookup_flags, &path);
583 if (error) 585 if (error)
584 goto out; 586 goto out;
585 error = mnt_want_write(path.mnt); 587 error = mnt_want_write(path.mnt);
@@ -669,11 +671,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
669 int (*open)(struct inode *, struct file *), 671 int (*open)(struct inode *, struct file *),
670 const struct cred *cred) 672 const struct cred *cred)
671{ 673{
674 static const struct file_operations empty_fops = {};
672 struct inode *inode; 675 struct inode *inode;
673 int error; 676 int error;
674 677
675 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | 678 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
676 FMODE_PREAD | FMODE_PWRITE; 679 FMODE_PREAD | FMODE_PWRITE;
680
681 if (unlikely(f->f_flags & O_PATH))
682 f->f_mode = FMODE_PATH;
683
677 inode = dentry->d_inode; 684 inode = dentry->d_inode;
678 if (f->f_mode & FMODE_WRITE) { 685 if (f->f_mode & FMODE_WRITE) {
679 error = __get_file_write_access(inode, mnt); 686 error = __get_file_write_access(inode, mnt);
@@ -687,9 +694,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
687 f->f_path.dentry = dentry; 694 f->f_path.dentry = dentry;
688 f->f_path.mnt = mnt; 695 f->f_path.mnt = mnt;
689 f->f_pos = 0; 696 f->f_pos = 0;
690 f->f_op = fops_get(inode->i_fop);
691 file_sb_list_add(f, inode->i_sb); 697 file_sb_list_add(f, inode->i_sb);
692 698
699 if (unlikely(f->f_mode & FMODE_PATH)) {
700 f->f_op = &empty_fops;
701 return f;
702 }
703
704 f->f_op = fops_get(inode->i_fop);
705
693 error = security_dentry_open(f, cred); 706 error = security_dentry_open(f, cred);
694 if (error) 707 if (error)
695 goto cleanup_all; 708 goto cleanup_all;
@@ -890,15 +903,110 @@ void fd_install(unsigned int fd, struct file *file)
890 903
891EXPORT_SYMBOL(fd_install); 904EXPORT_SYMBOL(fd_install);
892 905
906static inline int build_open_flags(int flags, int mode, struct open_flags *op)
907{
908 int lookup_flags = 0;
909 int acc_mode;
910
911 if (!(flags & O_CREAT))
912 mode = 0;
913 op->mode = mode;
914
915 /* Must never be set by userspace */
916 flags &= ~FMODE_NONOTIFY;
917
918 /*
919 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
920 * check for O_DSYNC if they need any syncing at all we enforce it's
921 * always set instead of having to deal with possibly weird behaviour
922 * for malicious applications setting only __O_SYNC.
923 */
924 if (flags & __O_SYNC)
925 flags |= O_DSYNC;
926
927 /*
928 * If we have O_PATH in the open flag, then we
929 * cannot have anything other than the below set of flags
930 */
931 if (flags & O_PATH) {
932 flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
933 acc_mode = 0;
934 } else {
935 acc_mode = MAY_OPEN | ACC_MODE(flags);
936 }
937
938 op->open_flag = flags;
939
940 /* O_TRUNC implies we need access checks for write permissions */
941 if (flags & O_TRUNC)
942 acc_mode |= MAY_WRITE;
943
944 /* Allow the LSM permission hook to distinguish append
945 access from general write access. */
946 if (flags & O_APPEND)
947 acc_mode |= MAY_APPEND;
948
949 op->acc_mode = acc_mode;
950
951 op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
952
953 if (flags & O_CREAT) {
954 op->intent |= LOOKUP_CREATE;
955 if (flags & O_EXCL)
956 op->intent |= LOOKUP_EXCL;
957 }
958
959 if (flags & O_DIRECTORY)
960 lookup_flags |= LOOKUP_DIRECTORY;
961 if (!(flags & O_NOFOLLOW))
962 lookup_flags |= LOOKUP_FOLLOW;
963 return lookup_flags;
964}
965
966/**
967 * filp_open - open file and return file pointer
968 *
969 * @filename: path to open
970 * @flags: open flags as per the open(2) second argument
971 * @mode: mode for the new file if O_CREAT is set, else ignored
972 *
973 * This is the helper to open a file from kernelspace if you really
974 * have to. But in general you should not do this, so please move
975 * along, nothing to see here..
976 */
977struct file *filp_open(const char *filename, int flags, int mode)
978{
979 struct open_flags op;
980 int lookup = build_open_flags(flags, mode, &op);
981 return do_filp_open(AT_FDCWD, filename, &op, lookup);
982}
983EXPORT_SYMBOL(filp_open);
984
985struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
986 const char *filename, int flags)
987{
988 struct open_flags op;
989 int lookup = build_open_flags(flags, 0, &op);
990 if (flags & O_CREAT)
991 return ERR_PTR(-EINVAL);
992 if (!filename && (flags & O_DIRECTORY))
993 if (!dentry->d_inode->i_op->lookup)
994 return ERR_PTR(-ENOTDIR);
995 return do_file_open_root(dentry, mnt, filename, &op, lookup);
996}
997EXPORT_SYMBOL(file_open_root);
998
893long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 999long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
894{ 1000{
1001 struct open_flags op;
1002 int lookup = build_open_flags(flags, mode, &op);
895 char *tmp = getname(filename); 1003 char *tmp = getname(filename);
896 int fd = PTR_ERR(tmp); 1004 int fd = PTR_ERR(tmp);
897 1005
898 if (!IS_ERR(tmp)) { 1006 if (!IS_ERR(tmp)) {
899 fd = get_unused_fd_flags(flags); 1007 fd = get_unused_fd_flags(flags);
900 if (fd >= 0) { 1008 if (fd >= 0) {
901 struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); 1009 struct file *f = do_filp_open(dfd, tmp, &op, lookup);
902 if (IS_ERR(f)) { 1010 if (IS_ERR(f)) {
903 put_unused_fd(fd); 1011 put_unused_fd(fd);
904 fd = PTR_ERR(f); 1012 fd = PTR_ERR(f);
@@ -968,8 +1076,10 @@ int filp_close(struct file *filp, fl_owner_t id)
968 if (filp->f_op && filp->f_op->flush) 1076 if (filp->f_op && filp->f_op->flush)
969 retval = filp->f_op->flush(filp, id); 1077 retval = filp->f_op->flush(filp, id);
970 1078
971 dnotify_flush(filp, id); 1079 if (likely(!(filp->f_mode & FMODE_PATH))) {
972 locks_remove_posix(filp, id); 1080 dnotify_flush(filp, id);
1081 locks_remove_posix(filp, id);
1082 }
973 fput(filp); 1083 fput(filp);
974 return retval; 1084 return retval;
975} 1085}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0bae036831e2..1bba24bad820 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1593,8 +1593,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
1593 struct inode *inode = dentry->d_inode; 1593 struct inode *inode = dentry->d_inode;
1594 int maxlen = *lenp; 1594 int maxlen = *lenp;
1595 1595
1596 if (maxlen < 3) 1596 if (need_parent && (maxlen < 5)) {
1597 *lenp = 5;
1597 return 255; 1598 return 255;
1599 } else if (maxlen < 3) {
1600 *lenp = 3;
1601 return 255;
1602 }
1598 1603
1599 data[0] = inode->i_ino; 1604 data[0] = inode->i_ino;
1600 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1605 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 68fdf45cc6c9..4b2eb564fdad 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1122,10 +1122,6 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1122 reiserfs_write_unlock(dir->i_sb); 1122 reiserfs_write_unlock(dir->i_sb);
1123 return -EMLINK; 1123 return -EMLINK;
1124 } 1124 }
1125 if (inode->i_nlink == 0) {
1126 reiserfs_write_unlock(dir->i_sb);
1127 return -ENOENT;
1128 }
1129 1125
1130 /* inc before scheduling so reiserfs_unlink knows we are here */ 1126 /* inc before scheduling so reiserfs_unlink knows we are here */
1131 inc_nlink(inode); 1127 inc_nlink(inode);
diff --git a/fs/stat.c b/fs/stat.c
index d5c61cf2b703..961039121cb8 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
75 int error = -EINVAL; 75 int error = -EINVAL;
76 int lookup_flags = 0; 76 int lookup_flags = 0;
77 77
78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0) 78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
79 AT_EMPTY_PATH)) != 0)
79 goto out; 80 goto out;
80 81
81 if (!(flag & AT_SYMLINK_NOFOLLOW)) 82 if (!(flag & AT_SYMLINK_NOFOLLOW))
82 lookup_flags |= LOOKUP_FOLLOW; 83 lookup_flags |= LOOKUP_FOLLOW;
83 if (flag & AT_NO_AUTOMOUNT) 84 if (flag & AT_NO_AUTOMOUNT)
84 lookup_flags |= LOOKUP_NO_AUTOMOUNT; 85 lookup_flags |= LOOKUP_NO_AUTOMOUNT;
86 if (flag & AT_EMPTY_PATH)
87 lookup_flags |= LOOKUP_EMPTY;
85 88
86 error = user_path_at(dfd, filename, lookup_flags, &path); 89 error = user_path_at(dfd, filename, lookup_flags, &path);
87 if (error) 90 if (error)
@@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
297 if (bufsiz <= 0) 300 if (bufsiz <= 0)
298 return -EINVAL; 301 return -EINVAL;
299 302
300 error = user_path_at(dfd, pathname, 0, &path); 303 error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
301 if (!error) { 304 if (!error) {
302 struct inode *inode = path.dentry->d_inode; 305 struct inode *inode = path.dentry->d_inode;
303 306
diff --git a/fs/statfs.c b/fs/statfs.c
index 30ea8c8a996b..8244924dec55 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -73,149 +73,135 @@ int vfs_statfs(struct path *path, struct kstatfs *buf)
73} 73}
74EXPORT_SYMBOL(vfs_statfs); 74EXPORT_SYMBOL(vfs_statfs);
75 75
76static int do_statfs_native(struct path *path, struct statfs *buf) 76int user_statfs(const char __user *pathname, struct kstatfs *st)
77{ 77{
78 struct kstatfs st; 78 struct path path;
79 int retval; 79 int error = user_path(pathname, &path);
80 if (!error) {
81 error = vfs_statfs(&path, st);
82 path_put(&path);
83 }
84 return error;
85}
80 86
81 retval = vfs_statfs(path, &st); 87int fd_statfs(int fd, struct kstatfs *st)
82 if (retval) 88{
83 return retval; 89 struct file *file = fget(fd);
90 int error = -EBADF;
91 if (file) {
92 error = vfs_statfs(&file->f_path, st);
93 fput(file);
94 }
95 return error;
96}
84 97
85 if (sizeof(*buf) == sizeof(st)) 98static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
86 memcpy(buf, &st, sizeof(st)); 99{
100 struct statfs buf;
101
102 if (sizeof(buf) == sizeof(*st))
103 memcpy(&buf, st, sizeof(*st));
87 else { 104 else {
88 if (sizeof buf->f_blocks == 4) { 105 if (sizeof buf.f_blocks == 4) {
89 if ((st.f_blocks | st.f_bfree | st.f_bavail | 106 if ((st->f_blocks | st->f_bfree | st->f_bavail |
90 st.f_bsize | st.f_frsize) & 107 st->f_bsize | st->f_frsize) &
91 0xffffffff00000000ULL) 108 0xffffffff00000000ULL)
92 return -EOVERFLOW; 109 return -EOVERFLOW;
93 /* 110 /*
94 * f_files and f_ffree may be -1; it's okay to stuff 111 * f_files and f_ffree may be -1; it's okay to stuff
95 * that into 32 bits 112 * that into 32 bits
96 */ 113 */
97 if (st.f_files != -1 && 114 if (st->f_files != -1 &&
98 (st.f_files & 0xffffffff00000000ULL)) 115 (st->f_files & 0xffffffff00000000ULL))
99 return -EOVERFLOW; 116 return -EOVERFLOW;
100 if (st.f_ffree != -1 && 117 if (st->f_ffree != -1 &&
101 (st.f_ffree & 0xffffffff00000000ULL)) 118 (st->f_ffree & 0xffffffff00000000ULL))
102 return -EOVERFLOW; 119 return -EOVERFLOW;
103 } 120 }
104 121
105 buf->f_type = st.f_type; 122 buf.f_type = st->f_type;
106 buf->f_bsize = st.f_bsize; 123 buf.f_bsize = st->f_bsize;
107 buf->f_blocks = st.f_blocks; 124 buf.f_blocks = st->f_blocks;
108 buf->f_bfree = st.f_bfree; 125 buf.f_bfree = st->f_bfree;
109 buf->f_bavail = st.f_bavail; 126 buf.f_bavail = st->f_bavail;
110 buf->f_files = st.f_files; 127 buf.f_files = st->f_files;
111 buf->f_ffree = st.f_ffree; 128 buf.f_ffree = st->f_ffree;
112 buf->f_fsid = st.f_fsid; 129 buf.f_fsid = st->f_fsid;
113 buf->f_namelen = st.f_namelen; 130 buf.f_namelen = st->f_namelen;
114 buf->f_frsize = st.f_frsize; 131 buf.f_frsize = st->f_frsize;
115 buf->f_flags = st.f_flags; 132 buf.f_flags = st->f_flags;
116 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 133 memset(buf.f_spare, 0, sizeof(buf.f_spare));
117 } 134 }
135 if (copy_to_user(p, &buf, sizeof(buf)))
136 return -EFAULT;
118 return 0; 137 return 0;
119} 138}
120 139
121static int do_statfs64(struct path *path, struct statfs64 *buf) 140static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
122{ 141{
123 struct kstatfs st; 142 struct statfs64 buf;
124 int retval; 143 if (sizeof(buf) == sizeof(*st))
125 144 memcpy(&buf, st, sizeof(*st));
126 retval = vfs_statfs(path, &st);
127 if (retval)
128 return retval;
129
130 if (sizeof(*buf) == sizeof(st))
131 memcpy(buf, &st, sizeof(st));
132 else { 145 else {
133 buf->f_type = st.f_type; 146 buf.f_type = st->f_type;
134 buf->f_bsize = st.f_bsize; 147 buf.f_bsize = st->f_bsize;
135 buf->f_blocks = st.f_blocks; 148 buf.f_blocks = st->f_blocks;
136 buf->f_bfree = st.f_bfree; 149 buf.f_bfree = st->f_bfree;
137 buf->f_bavail = st.f_bavail; 150 buf.f_bavail = st->f_bavail;
138 buf->f_files = st.f_files; 151 buf.f_files = st->f_files;
139 buf->f_ffree = st.f_ffree; 152 buf.f_ffree = st->f_ffree;
140 buf->f_fsid = st.f_fsid; 153 buf.f_fsid = st->f_fsid;
141 buf->f_namelen = st.f_namelen; 154 buf.f_namelen = st->f_namelen;
142 buf->f_frsize = st.f_frsize; 155 buf.f_frsize = st->f_frsize;
143 buf->f_flags = st.f_flags; 156 buf.f_flags = st->f_flags;
144 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 157 memset(buf.f_spare, 0, sizeof(buf.f_spare));
145 } 158 }
159 if (copy_to_user(p, &buf, sizeof(buf)))
160 return -EFAULT;
146 return 0; 161 return 0;
147} 162}
148 163
149SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) 164SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
150{ 165{
151 struct path path; 166 struct kstatfs st;
152 int error; 167 int error = user_statfs(pathname, &st);
153 168 if (!error)
154 error = user_path(pathname, &path); 169 error = do_statfs_native(&st, buf);
155 if (!error) {
156 struct statfs tmp;
157 error = do_statfs_native(&path, &tmp);
158 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
159 error = -EFAULT;
160 path_put(&path);
161 }
162 return error; 170 return error;
163} 171}
164 172
165SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) 173SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
166{ 174{
167 struct path path; 175 struct kstatfs st;
168 long error; 176 int error;
169
170 if (sz != sizeof(*buf)) 177 if (sz != sizeof(*buf))
171 return -EINVAL; 178 return -EINVAL;
172 error = user_path(pathname, &path); 179 error = user_statfs(pathname, &st);
173 if (!error) { 180 if (!error)
174 struct statfs64 tmp; 181 error = do_statfs64(&st, buf);
175 error = do_statfs64(&path, &tmp);
176 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
177 error = -EFAULT;
178 path_put(&path);
179 }
180 return error; 182 return error;
181} 183}
182 184
183SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) 185SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
184{ 186{
185 struct file *file; 187 struct kstatfs st;
186 struct statfs tmp; 188 int error = fd_statfs(fd, &st);
187 int error; 189 if (!error)
188 190 error = do_statfs_native(&st, buf);
189 error = -EBADF;
190 file = fget(fd);
191 if (!file)
192 goto out;
193 error = do_statfs_native(&file->f_path, &tmp);
194 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
195 error = -EFAULT;
196 fput(file);
197out:
198 return error; 191 return error;
199} 192}
200 193
201SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) 194SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
202{ 195{
203 struct file *file; 196 struct kstatfs st;
204 struct statfs64 tmp;
205 int error; 197 int error;
206 198
207 if (sz != sizeof(*buf)) 199 if (sz != sizeof(*buf))
208 return -EINVAL; 200 return -EINVAL;
209 201
210 error = -EBADF; 202 error = fd_statfs(fd, &st);
211 file = fget(fd); 203 if (!error)
212 if (!file) 204 error = do_statfs64(&st, buf);
213 goto out;
214 error = do_statfs64(&file->f_path, &tmp);
215 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
216 error = -EFAULT;
217 fput(file);
218out:
219 return error; 205 return error;
220} 206}
221 207
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 14f64b689d7f..7217d67a80a6 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
522 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 522 ubifs_assert(mutex_is_locked(&dir->i_mutex));
523 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 523 ubifs_assert(mutex_is_locked(&inode->i_mutex));
524 524
525 /*
526 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
527 * otherwise has the potential to corrupt the orphan inode list.
528 *
529 * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
530 * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
531 * lock 'dirA->i_mutex', so this is possible. Both of the functions
532 * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
533 * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
534 * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
535 * to the list of orphans. After this, 'vfs_link()' will link
536 * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
537 * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
538 * to the list of orphans.
539 */
540 if (inode->i_nlink == 0)
541 return -ENOENT;
542
543 err = dbg_check_synced_i_size(inode); 525 err = dbg_check_synced_i_size(inode);
544 if (err) 526 if (err)
545 return err; 527 return err;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index b7c338d5e9df..f1dce848ef96 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1286,8 +1286,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
1286 struct fid *fid = (struct fid *)fh; 1286 struct fid *fid = (struct fid *)fh;
1287 int type = FILEID_UDF_WITHOUT_PARENT; 1287 int type = FILEID_UDF_WITHOUT_PARENT;
1288 1288
1289 if (len < 3 || (connectable && len < 5)) 1289 if (connectable && (len < 5)) {
1290 *lenp = 5;
1291 return 255;
1292 } else if (len < 3) {
1293 *lenp = 3;
1290 return 255; 1294 return 255;
1295 }
1291 1296
1292 *lenp = 3; 1297 *lenp = 3;
1293 fid->udf.block = location.logicalBlockNum; 1298 fid->udf.block = location.logicalBlockNum;
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index fc0114da7fdd..f4f878fc0083 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -89,8 +89,10 @@ xfs_fs_encode_fh(
89 * seven combinations work. The real answer is "don't use v2". 89 * seven combinations work. The real answer is "don't use v2".
90 */ 90 */
91 len = xfs_fileid_length(fileid_type); 91 len = xfs_fileid_length(fileid_type);
92 if (*max_len < len) 92 if (*max_len < len) {
93 *max_len = len;
93 return 255; 94 return 255;
95 }
94 *max_len = len; 96 *max_len = len;
95 97
96 switch (fileid_type) { 98 switch (fileid_type) {