aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namespace.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--fs/namespace.c380
1 files changed, 206 insertions, 174 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 61bf376e29e8..7953c96a2071 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -25,18 +25,21 @@
25#include <linux/security.h> 25#include <linux/security.h>
26#include <linux/mount.h> 26#include <linux/mount.h>
27#include <linux/ramfs.h> 27#include <linux/ramfs.h>
28#include <linux/log2.h>
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29#include <asm/unistd.h> 30#include <asm/unistd.h>
30#include "pnode.h" 31#include "pnode.h"
31#include "internal.h" 32#include "internal.h"
32 33
34#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
35#define HASH_SIZE (1UL << HASH_SHIFT)
36
33/* spinlock for vfsmount related operations, inplace of dcache_lock */ 37/* spinlock for vfsmount related operations, inplace of dcache_lock */
34__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); 38__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
35 39
36static int event; 40static int event;
37 41
38static struct list_head *mount_hashtable __read_mostly; 42static struct list_head *mount_hashtable __read_mostly;
39static int hash_mask __read_mostly, hash_bits __read_mostly;
40static struct kmem_cache *mnt_cache __read_mostly; 43static struct kmem_cache *mnt_cache __read_mostly;
41static struct rw_semaphore namespace_sem; 44static struct rw_semaphore namespace_sem;
42 45
@@ -48,8 +51,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
48{ 51{
49 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); 52 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
50 tmp += ((unsigned long)dentry / L1_CACHE_BYTES); 53 tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
51 tmp = tmp + (tmp >> hash_bits); 54 tmp = tmp + (tmp >> HASH_SHIFT);
52 return tmp & hash_mask; 55 return tmp & (HASH_SIZE - 1);
53} 56}
54 57
55struct vfsmount *alloc_vfsmnt(const char *name) 58struct vfsmount *alloc_vfsmnt(const char *name)
@@ -154,13 +157,13 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
154 157
155static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) 158static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
156{ 159{
157 old_nd->dentry = mnt->mnt_mountpoint; 160 old_nd->path.dentry = mnt->mnt_mountpoint;
158 old_nd->mnt = mnt->mnt_parent; 161 old_nd->path.mnt = mnt->mnt_parent;
159 mnt->mnt_parent = mnt; 162 mnt->mnt_parent = mnt;
160 mnt->mnt_mountpoint = mnt->mnt_root; 163 mnt->mnt_mountpoint = mnt->mnt_root;
161 list_del_init(&mnt->mnt_child); 164 list_del_init(&mnt->mnt_child);
162 list_del_init(&mnt->mnt_hash); 165 list_del_init(&mnt->mnt_hash);
163 old_nd->dentry->d_mounted--; 166 old_nd->path.dentry->d_mounted--;
164} 167}
165 168
166void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, 169void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
@@ -173,10 +176,10 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
173 176
174static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd) 177static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
175{ 178{
176 mnt_set_mountpoint(nd->mnt, nd->dentry, mnt); 179 mnt_set_mountpoint(nd->path.mnt, nd->path.dentry, mnt);
177 list_add_tail(&mnt->mnt_hash, mount_hashtable + 180 list_add_tail(&mnt->mnt_hash, mount_hashtable +
178 hash(nd->mnt, nd->dentry)); 181 hash(nd->path.mnt, nd->path.dentry));
179 list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts); 182 list_add_tail(&mnt->mnt_child, &nd->path.mnt->mnt_mounts);
180} 183}
181 184
182/* 185/*
@@ -317,6 +320,50 @@ void mnt_unpin(struct vfsmount *mnt)
317 320
318EXPORT_SYMBOL(mnt_unpin); 321EXPORT_SYMBOL(mnt_unpin);
319 322
323static inline void mangle(struct seq_file *m, const char *s)
324{
325 seq_escape(m, s, " \t\n\\");
326}
327
328/*
329 * Simple .show_options callback for filesystems which don't want to
330 * implement more complex mount option showing.
331 *
332 * See also save_mount_options().
333 */
334int generic_show_options(struct seq_file *m, struct vfsmount *mnt)
335{
336 const char *options = mnt->mnt_sb->s_options;
337
338 if (options != NULL && options[0]) {
339 seq_putc(m, ',');
340 mangle(m, options);
341 }
342
343 return 0;
344}
345EXPORT_SYMBOL(generic_show_options);
346
347/*
348 * If filesystem uses generic_show_options(), this function should be
349 * called from the fill_super() callback.
350 *
351 * The .remount_fs callback usually needs to be handled in a special
352 * way, to make sure, that previous options are not overwritten if the
353 * remount fails.
354 *
355 * Also note, that if the filesystem's .remount_fs function doesn't
356 * reset all options to their default value, but changes only newly
357 * given options, then the displayed options will not reflect reality
358 * any more.
359 */
360void save_mount_options(struct super_block *sb, char *options)
361{
362 kfree(sb->s_options);
363 sb->s_options = kstrdup(options, GFP_KERNEL);
364}
365EXPORT_SYMBOL(save_mount_options);
366
320/* iterator */ 367/* iterator */
321static void *m_start(struct seq_file *m, loff_t *pos) 368static void *m_start(struct seq_file *m, loff_t *pos)
322{ 369{
@@ -338,11 +385,6 @@ static void m_stop(struct seq_file *m, void *v)
338 up_read(&namespace_sem); 385 up_read(&namespace_sem);
339} 386}
340 387
341static inline void mangle(struct seq_file *m, const char *s)
342{
343 seq_escape(m, s, " \t\n\\");
344}
345
346static int show_vfsmnt(struct seq_file *m, void *v) 388static int show_vfsmnt(struct seq_file *m, void *v)
347{ 389{
348 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); 390 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
@@ -366,10 +408,11 @@ static int show_vfsmnt(struct seq_file *m, void *v)
366 { 0, NULL } 408 { 0, NULL }
367 }; 409 };
368 struct proc_fs_info *fs_infop; 410 struct proc_fs_info *fs_infop;
411 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
369 412
370 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); 413 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
371 seq_putc(m, ' '); 414 seq_putc(m, ' ');
372 seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); 415 seq_path(m, &mnt_path, " \t\n\\");
373 seq_putc(m, ' '); 416 seq_putc(m, ' ');
374 mangle(m, mnt->mnt_sb->s_type->name); 417 mangle(m, mnt->mnt_sb->s_type->name);
375 if (mnt->mnt_sb->s_subtype && mnt->mnt_sb->s_subtype[0]) { 418 if (mnt->mnt_sb->s_subtype && mnt->mnt_sb->s_subtype[0]) {
@@ -401,6 +444,7 @@ struct seq_operations mounts_op = {
401static int show_vfsstat(struct seq_file *m, void *v) 444static int show_vfsstat(struct seq_file *m, void *v)
402{ 445{
403 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); 446 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
447 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
404 int err = 0; 448 int err = 0;
405 449
406 /* device */ 450 /* device */
@@ -412,7 +456,7 @@ static int show_vfsstat(struct seq_file *m, void *v)
412 456
413 /* mount point */ 457 /* mount point */
414 seq_puts(m, " mounted on "); 458 seq_puts(m, " mounted on ");
415 seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); 459 seq_path(m, &mnt_path, " \t\n\\");
416 seq_putc(m, ' '); 460 seq_putc(m, ' ');
417 461
418 /* file system type */ 462 /* file system type */
@@ -551,7 +595,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
551 * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount] 595 * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
552 */ 596 */
553 if (flags & MNT_EXPIRE) { 597 if (flags & MNT_EXPIRE) {
554 if (mnt == current->fs->rootmnt || 598 if (mnt == current->fs->root.mnt ||
555 flags & (MNT_FORCE | MNT_DETACH)) 599 flags & (MNT_FORCE | MNT_DETACH))
556 return -EINVAL; 600 return -EINVAL;
557 601
@@ -586,7 +630,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
586 * /reboot - static binary that would close all descriptors and 630 * /reboot - static binary that would close all descriptors and
587 * call reboot(9). Then init(8) could umount root and exec /reboot. 631 * call reboot(9). Then init(8) could umount root and exec /reboot.
588 */ 632 */
589 if (mnt == current->fs->rootmnt && !(flags & MNT_DETACH)) { 633 if (mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
590 /* 634 /*
591 * Special case for "unmounting" root ... 635 * Special case for "unmounting" root ...
592 * we just try to remount it readonly. 636 * we just try to remount it readonly.
@@ -637,18 +681,20 @@ asmlinkage long sys_umount(char __user * name, int flags)
637 if (retval) 681 if (retval)
638 goto out; 682 goto out;
639 retval = -EINVAL; 683 retval = -EINVAL;
640 if (nd.dentry != nd.mnt->mnt_root) 684 if (nd.path.dentry != nd.path.mnt->mnt_root)
641 goto dput_and_out; 685 goto dput_and_out;
642 if (!check_mnt(nd.mnt)) 686 if (!check_mnt(nd.path.mnt))
643 goto dput_and_out; 687 goto dput_and_out;
644 688
645 retval = -EPERM; 689 retval = -EPERM;
646 if (!capable(CAP_SYS_ADMIN)) 690 if (!capable(CAP_SYS_ADMIN))
647 goto dput_and_out; 691 goto dput_and_out;
648 692
649 retval = do_umount(nd.mnt, flags); 693 retval = do_umount(nd.path.mnt, flags);
650dput_and_out: 694dput_and_out:
651 path_release_on_umount(&nd); 695 /* we mustn't call path_put() as that would clear mnt_expiry_mark */
696 dput(nd.path.dentry);
697 mntput_no_expire(nd.path.mnt);
652out: 698out:
653 return retval; 699 return retval;
654} 700}
@@ -671,10 +717,10 @@ static int mount_is_safe(struct nameidata *nd)
671 return 0; 717 return 0;
672 return -EPERM; 718 return -EPERM;
673#ifdef notyet 719#ifdef notyet
674 if (S_ISLNK(nd->dentry->d_inode->i_mode)) 720 if (S_ISLNK(nd->path.dentry->d_inode->i_mode))
675 return -EPERM; 721 return -EPERM;
676 if (nd->dentry->d_inode->i_mode & S_ISVTX) { 722 if (nd->path.dentry->d_inode->i_mode & S_ISVTX) {
677 if (current->uid != nd->dentry->d_inode->i_uid) 723 if (current->uid != nd->path.dentry->d_inode->i_uid)
678 return -EPERM; 724 return -EPERM;
679 } 725 }
680 if (vfs_permission(nd, MAY_WRITE)) 726 if (vfs_permission(nd, MAY_WRITE))
@@ -723,8 +769,8 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
723 q = q->mnt_parent; 769 q = q->mnt_parent;
724 } 770 }
725 p = s; 771 p = s;
726 nd.mnt = q; 772 nd.path.mnt = q;
727 nd.dentry = p->mnt_mountpoint; 773 nd.path.dentry = p->mnt_mountpoint;
728 q = clone_mnt(p, p->mnt_root, flag); 774 q = clone_mnt(p, p->mnt_root, flag);
729 if (!q) 775 if (!q)
730 goto Enomem; 776 goto Enomem;
@@ -833,8 +879,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
833 struct nameidata *nd, struct nameidata *parent_nd) 879 struct nameidata *nd, struct nameidata *parent_nd)
834{ 880{
835 LIST_HEAD(tree_list); 881 LIST_HEAD(tree_list);
836 struct vfsmount *dest_mnt = nd->mnt; 882 struct vfsmount *dest_mnt = nd->path.mnt;
837 struct dentry *dest_dentry = nd->dentry; 883 struct dentry *dest_dentry = nd->path.dentry;
838 struct vfsmount *child, *p; 884 struct vfsmount *child, *p;
839 885
840 if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list)) 886 if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
@@ -869,13 +915,13 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
869 if (mnt->mnt_sb->s_flags & MS_NOUSER) 915 if (mnt->mnt_sb->s_flags & MS_NOUSER)
870 return -EINVAL; 916 return -EINVAL;
871 917
872 if (S_ISDIR(nd->dentry->d_inode->i_mode) != 918 if (S_ISDIR(nd->path.dentry->d_inode->i_mode) !=
873 S_ISDIR(mnt->mnt_root->d_inode->i_mode)) 919 S_ISDIR(mnt->mnt_root->d_inode->i_mode))
874 return -ENOTDIR; 920 return -ENOTDIR;
875 921
876 err = -ENOENT; 922 err = -ENOENT;
877 mutex_lock(&nd->dentry->d_inode->i_mutex); 923 mutex_lock(&nd->path.dentry->d_inode->i_mutex);
878 if (IS_DEADDIR(nd->dentry->d_inode)) 924 if (IS_DEADDIR(nd->path.dentry->d_inode))
879 goto out_unlock; 925 goto out_unlock;
880 926
881 err = security_sb_check_sb(mnt, nd); 927 err = security_sb_check_sb(mnt, nd);
@@ -883,10 +929,10 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
883 goto out_unlock; 929 goto out_unlock;
884 930
885 err = -ENOENT; 931 err = -ENOENT;
886 if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) 932 if (IS_ROOT(nd->path.dentry) || !d_unhashed(nd->path.dentry))
887 err = attach_recursive_mnt(mnt, nd, NULL); 933 err = attach_recursive_mnt(mnt, nd, NULL);
888out_unlock: 934out_unlock:
889 mutex_unlock(&nd->dentry->d_inode->i_mutex); 935 mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
890 if (!err) 936 if (!err)
891 security_sb_post_addmount(mnt, nd); 937 security_sb_post_addmount(mnt, nd);
892 return err; 938 return err;
@@ -894,17 +940,18 @@ out_unlock:
894 940
895/* 941/*
896 * recursively change the type of the mountpoint. 942 * recursively change the type of the mountpoint.
943 * noinline this do_mount helper to save do_mount stack space.
897 */ 944 */
898static int do_change_type(struct nameidata *nd, int flag) 945static noinline int do_change_type(struct nameidata *nd, int flag)
899{ 946{
900 struct vfsmount *m, *mnt = nd->mnt; 947 struct vfsmount *m, *mnt = nd->path.mnt;
901 int recurse = flag & MS_REC; 948 int recurse = flag & MS_REC;
902 int type = flag & ~MS_REC; 949 int type = flag & ~MS_REC;
903 950
904 if (!capable(CAP_SYS_ADMIN)) 951 if (!capable(CAP_SYS_ADMIN))
905 return -EPERM; 952 return -EPERM;
906 953
907 if (nd->dentry != nd->mnt->mnt_root) 954 if (nd->path.dentry != nd->path.mnt->mnt_root)
908 return -EINVAL; 955 return -EINVAL;
909 956
910 down_write(&namespace_sem); 957 down_write(&namespace_sem);
@@ -918,8 +965,10 @@ static int do_change_type(struct nameidata *nd, int flag)
918 965
919/* 966/*
920 * do loopback mount. 967 * do loopback mount.
968 * noinline this do_mount helper to save do_mount stack space.
921 */ 969 */
922static int do_loopback(struct nameidata *nd, char *old_name, int recurse) 970static noinline int do_loopback(struct nameidata *nd, char *old_name,
971 int recurse)
923{ 972{
924 struct nameidata old_nd; 973 struct nameidata old_nd;
925 struct vfsmount *mnt = NULL; 974 struct vfsmount *mnt = NULL;
@@ -934,17 +983,17 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
934 983
935 down_write(&namespace_sem); 984 down_write(&namespace_sem);
936 err = -EINVAL; 985 err = -EINVAL;
937 if (IS_MNT_UNBINDABLE(old_nd.mnt)) 986 if (IS_MNT_UNBINDABLE(old_nd.path.mnt))
938 goto out; 987 goto out;
939 988
940 if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt)) 989 if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt))
941 goto out; 990 goto out;
942 991
943 err = -ENOMEM; 992 err = -ENOMEM;
944 if (recurse) 993 if (recurse)
945 mnt = copy_tree(old_nd.mnt, old_nd.dentry, 0); 994 mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, 0);
946 else 995 else
947 mnt = clone_mnt(old_nd.mnt, old_nd.dentry, 0); 996 mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, 0);
948 997
949 if (!mnt) 998 if (!mnt)
950 goto out; 999 goto out;
@@ -960,7 +1009,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
960 1009
961out: 1010out:
962 up_write(&namespace_sem); 1011 up_write(&namespace_sem);
963 path_release(&old_nd); 1012 path_put(&old_nd.path);
964 return err; 1013 return err;
965} 1014}
966 1015
@@ -968,29 +1017,30 @@ out:
968 * change filesystem flags. dir should be a physical root of filesystem. 1017 * change filesystem flags. dir should be a physical root of filesystem.
969 * If you've mounted a non-root directory somewhere and want to do remount 1018 * If you've mounted a non-root directory somewhere and want to do remount
970 * on it - tough luck. 1019 * on it - tough luck.
1020 * noinline this do_mount helper to save do_mount stack space.
971 */ 1021 */
972static int do_remount(struct nameidata *nd, int flags, int mnt_flags, 1022static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags,
973 void *data) 1023 void *data)
974{ 1024{
975 int err; 1025 int err;
976 struct super_block *sb = nd->mnt->mnt_sb; 1026 struct super_block *sb = nd->path.mnt->mnt_sb;
977 1027
978 if (!capable(CAP_SYS_ADMIN)) 1028 if (!capable(CAP_SYS_ADMIN))
979 return -EPERM; 1029 return -EPERM;
980 1030
981 if (!check_mnt(nd->mnt)) 1031 if (!check_mnt(nd->path.mnt))
982 return -EINVAL; 1032 return -EINVAL;
983 1033
984 if (nd->dentry != nd->mnt->mnt_root) 1034 if (nd->path.dentry != nd->path.mnt->mnt_root)
985 return -EINVAL; 1035 return -EINVAL;
986 1036
987 down_write(&sb->s_umount); 1037 down_write(&sb->s_umount);
988 err = do_remount_sb(sb, flags, data, 0); 1038 err = do_remount_sb(sb, flags, data, 0);
989 if (!err) 1039 if (!err)
990 nd->mnt->mnt_flags = mnt_flags; 1040 nd->path.mnt->mnt_flags = mnt_flags;
991 up_write(&sb->s_umount); 1041 up_write(&sb->s_umount);
992 if (!err) 1042 if (!err)
993 security_sb_post_remount(nd->mnt, flags, data); 1043 security_sb_post_remount(nd->path.mnt, flags, data);
994 return err; 1044 return err;
995} 1045}
996 1046
@@ -1004,7 +1054,10 @@ static inline int tree_contains_unbindable(struct vfsmount *mnt)
1004 return 0; 1054 return 0;
1005} 1055}
1006 1056
1007static int do_move_mount(struct nameidata *nd, char *old_name) 1057/*
1058 * noinline this do_mount helper to save do_mount stack space.
1059 */
1060static noinline int do_move_mount(struct nameidata *nd, char *old_name)
1008{ 1061{
1009 struct nameidata old_nd, parent_nd; 1062 struct nameidata old_nd, parent_nd;
1010 struct vfsmount *p; 1063 struct vfsmount *p;
@@ -1018,69 +1071,74 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
1018 return err; 1071 return err;
1019 1072
1020 down_write(&namespace_sem); 1073 down_write(&namespace_sem);
1021 while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) 1074 while (d_mountpoint(nd->path.dentry) &&
1075 follow_down(&nd->path.mnt, &nd->path.dentry))
1022 ; 1076 ;
1023 err = -EINVAL; 1077 err = -EINVAL;
1024 if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt)) 1078 if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt))
1025 goto out; 1079 goto out;
1026 1080
1027 err = -ENOENT; 1081 err = -ENOENT;
1028 mutex_lock(&nd->dentry->d_inode->i_mutex); 1082 mutex_lock(&nd->path.dentry->d_inode->i_mutex);
1029 if (IS_DEADDIR(nd->dentry->d_inode)) 1083 if (IS_DEADDIR(nd->path.dentry->d_inode))
1030 goto out1; 1084 goto out1;
1031 1085
1032 if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry)) 1086 if (!IS_ROOT(nd->path.dentry) && d_unhashed(nd->path.dentry))
1033 goto out1; 1087 goto out1;
1034 1088
1035 err = -EINVAL; 1089 err = -EINVAL;
1036 if (old_nd.dentry != old_nd.mnt->mnt_root) 1090 if (old_nd.path.dentry != old_nd.path.mnt->mnt_root)
1037 goto out1; 1091 goto out1;
1038 1092
1039 if (old_nd.mnt == old_nd.mnt->mnt_parent) 1093 if (old_nd.path.mnt == old_nd.path.mnt->mnt_parent)
1040 goto out1; 1094 goto out1;
1041 1095
1042 if (S_ISDIR(nd->dentry->d_inode->i_mode) != 1096 if (S_ISDIR(nd->path.dentry->d_inode->i_mode) !=
1043 S_ISDIR(old_nd.dentry->d_inode->i_mode)) 1097 S_ISDIR(old_nd.path.dentry->d_inode->i_mode))
1044 goto out1; 1098 goto out1;
1045 /* 1099 /*
1046 * Don't move a mount residing in a shared parent. 1100 * Don't move a mount residing in a shared parent.
1047 */ 1101 */
1048 if (old_nd.mnt->mnt_parent && IS_MNT_SHARED(old_nd.mnt->mnt_parent)) 1102 if (old_nd.path.mnt->mnt_parent &&
1103 IS_MNT_SHARED(old_nd.path.mnt->mnt_parent))
1049 goto out1; 1104 goto out1;
1050 /* 1105 /*
1051 * Don't move a mount tree containing unbindable mounts to a destination 1106 * Don't move a mount tree containing unbindable mounts to a destination
1052 * mount which is shared. 1107 * mount which is shared.
1053 */ 1108 */
1054 if (IS_MNT_SHARED(nd->mnt) && tree_contains_unbindable(old_nd.mnt)) 1109 if (IS_MNT_SHARED(nd->path.mnt) &&
1110 tree_contains_unbindable(old_nd.path.mnt))
1055 goto out1; 1111 goto out1;
1056 err = -ELOOP; 1112 err = -ELOOP;
1057 for (p = nd->mnt; p->mnt_parent != p; p = p->mnt_parent) 1113 for (p = nd->path.mnt; p->mnt_parent != p; p = p->mnt_parent)
1058 if (p == old_nd.mnt) 1114 if (p == old_nd.path.mnt)
1059 goto out1; 1115 goto out1;
1060 1116
1061 if ((err = attach_recursive_mnt(old_nd.mnt, nd, &parent_nd))) 1117 err = attach_recursive_mnt(old_nd.path.mnt, nd, &parent_nd);
1118 if (err)
1062 goto out1; 1119 goto out1;
1063 1120
1064 spin_lock(&vfsmount_lock); 1121 spin_lock(&vfsmount_lock);
1065 /* if the mount is moved, it should no longer be expire 1122 /* if the mount is moved, it should no longer be expire
1066 * automatically */ 1123 * automatically */
1067 list_del_init(&old_nd.mnt->mnt_expire); 1124 list_del_init(&old_nd.path.mnt->mnt_expire);
1068 spin_unlock(&vfsmount_lock); 1125 spin_unlock(&vfsmount_lock);
1069out1: 1126out1:
1070 mutex_unlock(&nd->dentry->d_inode->i_mutex); 1127 mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
1071out: 1128out:
1072 up_write(&namespace_sem); 1129 up_write(&namespace_sem);
1073 if (!err) 1130 if (!err)
1074 path_release(&parent_nd); 1131 path_put(&parent_nd.path);
1075 path_release(&old_nd); 1132 path_put(&old_nd.path);
1076 return err; 1133 return err;
1077} 1134}
1078 1135
1079/* 1136/*
1080 * create a new mount for userspace and request it to be added into the 1137 * create a new mount for userspace and request it to be added into the
1081 * namespace's tree 1138 * namespace's tree
1139 * noinline this do_mount helper to save do_mount stack space.
1082 */ 1140 */
1083static int do_new_mount(struct nameidata *nd, char *type, int flags, 1141static noinline int do_new_mount(struct nameidata *nd, char *type, int flags,
1084 int mnt_flags, char *name, void *data) 1142 int mnt_flags, char *name, void *data)
1085{ 1143{
1086 struct vfsmount *mnt; 1144 struct vfsmount *mnt;
@@ -1110,16 +1168,17 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
1110 1168
1111 down_write(&namespace_sem); 1169 down_write(&namespace_sem);
1112 /* Something was mounted here while we slept */ 1170 /* Something was mounted here while we slept */
1113 while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) 1171 while (d_mountpoint(nd->path.dentry) &&
1172 follow_down(&nd->path.mnt, &nd->path.dentry))
1114 ; 1173 ;
1115 err = -EINVAL; 1174 err = -EINVAL;
1116 if (!check_mnt(nd->mnt)) 1175 if (!check_mnt(nd->path.mnt))
1117 goto unlock; 1176 goto unlock;
1118 1177
1119 /* Refuse the same filesystem on the same mount point */ 1178 /* Refuse the same filesystem on the same mount point */
1120 err = -EBUSY; 1179 err = -EBUSY;
1121 if (nd->mnt->mnt_sb == newmnt->mnt_sb && 1180 if (nd->path.mnt->mnt_sb == newmnt->mnt_sb &&
1122 nd->mnt->mnt_root == nd->dentry) 1181 nd->path.mnt->mnt_root == nd->path.dentry)
1123 goto unlock; 1182 goto unlock;
1124 1183
1125 err = -EINVAL; 1184 err = -EINVAL;
@@ -1455,7 +1514,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1455 retval = do_new_mount(&nd, type_page, flags, mnt_flags, 1514 retval = do_new_mount(&nd, type_page, flags, mnt_flags,
1456 dev_name, data_page); 1515 dev_name, data_page);
1457dput_out: 1516dput_out:
1458 path_release(&nd); 1517 path_put(&nd.path);
1459 return retval; 1518 return retval;
1460} 1519}
1461 1520
@@ -1502,17 +1561,17 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
1502 while (p) { 1561 while (p) {
1503 q->mnt_ns = new_ns; 1562 q->mnt_ns = new_ns;
1504 if (fs) { 1563 if (fs) {
1505 if (p == fs->rootmnt) { 1564 if (p == fs->root.mnt) {
1506 rootmnt = p; 1565 rootmnt = p;
1507 fs->rootmnt = mntget(q); 1566 fs->root.mnt = mntget(q);
1508 } 1567 }
1509 if (p == fs->pwdmnt) { 1568 if (p == fs->pwd.mnt) {
1510 pwdmnt = p; 1569 pwdmnt = p;
1511 fs->pwdmnt = mntget(q); 1570 fs->pwd.mnt = mntget(q);
1512 } 1571 }
1513 if (p == fs->altrootmnt) { 1572 if (p == fs->altroot.mnt) {
1514 altrootmnt = p; 1573 altrootmnt = p;
1515 fs->altrootmnt = mntget(q); 1574 fs->altroot.mnt = mntget(q);
1516 } 1575 }
1517 } 1576 }
1518 p = next_mnt(p, mnt_ns->root); 1577 p = next_mnt(p, mnt_ns->root);
@@ -1593,44 +1652,35 @@ out1:
1593 * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. 1652 * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
1594 * It can block. Requires the big lock held. 1653 * It can block. Requires the big lock held.
1595 */ 1654 */
1596void set_fs_root(struct fs_struct *fs, struct vfsmount *mnt, 1655void set_fs_root(struct fs_struct *fs, struct path *path)
1597 struct dentry *dentry)
1598{ 1656{
1599 struct dentry *old_root; 1657 struct path old_root;
1600 struct vfsmount *old_rootmnt; 1658
1601 write_lock(&fs->lock); 1659 write_lock(&fs->lock);
1602 old_root = fs->root; 1660 old_root = fs->root;
1603 old_rootmnt = fs->rootmnt; 1661 fs->root = *path;
1604 fs->rootmnt = mntget(mnt); 1662 path_get(path);
1605 fs->root = dget(dentry);
1606 write_unlock(&fs->lock); 1663 write_unlock(&fs->lock);
1607 if (old_root) { 1664 if (old_root.dentry)
1608 dput(old_root); 1665 path_put(&old_root);
1609 mntput(old_rootmnt);
1610 }
1611} 1666}
1612 1667
1613/* 1668/*
1614 * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. 1669 * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
1615 * It can block. Requires the big lock held. 1670 * It can block. Requires the big lock held.
1616 */ 1671 */
1617void set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, 1672void set_fs_pwd(struct fs_struct *fs, struct path *path)
1618 struct dentry *dentry)
1619{ 1673{
1620 struct dentry *old_pwd; 1674 struct path old_pwd;
1621 struct vfsmount *old_pwdmnt;
1622 1675
1623 write_lock(&fs->lock); 1676 write_lock(&fs->lock);
1624 old_pwd = fs->pwd; 1677 old_pwd = fs->pwd;
1625 old_pwdmnt = fs->pwdmnt; 1678 fs->pwd = *path;
1626 fs->pwdmnt = mntget(mnt); 1679 path_get(path);
1627 fs->pwd = dget(dentry);
1628 write_unlock(&fs->lock); 1680 write_unlock(&fs->lock);
1629 1681
1630 if (old_pwd) { 1682 if (old_pwd.dentry)
1631 dput(old_pwd); 1683 path_put(&old_pwd);
1632 mntput(old_pwdmnt);
1633 }
1634} 1684}
1635 1685
1636static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) 1686static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
@@ -1645,12 +1695,12 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
1645 if (fs) { 1695 if (fs) {
1646 atomic_inc(&fs->count); 1696 atomic_inc(&fs->count);
1647 task_unlock(p); 1697 task_unlock(p);
1648 if (fs->root == old_nd->dentry 1698 if (fs->root.dentry == old_nd->path.dentry
1649 && fs->rootmnt == old_nd->mnt) 1699 && fs->root.mnt == old_nd->path.mnt)
1650 set_fs_root(fs, new_nd->mnt, new_nd->dentry); 1700 set_fs_root(fs, &new_nd->path);
1651 if (fs->pwd == old_nd->dentry 1701 if (fs->pwd.dentry == old_nd->path.dentry
1652 && fs->pwdmnt == old_nd->mnt) 1702 && fs->pwd.mnt == old_nd->path.mnt)
1653 set_fs_pwd(fs, new_nd->mnt, new_nd->dentry); 1703 set_fs_pwd(fs, &new_nd->path);
1654 put_fs_struct(fs); 1704 put_fs_struct(fs);
1655 } else 1705 } else
1656 task_unlock(p); 1706 task_unlock(p);
@@ -1700,7 +1750,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
1700 if (error) 1750 if (error)
1701 goto out0; 1751 goto out0;
1702 error = -EINVAL; 1752 error = -EINVAL;
1703 if (!check_mnt(new_nd.mnt)) 1753 if (!check_mnt(new_nd.path.mnt))
1704 goto out1; 1754 goto out1;
1705 1755
1706 error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd); 1756 error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd);
@@ -1709,74 +1759,78 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
1709 1759
1710 error = security_sb_pivotroot(&old_nd, &new_nd); 1760 error = security_sb_pivotroot(&old_nd, &new_nd);
1711 if (error) { 1761 if (error) {
1712 path_release(&old_nd); 1762 path_put(&old_nd.path);
1713 goto out1; 1763 goto out1;
1714 } 1764 }
1715 1765
1716 read_lock(&current->fs->lock); 1766 read_lock(&current->fs->lock);
1717 user_nd.mnt = mntget(current->fs->rootmnt); 1767 user_nd.path = current->fs->root;
1718 user_nd.dentry = dget(current->fs->root); 1768 path_get(&current->fs->root);
1719 read_unlock(&current->fs->lock); 1769 read_unlock(&current->fs->lock);
1720 down_write(&namespace_sem); 1770 down_write(&namespace_sem);
1721 mutex_lock(&old_nd.dentry->d_inode->i_mutex); 1771 mutex_lock(&old_nd.path.dentry->d_inode->i_mutex);
1722 error = -EINVAL; 1772 error = -EINVAL;
1723 if (IS_MNT_SHARED(old_nd.mnt) || 1773 if (IS_MNT_SHARED(old_nd.path.mnt) ||
1724 IS_MNT_SHARED(new_nd.mnt->mnt_parent) || 1774 IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) ||
1725 IS_MNT_SHARED(user_nd.mnt->mnt_parent)) 1775 IS_MNT_SHARED(user_nd.path.mnt->mnt_parent))
1726 goto out2; 1776 goto out2;
1727 if (!check_mnt(user_nd.mnt)) 1777 if (!check_mnt(user_nd.path.mnt))
1728 goto out2; 1778 goto out2;
1729 error = -ENOENT; 1779 error = -ENOENT;
1730 if (IS_DEADDIR(new_nd.dentry->d_inode)) 1780 if (IS_DEADDIR(new_nd.path.dentry->d_inode))
1731 goto out2; 1781 goto out2;
1732 if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry)) 1782 if (d_unhashed(new_nd.path.dentry) && !IS_ROOT(new_nd.path.dentry))
1733 goto out2; 1783 goto out2;
1734 if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry)) 1784 if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry))
1735 goto out2; 1785 goto out2;
1736 error = -EBUSY; 1786 error = -EBUSY;
1737 if (new_nd.mnt == user_nd.mnt || old_nd.mnt == user_nd.mnt) 1787 if (new_nd.path.mnt == user_nd.path.mnt ||
1788 old_nd.path.mnt == user_nd.path.mnt)
1738 goto out2; /* loop, on the same file system */ 1789 goto out2; /* loop, on the same file system */
1739 error = -EINVAL; 1790 error = -EINVAL;
1740 if (user_nd.mnt->mnt_root != user_nd.dentry) 1791 if (user_nd.path.mnt->mnt_root != user_nd.path.dentry)
1741 goto out2; /* not a mountpoint */ 1792 goto out2; /* not a mountpoint */
1742 if (user_nd.mnt->mnt_parent == user_nd.mnt) 1793 if (user_nd.path.mnt->mnt_parent == user_nd.path.mnt)
1743 goto out2; /* not attached */ 1794 goto out2; /* not attached */
1744 if (new_nd.mnt->mnt_root != new_nd.dentry) 1795 if (new_nd.path.mnt->mnt_root != new_nd.path.dentry)
1745 goto out2; /* not a mountpoint */ 1796 goto out2; /* not a mountpoint */
1746 if (new_nd.mnt->mnt_parent == new_nd.mnt) 1797 if (new_nd.path.mnt->mnt_parent == new_nd.path.mnt)
1747 goto out2; /* not attached */ 1798 goto out2; /* not attached */
1748 tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */ 1799 /* make sure we can reach put_old from new_root */
1800 tmp = old_nd.path.mnt;
1749 spin_lock(&vfsmount_lock); 1801 spin_lock(&vfsmount_lock);
1750 if (tmp != new_nd.mnt) { 1802 if (tmp != new_nd.path.mnt) {
1751 for (;;) { 1803 for (;;) {
1752 if (tmp->mnt_parent == tmp) 1804 if (tmp->mnt_parent == tmp)
1753 goto out3; /* already mounted on put_old */ 1805 goto out3; /* already mounted on put_old */
1754 if (tmp->mnt_parent == new_nd.mnt) 1806 if (tmp->mnt_parent == new_nd.path.mnt)
1755 break; 1807 break;
1756 tmp = tmp->mnt_parent; 1808 tmp = tmp->mnt_parent;
1757 } 1809 }
1758 if (!is_subdir(tmp->mnt_mountpoint, new_nd.dentry)) 1810 if (!is_subdir(tmp->mnt_mountpoint, new_nd.path.dentry))
1759 goto out3; 1811 goto out3;
1760 } else if (!is_subdir(old_nd.dentry, new_nd.dentry)) 1812 } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry))
1761 goto out3; 1813 goto out3;
1762 detach_mnt(new_nd.mnt, &parent_nd); 1814 detach_mnt(new_nd.path.mnt, &parent_nd);
1763 detach_mnt(user_nd.mnt, &root_parent); 1815 detach_mnt(user_nd.path.mnt, &root_parent);
1764 attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */ 1816 /* mount old root on put_old */
1765 attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */ 1817 attach_mnt(user_nd.path.mnt, &old_nd);
1818 /* mount new_root on / */
1819 attach_mnt(new_nd.path.mnt, &root_parent);
1766 touch_mnt_namespace(current->nsproxy->mnt_ns); 1820 touch_mnt_namespace(current->nsproxy->mnt_ns);
1767 spin_unlock(&vfsmount_lock); 1821 spin_unlock(&vfsmount_lock);
1768 chroot_fs_refs(&user_nd, &new_nd); 1822 chroot_fs_refs(&user_nd, &new_nd);
1769 security_sb_post_pivotroot(&user_nd, &new_nd); 1823 security_sb_post_pivotroot(&user_nd, &new_nd);
1770 error = 0; 1824 error = 0;
1771 path_release(&root_parent); 1825 path_put(&root_parent.path);
1772 path_release(&parent_nd); 1826 path_put(&parent_nd.path);
1773out2: 1827out2:
1774 mutex_unlock(&old_nd.dentry->d_inode->i_mutex); 1828 mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex);
1775 up_write(&namespace_sem); 1829 up_write(&namespace_sem);
1776 path_release(&user_nd); 1830 path_put(&user_nd.path);
1777 path_release(&old_nd); 1831 path_put(&old_nd.path);
1778out1: 1832out1:
1779 path_release(&new_nd); 1833 path_put(&new_nd.path);
1780out0: 1834out0:
1781 unlock_kernel(); 1835 unlock_kernel();
1782 return error; 1836 return error;
@@ -1789,6 +1843,7 @@ static void __init init_mount_tree(void)
1789{ 1843{
1790 struct vfsmount *mnt; 1844 struct vfsmount *mnt;
1791 struct mnt_namespace *ns; 1845 struct mnt_namespace *ns;
1846 struct path root;
1792 1847
1793 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); 1848 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
1794 if (IS_ERR(mnt)) 1849 if (IS_ERR(mnt))
@@ -1807,15 +1862,16 @@ static void __init init_mount_tree(void)
1807 init_task.nsproxy->mnt_ns = ns; 1862 init_task.nsproxy->mnt_ns = ns;
1808 get_mnt_ns(ns); 1863 get_mnt_ns(ns);
1809 1864
1810 set_fs_pwd(current->fs, ns->root, ns->root->mnt_root); 1865 root.mnt = ns->root;
1811 set_fs_root(current->fs, ns->root, ns->root->mnt_root); 1866 root.dentry = ns->root->mnt_root;
1867
1868 set_fs_pwd(current->fs, &root);
1869 set_fs_root(current->fs, &root);
1812} 1870}
1813 1871
1814void __init mnt_init(void) 1872void __init mnt_init(void)
1815{ 1873{
1816 struct list_head *d; 1874 unsigned u;
1817 unsigned int nr_hash;
1818 int i;
1819 int err; 1875 int err;
1820 1876
1821 init_rwsem(&namespace_sem); 1877 init_rwsem(&namespace_sem);
@@ -1828,35 +1884,11 @@ void __init mnt_init(void)
1828 if (!mount_hashtable) 1884 if (!mount_hashtable)
1829 panic("Failed to allocate mount hash table\n"); 1885 panic("Failed to allocate mount hash table\n");
1830 1886
1831 /* 1887 printk("Mount-cache hash table entries: %lu\n", HASH_SIZE);
1832 * Find the power-of-two list-heads that can fit into the allocation.. 1888
1833 * We don't guarantee that "sizeof(struct list_head)" is necessarily 1889 for (u = 0; u < HASH_SIZE; u++)
1834 * a power-of-two. 1890 INIT_LIST_HEAD(&mount_hashtable[u]);
1835 */
1836 nr_hash = PAGE_SIZE / sizeof(struct list_head);
1837 hash_bits = 0;
1838 do {
1839 hash_bits++;
1840 } while ((nr_hash >> hash_bits) != 0);
1841 hash_bits--;
1842 1891
1843 /*
1844 * Re-calculate the actual number of entries and the mask
1845 * from the number of bits we can fit.
1846 */
1847 nr_hash = 1UL << hash_bits;
1848 hash_mask = nr_hash - 1;
1849
1850 printk("Mount-cache hash table entries: %d\n", nr_hash);
1851
1852 /* And initialize the newly allocated array */
1853 d = mount_hashtable;
1854 i = nr_hash;
1855 do {
1856 INIT_LIST_HEAD(d);
1857 d++;
1858 i--;
1859 } while (i);
1860 err = sysfs_init(); 1892 err = sysfs_init();
1861 if (err) 1893 if (err)
1862 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 1894 printk(KERN_WARNING "%s: sysfs_init error: %d\n",