Diffstat (limited to 'fs/namespace.c')
-rw-r--r--	fs/namespace.c	| 390
1 file changed, 194 insertions(+), 196 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index da5c49483430..ac2ce8a766e1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -39,7 +39,7 @@ static int mnt_group_start = 1;
 static struct list_head *mount_hashtable __read_mostly;
 static struct list_head *mountpoint_hashtable __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
-static struct rw_semaphore namespace_sem;
+static DECLARE_RWSEM(namespace_sem);
 
 /* /sys/fs */
 struct kobject *fs_kobj;
@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(fs_kobj);
  * It should be taken for write in all cases where the vfsmount
  * tree or hash is modified or when a vfsmount structure is modified.
  */
-DEFINE_BRLOCK(vfsmount_lock);
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
 
 static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 {
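The per-cpu brlock is replaced by a single seqlock, mount_lock. Writers still serialize against each other through its spinlock side; lockless readers sample the sequence count and retry if a writer slipped in. The lock_mount_hash()/unlock_mount_hash() helpers used throughout the rest of this patch live in fs/mount.h, which is changed in the same series but not shown in this file's diff; a minimal sketch of them, assuming that companion change:

    static inline void lock_mount_hash(void)
    {
    	write_seqlock(&mount_lock);	/* take the spinlock and bump the sequence */
    }

    static inline void unlock_mount_hash(void)
    {
    	write_sequnlock(&mount_lock);
    }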
@@ -63,8 +63,6 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 	return tmp & (HASH_SIZE - 1);
 }
 
-#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
-
 /*
  * allocation is serialized by namespace_sem, but we need the spinlock to
  * serialize with freeing.
@@ -458,7 +456,7 @@ static int mnt_make_readonly(struct mount *mnt)
 {
 	int ret = 0;
 
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
 	/*
 	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -492,15 +490,15 @@ static int mnt_make_readonly(struct mount *mnt)
 	 */
 	smp_wmb();
 	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	return ret;
 }
 
 static void __mnt_unmake_readonly(struct mount *mnt)
 {
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	mnt->mnt.mnt_flags &= ~MNT_READONLY;
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 }
 
 int sb_prepare_remount_readonly(struct super_block *sb)
@@ -512,7 +510,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
 	if (atomic_long_read(&sb->s_remove_count))
 		return -EBUSY;
 
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
 		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
 			mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
@@ -534,7 +532,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
 		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
 			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
 	}
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 
 	return err;
 }
@@ -549,30 +547,56 @@ static void free_vfsmnt(struct mount *mnt)
 	kmem_cache_free(mnt_cache, mnt);
 }
 
+/* call under rcu_read_lock */
+bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
+{
+	struct mount *mnt;
+	if (read_seqretry(&mount_lock, seq))
+		return false;
+	if (bastard == NULL)
+		return true;
+	mnt = real_mount(bastard);
+	mnt_add_count(mnt, 1);
+	if (likely(!read_seqretry(&mount_lock, seq)))
+		return true;
+	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
+		mnt_add_count(mnt, -1);
+		return false;
+	}
+	rcu_read_unlock();
+	mntput(bastard);
+	rcu_read_lock();
+	return false;
+}
+
 /*
- * find the first or last mount at @dentry on vfsmount @mnt depending on
- * @dir. If @dir is set return the first mount else return the last mount.
- * vfsmount_lock must be held for read or write.
+ * find the first mount at @dentry on vfsmount @mnt.
+ * call under rcu_read_lock()
  */
-struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
-			   int dir)
+struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct list_head *head = mount_hashtable + hash(mnt, dentry);
-	struct list_head *tmp = head;
-	struct mount *p, *found = NULL;
+	struct mount *p;
 
-	for (;;) {
-		tmp = dir ? tmp->next : tmp->prev;
-		p = NULL;
-		if (tmp == head)
-			break;
-		p = list_entry(tmp, struct mount, mnt_hash);
-		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) {
-			found = p;
-			break;
-		}
-	}
-	return found;
+	list_for_each_entry_rcu(p, head, mnt_hash)
+		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
+			return p;
+	return NULL;
+}
+
+/*
+ * find the last mount at @dentry on vfsmount @mnt.
+ * mount_lock must be held.
+ */
+struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
+{
+	struct list_head *head = mount_hashtable + hash(mnt, dentry);
+	struct mount *p;
+
+	list_for_each_entry_reverse(p, head, mnt_hash)
+		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
+			return p;
+	return NULL;
 }
 
 /*
@@ -594,17 +618,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
 struct vfsmount *lookup_mnt(struct path *path)
 {
 	struct mount *child_mnt;
+	struct vfsmount *m;
+	unsigned seq;
 
-	br_read_lock(&vfsmount_lock);
-	child_mnt = __lookup_mnt(path->mnt, path->dentry, 1);
-	if (child_mnt) {
-		mnt_add_count(child_mnt, 1);
-		br_read_unlock(&vfsmount_lock);
-		return &child_mnt->mnt;
-	} else {
-		br_read_unlock(&vfsmount_lock);
-		return NULL;
-	}
+	rcu_read_lock();
+	do {
+		seq = read_seqbegin(&mount_lock);
+		child_mnt = __lookup_mnt(path->mnt, path->dentry);
+		m = child_mnt ? &child_mnt->mnt : NULL;
+	} while (!legitimize_mnt(m, seq));
+	rcu_read_unlock();
+	return m;
 }
 
 static struct mountpoint *new_mountpoint(struct dentry *dentry)
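The pair above is the new lockless lookup protocol: under rcu_read_lock(), sample mount_lock with read_seqbegin(), find a candidate mount, then legitimize_mnt() bumps the refcount and re-checks the sequence. If a writer intervened, the speculative reference must be undone: for a mount being unmounted synchronously (MNT_SYNC_UMOUNT, set in umount_tree() below) a bare count decrement suffices, since the umounting side keeps the structure alive across an RCU grace period; otherwise legitimize_mnt() has to do a full mntput() outside the RCU-critical section. A caller-side sketch, with a hypothetical find_candidate() standing in for the lockless hash walk:

    struct vfsmount *candidate;
    unsigned seq;

    rcu_read_lock();
    do {
    	seq = read_seqbegin(&mount_lock);
    	candidate = find_candidate();		/* hypothetical lockless walk */
    } while (!legitimize_mnt(candidate, seq));	/* retry if a writer raced us */
    rcu_read_unlock();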
@@ -796,9 +820,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 	mnt->mnt.mnt_sb = root->d_sb;
 	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 	mnt->mnt_parent = mnt;
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	return &mnt->mnt;
 }
 EXPORT_SYMBOL_GPL(vfs_kern_mount);
@@ -839,9 +863,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 	mnt->mnt.mnt_root = dget(root);
 	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 	mnt->mnt_parent = mnt;
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 
 	if ((flag & CL_SLAVE) ||
 	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
@@ -872,64 +896,66 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 	return ERR_PTR(err);
 }
 
-static inline void mntfree(struct mount *mnt)
+static void delayed_free(struct rcu_head *head)
 {
-	struct vfsmount *m = &mnt->mnt;
-	struct super_block *sb = m->mnt_sb;
-
-	/*
-	 * This probably indicates that somebody messed
-	 * up a mnt_want/drop_write() pair. If this
-	 * happens, the filesystem was probably unable
-	 * to make r/w->r/o transitions.
-	 */
-	/*
-	 * The locking used to deal with mnt_count decrement provides barriers,
-	 * so mnt_get_writers() below is safe.
-	 */
-	WARN_ON(mnt_get_writers(mnt));
-	fsnotify_vfsmount_delete(m);
-	dput(m->mnt_root);
-	free_vfsmnt(mnt);
-	deactivate_super(sb);
+	struct mount *mnt = container_of(head, struct mount, mnt_rcu);
+	kfree(mnt->mnt_devname);
+#ifdef CONFIG_SMP
+	free_percpu(mnt->mnt_pcp);
+#endif
+	kmem_cache_free(mnt_cache, mnt);
 }
 
 static void mntput_no_expire(struct mount *mnt)
 {
 put_again:
-#ifdef CONFIG_SMP
-	br_read_lock(&vfsmount_lock);
-	if (likely(mnt->mnt_ns)) {
-		/* shouldn't be the last one */
-		mnt_add_count(mnt, -1);
-		br_read_unlock(&vfsmount_lock);
+	rcu_read_lock();
+	mnt_add_count(mnt, -1);
+	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+		rcu_read_unlock();
 		return;
 	}
-	br_read_unlock(&vfsmount_lock);
-
-	br_write_lock(&vfsmount_lock);
-	mnt_add_count(mnt, -1);
+	lock_mount_hash();
 	if (mnt_get_count(mnt)) {
-		br_write_unlock(&vfsmount_lock);
+		rcu_read_unlock();
+		unlock_mount_hash();
 		return;
 	}
-#else
-	mnt_add_count(mnt, -1);
-	if (likely(mnt_get_count(mnt)))
-		return;
-	br_write_lock(&vfsmount_lock);
-#endif
 	if (unlikely(mnt->mnt_pinned)) {
 		mnt_add_count(mnt, mnt->mnt_pinned + 1);
 		mnt->mnt_pinned = 0;
-		br_write_unlock(&vfsmount_lock);
+		rcu_read_unlock();
+		unlock_mount_hash();
 		acct_auto_close_mnt(&mnt->mnt);
 		goto put_again;
 	}
+	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
+		rcu_read_unlock();
+		unlock_mount_hash();
+		return;
+	}
+	mnt->mnt.mnt_flags |= MNT_DOOMED;
+	rcu_read_unlock();
 
 	list_del(&mnt->mnt_instance);
-	br_write_unlock(&vfsmount_lock);
-	mntfree(mnt);
+	unlock_mount_hash();
+
+	/*
+	 * This probably indicates that somebody messed
+	 * up a mnt_want/drop_write() pair. If this
+	 * happens, the filesystem was probably unable
+	 * to make r/w->r/o transitions.
+	 */
+	/*
+	 * The locking used to deal with mnt_count decrement provides barriers,
+	 * so mnt_get_writers() below is safe.
+	 */
+	WARN_ON(mnt_get_writers(mnt));
+	fsnotify_vfsmount_delete(&mnt->mnt);
+	dput(mnt->mnt.mnt_root);
+	deactivate_super(mnt->mnt.mnt_sb);
+	mnt_free_id(mnt);
+	call_rcu(&mnt->mnt_rcu, delayed_free);
 }
 
 void mntput(struct vfsmount *mnt)
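Freeing now happens in two stages: mntput_no_expire() marks the mount MNT_DOOMED under lock_mount_hash() (so a concurrent legitimize_mnt() either sees the bumped sequence count and backs off, or wins the race and keeps the mount alive), tears down everything that can sleep, and defers the final free to an RCU callback. That requires an rcu_head in struct mount; a sketch of the fs/mount.h side of this series (simplified, surrounding fields elided):

    struct mount {
    	struct list_head mnt_hash;
    	struct mount *mnt_parent;
    	struct dentry *mnt_mountpoint;
    	struct vfsmount mnt;
    	struct rcu_head mnt_rcu;	/* queued via call_rcu() to delayed_free() */
    	/* ... */
    };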
@@ -954,21 +980,21 @@ EXPORT_SYMBOL(mntget);
 
 void mnt_pin(struct vfsmount *mnt)
 {
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	real_mount(mnt)->mnt_pinned++;
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 }
 EXPORT_SYMBOL(mnt_pin);
 
 void mnt_unpin(struct vfsmount *m)
 {
 	struct mount *mnt = real_mount(m);
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	if (mnt->mnt_pinned) {
 		mnt_add_count(mnt, 1);
 		mnt->mnt_pinned--;
 	}
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 }
 EXPORT_SYMBOL(mnt_unpin);
 
@@ -1085,12 +1111,12 @@ int may_umount_tree(struct vfsmount *m)
 	BUG_ON(!m);
 
 	/* write lock needed for mnt_get_count */
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	for (p = mnt; p; p = next_mnt(p, mnt)) {
 		actual_refs += mnt_get_count(p);
 		minimum_refs += 2;
 	}
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 
 	if (actual_refs > minimum_refs)
 		return 0;
@@ -1117,10 +1143,10 @@ int may_umount(struct vfsmount *mnt)
 {
 	int ret = 1;
 	down_read(&namespace_sem);
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	if (propagate_mount_busy(real_mount(mnt), 2))
 		ret = 0;
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	up_read(&namespace_sem);
 	return ret;
 }
@@ -1142,23 +1168,13 @@ static void namespace_unlock(void)
 	list_splice_init(&unmounted, &head);
 	up_write(&namespace_sem);
 
+	synchronize_rcu();
+
 	while (!list_empty(&head)) {
 		mnt = list_first_entry(&head, struct mount, mnt_hash);
 		list_del_init(&mnt->mnt_hash);
-		if (mnt_has_parent(mnt)) {
-			struct dentry *dentry;
-			struct mount *m;
-
-			br_write_lock(&vfsmount_lock);
-			dentry = mnt->mnt_mountpoint;
-			m = mnt->mnt_parent;
-			mnt->mnt_mountpoint = mnt->mnt.mnt_root;
-			mnt->mnt_parent = mnt;
-			m->mnt_ghosts--;
-			br_write_unlock(&vfsmount_lock);
-			dput(dentry);
-			mntput(&m->mnt);
-		}
+		if (mnt->mnt_ex_mountpoint.mnt)
+			path_put(&mnt->mnt_ex_mountpoint);
 		mntput(&mnt->mnt);
 	}
 }
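namespace_unlock() now waits for an RCU grace period before dropping the final references on an unmounted tree: after synchronize_rcu() no lockless walker can still be looking at these mounts, so the ex-mountpoint references can be dropped without taking any lock. The reference to the former mountpoint is carried out of umount_tree() in a new struct path field; a sketch of the addition (assumption: simplified from the companion fs/mount.h change):

    struct mount {
    	/* ... */
    	struct path mnt_ex_mountpoint;	/* pinned ex-mountpoint, dropped in
    					 * namespace_unlock() after the grace
    					 * period */
    	/* ... */
    };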
@@ -1169,10 +1185,13 @@ static inline void namespace_lock(void)
 }
 
 /*
- * vfsmount lock must be held for write
+ * mount_lock must be held
  * namespace_sem must be held for write
+ * how = 0 => just this tree, don't propagate
+ * how = 1 => propagate; we know that nobody else has reference to any victims
+ * how = 2 => lazy umount
  */
-void umount_tree(struct mount *mnt, int propagate)
+void umount_tree(struct mount *mnt, int how)
 {
 	LIST_HEAD(tmp_list);
 	struct mount *p;
@@ -1180,7 +1199,7 @@ void umount_tree(struct mount *mnt, int propagate)
 	for (p = mnt; p; p = next_mnt(p, mnt))
 		list_move(&p->mnt_hash, &tmp_list);
 
-	if (propagate)
+	if (how)
 		propagate_umount(&tmp_list);
 
 	list_for_each_entry(p, &tmp_list, mnt_hash) {
@@ -1188,10 +1207,16 @@ void umount_tree(struct mount *mnt, int propagate)
 		list_del_init(&p->mnt_list);
 		__touch_mnt_namespace(p->mnt_ns);
 		p->mnt_ns = NULL;
+		if (how < 2)
+			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
 		list_del_init(&p->mnt_child);
 		if (mnt_has_parent(p)) {
-			p->mnt_parent->mnt_ghosts++;
 			put_mountpoint(p->mnt_mp);
+			/* move the reference to mountpoint into ->mnt_ex_mountpoint */
+			p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint;
+			p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt;
+			p->mnt_mountpoint = p->mnt.mnt_root;
+			p->mnt_parent = p;
 			p->mnt_mp = NULL;
 		}
 		change_mnt_propagation(p, MS_PRIVATE);
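How the three modes interact with the RCU side: everything except lazy umount (how = 2) sets MNT_SYNC_UMOUNT, promising that the mount stays allocated until the synchronize_rcu() in namespace_unlock(); that is what lets legitimize_mnt() back out with a bare count decrement. The two new flags are added to include/linux/mount.h in this series; a sketch, with the exact values shown for illustration only:

    #define MNT_DOOMED	0x1000000	/* final mntput() is under way */
    #define MNT_SYNC_UMOUNT	0x2000000	/* umount ends with synchronize_rcu() */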
@@ -1225,12 +1250,12 @@ static int do_umount(struct mount *mnt, int flags)
 		 * probably don't strictly need the lock here if we examined
 		 * all race cases, but it's a slowpath.
 		 */
-		br_write_lock(&vfsmount_lock);
+		lock_mount_hash();
 		if (mnt_get_count(mnt) != 2) {
-			br_write_unlock(&vfsmount_lock);
+			unlock_mount_hash();
 			return -EBUSY;
 		}
-		br_write_unlock(&vfsmount_lock);
+		unlock_mount_hash();
 
 		if (!xchg(&mnt->mnt_expiry_mark, 1))
 			return -EAGAIN;
@@ -1272,19 +1297,23 @@ static int do_umount(struct mount *mnt, int flags)
 	}
 
 	namespace_lock();
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	event++;
 
-	if (!(flags & MNT_DETACH))
-		shrink_submounts(mnt);
-
-	retval = -EBUSY;
-	if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
+	if (flags & MNT_DETACH) {
 		if (!list_empty(&mnt->mnt_list))
-			umount_tree(mnt, 1);
+			umount_tree(mnt, 2);
 		retval = 0;
+	} else {
+		shrink_submounts(mnt);
+		retval = -EBUSY;
+		if (!propagate_mount_busy(mnt, 2)) {
+			if (!list_empty(&mnt->mnt_list))
+				umount_tree(mnt, 1);
+			retval = 0;
+		}
 	}
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	namespace_unlock();
 	return retval;
 }
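The MNT_DETACH behaviour change is visible here: lazy umount no longer runs the busy check at all and passes how = 2, so a detached tree is not marked MNT_SYNC_UMOUNT and may outlive the syscall. Nothing changes from userspace; a usage sketch:

    #include <sys/mount.h>

    /* detach the tree now; the filesystem is freed only when the last
     * user (open file, cwd, ...) finally drops it */
    umount2("/mnt/data", MNT_DETACH);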
@@ -1427,18 +1456,18 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 			q = clone_mnt(p, p->mnt.mnt_root, flag);
 			if (IS_ERR(q))
 				goto out;
-			br_write_lock(&vfsmount_lock);
+			lock_mount_hash();
 			list_add_tail(&q->mnt_list, &res->mnt_list);
 			attach_mnt(q, parent, p->mnt_mp);
-			br_write_unlock(&vfsmount_lock);
+			unlock_mount_hash();
 		}
 	}
 	return res;
 out:
 	if (res) {
-		br_write_lock(&vfsmount_lock);
+		lock_mount_hash();
 		umount_tree(res, 0);
-		br_write_unlock(&vfsmount_lock);
+		unlock_mount_hash();
 	}
 	return q;
 }
@@ -1460,9 +1489,9 @@ struct vfsmount *collect_mounts(struct path *path)
 void drop_collected_mounts(struct vfsmount *mnt)
 {
 	namespace_lock();
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	umount_tree(real_mount(mnt), 0);
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	namespace_unlock();
 }
 
@@ -1589,7 +1618,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 	if (err)
 		goto out_cleanup_ids;
 
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 
 	if (IS_MNT_SHARED(dest_mnt)) {
 		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
@@ -1608,7 +1637,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 		list_del_init(&child->mnt_hash);
 		commit_tree(child);
 	}
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 
 	return 0;
 
@@ -1710,10 +1739,10 @@ static int do_change_type(struct path *path, int flag)
 			goto out_unlock;
 	}
 
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
 		change_mnt_propagation(m, type);
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 
  out_unlock:
 	namespace_unlock();
@@ -1785,9 +1814,9 @@ static int do_loopback(struct path *path, const char *old_name,
 
 	err = graft_tree(mnt, parent, mp);
 	if (err) {
-		br_write_lock(&vfsmount_lock);
+		lock_mount_hash();
 		umount_tree(mnt, 0);
-		br_write_unlock(&vfsmount_lock);
+		unlock_mount_hash();
 	}
 out2:
 	unlock_mount(mp);
@@ -1846,17 +1875,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 	else
 		err = do_remount_sb(sb, flags, data, 0);
 	if (!err) {
-		br_write_lock(&vfsmount_lock);
+		lock_mount_hash();
 		mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
 		mnt->mnt.mnt_flags = mnt_flags;
-		br_write_unlock(&vfsmount_lock);
-	}
-	up_write(&sb->s_umount);
-	if (!err) {
-		br_write_lock(&vfsmount_lock);
 		touch_mnt_namespace(mnt->mnt_ns);
-		br_write_unlock(&vfsmount_lock);
+		unlock_mount_hash();
 	}
+	up_write(&sb->s_umount);
 	return err;
 }
 
@@ -1972,7 +1997,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
 	struct mount *parent;
 	int err;
 
-	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
+	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
 
 	mp = lock_mount(path);
 	if (IS_ERR(mp))
@@ -2077,9 +2102,7 @@ fail:
 	/* remove m from any expiration list it may be on */
 	if (!list_empty(&mnt->mnt_expire)) {
 		namespace_lock();
-		br_write_lock(&vfsmount_lock);
 		list_del_init(&mnt->mnt_expire);
-		br_write_unlock(&vfsmount_lock);
 		namespace_unlock();
 	}
 	mntput(m);
@@ -2095,11 +2118,9 @@ fail:
 void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
 {
 	namespace_lock();
-	br_write_lock(&vfsmount_lock);
 
 	list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
 
-	br_write_unlock(&vfsmount_lock);
 	namespace_unlock();
 }
 EXPORT_SYMBOL(mnt_set_expiry);
@@ -2118,7 +2139,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 		return;
 
 	namespace_lock();
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 
 	/* extract from the expiration list every vfsmount that matches the
 	 * following criteria:
@@ -2137,7 +2158,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 		touch_mnt_namespace(mnt->mnt_ns);
 		umount_tree(mnt, 1);
 	}
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	namespace_unlock();
 }
 
@@ -2193,7 +2214,7 @@ resume:
  * process a list of expirable mountpoints with the intent of discarding any
  * submounts of a specific parent mountpoint
  *
- * vfsmount_lock must be held for write
+ * mount_lock must be held for write
  */
 static void shrink_submounts(struct mount *mnt)
 {
@@ -2414,20 +2435,25 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	return new_ns;
 }
 
-/*
- * Allocate a new namespace structure and populate it with contents
- * copied from the namespace of the passed in task structure.
- */
-static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
-		struct user_namespace *user_ns, struct fs_struct *fs)
+struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
+		struct user_namespace *user_ns, struct fs_struct *new_fs)
 {
 	struct mnt_namespace *new_ns;
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
 	struct mount *p, *q;
-	struct mount *old = mnt_ns->root;
+	struct mount *old;
 	struct mount *new;
 	int copy_flags;
 
+	BUG_ON(!ns);
+
+	if (likely(!(flags & CLONE_NEWNS))) {
+		get_mnt_ns(ns);
+		return ns;
+	}
+
+	old = ns->root;
+
 	new_ns = alloc_mnt_ns(user_ns);
 	if (IS_ERR(new_ns))
 		return new_ns;
@@ -2435,7 +2461,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	namespace_lock();
 	/* First pass: copy the tree topology */
 	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
-	if (user_ns != mnt_ns->user_ns)
+	if (user_ns != ns->user_ns)
 		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
 	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
 	if (IS_ERR(new)) {
@@ -2444,9 +2470,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 		return ERR_CAST(new);
 	}
 	new_ns->root = new;
-	br_write_lock(&vfsmount_lock);
 	list_add_tail(&new_ns->list, &new->mnt_list);
-	br_write_unlock(&vfsmount_lock);
 
 	/*
 	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
@@ -2457,13 +2481,13 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	q = new;
 	while (p) {
 		q->mnt_ns = new_ns;
-		if (fs) {
-			if (&p->mnt == fs->root.mnt) {
-				fs->root.mnt = mntget(&q->mnt);
+		if (new_fs) {
+			if (&p->mnt == new_fs->root.mnt) {
+				new_fs->root.mnt = mntget(&q->mnt);
 				rootmnt = &p->mnt;
 			}
-			if (&p->mnt == fs->pwd.mnt) {
-				fs->pwd.mnt = mntget(&q->mnt);
+			if (&p->mnt == new_fs->pwd.mnt) {
+				new_fs->pwd.mnt = mntget(&q->mnt);
 				pwdmnt = &p->mnt;
 			}
 		}
@@ -2484,23 +2508,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	return new_ns;
 }
 
-struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
-		struct user_namespace *user_ns, struct fs_struct *new_fs)
-{
-	struct mnt_namespace *new_ns;
-
-	BUG_ON(!ns);
-	get_mnt_ns(ns);
-
-	if (!(flags & CLONE_NEWNS))
-		return ns;
-
-	new_ns = dup_mnt_ns(ns, user_ns, new_fs);
-
-	put_mnt_ns(ns);
-	return new_ns;
-}
-
 /**
  * create_mnt_ns - creates a private namespace and adds a root filesystem
  * @mnt: pointer to the new root filesystem mountpoint
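Folding dup_mnt_ns() into copy_mnt_ns() removes the refcount churn: previously every caller took get_mnt_ns() up front and, when CLONE_NEWNS was set, dropped it again with put_mnt_ns() after duplicating. Now the common no-CLONE_NEWNS path is a single refcount bump and the duplicate path never touches the old namespace's count. Caller-side sketch (hypothetical variable names, mirroring what the fork() path does):

    struct mnt_namespace *ns;

    ns = copy_mnt_ns(clone_flags, current_ns, user_ns, new_fs);
    if (IS_ERR(ns))
    	return PTR_ERR(ns);	/* old namespace's count is untouched on failure */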
@@ -2593,7 +2600,7 @@ out_type:
 /*
  * Return true if path is reachable from root
  *
- * namespace_sem or vfsmount_lock is held
+ * namespace_sem or mount_lock is held
  */
 bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
 			 const struct path *root)
@@ -2608,9 +2615,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
 int path_is_under(struct path *path1, struct path *path2)
 {
 	int res;
-	br_read_lock(&vfsmount_lock);
+	read_seqlock_excl(&mount_lock);
 	res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
-	br_read_unlock(&vfsmount_lock);
+	read_sequnlock_excl(&mount_lock);
 	return res;
 }
 EXPORT_SYMBOL(path_is_under);
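read_seqlock_excl() is the seqlock's "locking reader": it takes the underlying spinlock for a stable view but does not bump the sequence count, so concurrent lockless readers are not forced to retry. That makes it the natural replacement for the old br_read_lock() in places like this that walk ->mnt_parent chains but modify nothing. Pattern sketch:

    read_seqlock_excl(&mount_lock);	/* excludes writers; invisible to
    					 * read_seqbegin()/read_seqretry() users */
    /* ... walk the mount tree ... */
    read_sequnlock_excl(&mount_lock);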
@@ -2701,7 +2708,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	if (!is_path_reachable(old_mnt, old.dentry, &new))
 		goto out4;
 	root_mp->m_count++; /* pin it so it won't go away */
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	detach_mnt(new_mnt, &parent_path);
 	detach_mnt(root_mnt, &root_parent);
 	if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
@@ -2713,7 +2720,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	/* mount new_root on / */
 	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	chroot_fs_refs(&root, &new);
 	put_mountpoint(root_mp);
 	error = 0;
@@ -2767,8 +2774,6 @@ void __init mnt_init(void)
 	unsigned u;
 	int err;
 
-	init_rwsem(&namespace_sem);
-
 	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
 			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
 
@@ -2785,8 +2790,6 @@ void __init mnt_init(void)
 	for (u = 0; u < HASH_SIZE; u++)
 		INIT_LIST_HEAD(&mountpoint_hashtable[u]);
 
-	br_lock_init(&vfsmount_lock);
-
 	err = sysfs_init();
 	if (err)
 		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2802,11 +2805,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
 {
 	if (!atomic_dec_and_test(&ns->count))
 		return;
-	namespace_lock();
-	br_write_lock(&vfsmount_lock);
-	umount_tree(ns->root, 0);
-	br_write_unlock(&vfsmount_lock);
-	namespace_unlock();
+	drop_collected_mounts(&ns->root->mnt);
 	free_mnt_ns(ns);
 }
 
@@ -2829,9 +2828,8 @@ void kern_unmount(struct vfsmount *mnt)
 {
 	/* release long term mount so mount point can be released */
 	if (!IS_ERR_OR_NULL(mnt)) {
-		br_write_lock(&vfsmount_lock);
 		real_mount(mnt)->mnt_ns = NULL;
-		br_write_unlock(&vfsmount_lock);
+		synchronize_rcu();	/* yecchhh... */
 		mntput(mnt);
 	}
 }
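The synchronize_rcu() in kern_unmount() covers long-term kernel-internal mounts, which are dropped directly with mntput() rather than through umount_tree()/namespace_unlock(): clearing ->mnt_ns opens the final-mntput path, so a grace period has to pass first to make sure no lockless walker is still in the middle of legitimizing this mount. A condensed view of the ordering this enforces (annotation only, same calls as above):

    real_mount(mnt)->mnt_ns = NULL;	/* mntput() can now treat it as the last ref */
    synchronize_rcu();			/* all rcu-walkers that could still find
    					 * the mount have finished */
    mntput(mnt);			/* safe to tear down */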
@@ -2875,7 +2873,7 @@ bool fs_fully_visible(struct file_system_type *type)
 	if (unlikely(!ns))
 		return false;
 
-	namespace_lock();
+	down_read(&namespace_sem);
 	list_for_each_entry(mnt, &ns->list, mnt_list) {
 		struct mount *child;
 		if (mnt->mnt.mnt_sb->s_type != type)
@@ -2896,7 +2894,7 @@ bool fs_fully_visible(struct file_system_type *type)
 	next:	;
 	}
 found:
-	namespace_unlock();
+	up_read(&namespace_sem);
 	return visible;
 }
 