diff options
-rw-r--r-- | fs/dcache.c | 20 | ||||
-rw-r--r-- | fs/mount.h | 10 | ||||
-rw-r--r-- | fs/namei.c | 50 | ||||
-rw-r--r-- | fs/namespace.c | 135 | ||||
-rw-r--r-- | include/linux/mount.h | 2 | ||||
-rw-r--r-- | include/linux/namei.h | 2 |
6 files changed, 136 insertions, 83 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index eb0978da1bd4..aafa2a146434 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -2887,24 +2887,28 @@ static int prepend_path(const struct path *path, | |||
2887 | struct vfsmount *vfsmnt = path->mnt; | 2887 | struct vfsmount *vfsmnt = path->mnt; |
2888 | struct mount *mnt = real_mount(vfsmnt); | 2888 | struct mount *mnt = real_mount(vfsmnt); |
2889 | int error = 0; | 2889 | int error = 0; |
2890 | unsigned seq = 0; | 2890 | unsigned seq, m_seq = 0; |
2891 | char *bptr; | 2891 | char *bptr; |
2892 | int blen; | 2892 | int blen; |
2893 | 2893 | ||
2894 | br_read_lock(&vfsmount_lock); | ||
2895 | rcu_read_lock(); | 2894 | rcu_read_lock(); |
2895 | restart_mnt: | ||
2896 | read_seqbegin_or_lock(&mount_lock, &m_seq); | ||
2897 | seq = 0; | ||
2896 | restart: | 2898 | restart: |
2897 | bptr = *buffer; | 2899 | bptr = *buffer; |
2898 | blen = *buflen; | 2900 | blen = *buflen; |
2901 | error = 0; | ||
2899 | read_seqbegin_or_lock(&rename_lock, &seq); | 2902 | read_seqbegin_or_lock(&rename_lock, &seq); |
2900 | while (dentry != root->dentry || vfsmnt != root->mnt) { | 2903 | while (dentry != root->dentry || vfsmnt != root->mnt) { |
2901 | struct dentry * parent; | 2904 | struct dentry * parent; |
2902 | 2905 | ||
2903 | if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { | 2906 | if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { |
2907 | struct mount *parent = ACCESS_ONCE(mnt->mnt_parent); | ||
2904 | /* Global root? */ | 2908 | /* Global root? */ |
2905 | if (mnt_has_parent(mnt)) { | 2909 | if (mnt != parent) { |
2906 | dentry = mnt->mnt_mountpoint; | 2910 | dentry = ACCESS_ONCE(mnt->mnt_mountpoint); |
2907 | mnt = mnt->mnt_parent; | 2911 | mnt = parent; |
2908 | vfsmnt = &mnt->mnt; | 2912 | vfsmnt = &mnt->mnt; |
2909 | continue; | 2913 | continue; |
2910 | } | 2914 | } |
@@ -2938,7 +2942,11 @@ restart: | |||
2938 | goto restart; | 2942 | goto restart; |
2939 | } | 2943 | } |
2940 | done_seqretry(&rename_lock, seq); | 2944 | done_seqretry(&rename_lock, seq); |
2941 | br_read_unlock(&vfsmount_lock); | 2945 | if (need_seqretry(&mount_lock, m_seq)) { |
2946 | m_seq = 1; | ||
2947 | goto restart_mnt; | ||
2948 | } | ||
2949 | done_seqretry(&mount_lock, m_seq); | ||
2942 | 2950 | ||
2943 | if (error >= 0 && bptr == *buffer) { | 2951 | if (error >= 0 && bptr == *buffer) { |
2944 | if (--blen < 0) | 2952 | if (--blen < 0) |
diff --git a/fs/mount.h b/fs/mount.h index f0866076de6e..d64c594be6c4 100644 --- a/fs/mount.h +++ b/fs/mount.h | |||
@@ -1,7 +1,6 @@ | |||
1 | #include <linux/mount.h> | 1 | #include <linux/mount.h> |
2 | #include <linux/seq_file.h> | 2 | #include <linux/seq_file.h> |
3 | #include <linux/poll.h> | 3 | #include <linux/poll.h> |
4 | #include <linux/lglock.h> | ||
5 | 4 | ||
6 | struct mnt_namespace { | 5 | struct mnt_namespace { |
7 | atomic_t count; | 6 | atomic_t count; |
@@ -30,6 +29,7 @@ struct mount { | |||
30 | struct mount *mnt_parent; | 29 | struct mount *mnt_parent; |
31 | struct dentry *mnt_mountpoint; | 30 | struct dentry *mnt_mountpoint; |
32 | struct vfsmount mnt; | 31 | struct vfsmount mnt; |
32 | struct rcu_head mnt_rcu; | ||
33 | #ifdef CONFIG_SMP | 33 | #ifdef CONFIG_SMP |
34 | struct mnt_pcp __percpu *mnt_pcp; | 34 | struct mnt_pcp __percpu *mnt_pcp; |
35 | #else | 35 | #else |
@@ -80,21 +80,23 @@ static inline int is_mounted(struct vfsmount *mnt) | |||
80 | extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); | 80 | extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); |
81 | extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *); | 81 | extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *); |
82 | 82 | ||
83 | extern bool legitimize_mnt(struct vfsmount *, unsigned); | ||
84 | |||
83 | static inline void get_mnt_ns(struct mnt_namespace *ns) | 85 | static inline void get_mnt_ns(struct mnt_namespace *ns) |
84 | { | 86 | { |
85 | atomic_inc(&ns->count); | 87 | atomic_inc(&ns->count); |
86 | } | 88 | } |
87 | 89 | ||
88 | extern struct lglock vfsmount_lock; | 90 | extern seqlock_t mount_lock; |
89 | 91 | ||
90 | static inline void lock_mount_hash(void) | 92 | static inline void lock_mount_hash(void) |
91 | { | 93 | { |
92 | br_write_lock(&vfsmount_lock); | 94 | write_seqlock(&mount_lock); |
93 | } | 95 | } |
94 | 96 | ||
95 | static inline void unlock_mount_hash(void) | 97 | static inline void unlock_mount_hash(void) |
96 | { | 98 | { |
97 | br_write_unlock(&vfsmount_lock); | 99 | write_sequnlock(&mount_lock); |
98 | } | 100 | } |
99 | 101 | ||
100 | struct proc_mounts { | 102 | struct proc_mounts { |
diff --git a/fs/namei.c b/fs/namei.c index 1f844fbfce72..cb0ebae07e52 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -484,14 +484,12 @@ EXPORT_SYMBOL(path_put); | |||
484 | 484 | ||
485 | static inline void lock_rcu_walk(void) | 485 | static inline void lock_rcu_walk(void) |
486 | { | 486 | { |
487 | br_read_lock(&vfsmount_lock); | ||
488 | rcu_read_lock(); | 487 | rcu_read_lock(); |
489 | } | 488 | } |
490 | 489 | ||
491 | static inline void unlock_rcu_walk(void) | 490 | static inline void unlock_rcu_walk(void) |
492 | { | 491 | { |
493 | rcu_read_unlock(); | 492 | rcu_read_unlock(); |
494 | br_read_unlock(&vfsmount_lock); | ||
495 | } | 493 | } |
496 | 494 | ||
497 | /** | 495 | /** |
@@ -512,26 +510,23 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) | |||
512 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | 510 | BUG_ON(!(nd->flags & LOOKUP_RCU)); |
513 | 511 | ||
514 | /* | 512 | /* |
515 | * Get a reference to the parent first: we're | 513 | * After legitimizing the bastards, terminate_walk() |
516 | * going to make "path_put(nd->path)" valid in | 514 | * will do the right thing for non-RCU mode, and all our |
517 | * non-RCU context for "terminate_walk()". | 515 | * subsequent exit cases should rcu_read_unlock() |
518 | * | 516 | * before returning. Do vfsmount first; if dentry |
519 | * If this doesn't work, return immediately with | 517 | * can't be legitimized, just set nd->path.dentry to NULL |
520 | * RCU walking still active (and then we will do | 518 | * and rely on dput(NULL) being a no-op. |
521 | * the RCU walk cleanup in terminate_walk()). | ||
522 | */ | 519 | */ |
523 | if (!lockref_get_not_dead(&parent->d_lockref)) | 520 | if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) |
524 | return -ECHILD; | 521 | return -ECHILD; |
525 | |||
526 | /* | ||
527 | * After the mntget(), we terminate_walk() will do | ||
528 | * the right thing for non-RCU mode, and all our | ||
529 | * subsequent exit cases should unlock_rcu_walk() | ||
530 | * before returning. | ||
531 | */ | ||
532 | mntget(nd->path.mnt); | ||
533 | nd->flags &= ~LOOKUP_RCU; | 522 | nd->flags &= ~LOOKUP_RCU; |
534 | 523 | ||
524 | if (!lockref_get_not_dead(&parent->d_lockref)) { | ||
525 | nd->path.dentry = NULL; | ||
526 | unlock_rcu_walk(); | ||
527 | return -ECHILD; | ||
528 | } | ||
529 | |||
535 | /* | 530 | /* |
536 | * For a negative lookup, the lookup sequence point is the parents | 531 | * For a negative lookup, the lookup sequence point is the parents |
537 | * sequence point, and it only needs to revalidate the parent dentry. | 532 | * sequence point, and it only needs to revalidate the parent dentry. |
@@ -608,16 +603,21 @@ static int complete_walk(struct nameidata *nd) | |||
608 | if (!(nd->flags & LOOKUP_ROOT)) | 603 | if (!(nd->flags & LOOKUP_ROOT)) |
609 | nd->root.mnt = NULL; | 604 | nd->root.mnt = NULL; |
610 | 605 | ||
606 | if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) { | ||
607 | unlock_rcu_walk(); | ||
608 | return -ECHILD; | ||
609 | } | ||
611 | if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) { | 610 | if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) { |
612 | unlock_rcu_walk(); | 611 | unlock_rcu_walk(); |
612 | mntput(nd->path.mnt); | ||
613 | return -ECHILD; | 613 | return -ECHILD; |
614 | } | 614 | } |
615 | if (read_seqcount_retry(&dentry->d_seq, nd->seq)) { | 615 | if (read_seqcount_retry(&dentry->d_seq, nd->seq)) { |
616 | unlock_rcu_walk(); | 616 | unlock_rcu_walk(); |
617 | dput(dentry); | 617 | dput(dentry); |
618 | mntput(nd->path.mnt); | ||
618 | return -ECHILD; | 619 | return -ECHILD; |
619 | } | 620 | } |
620 | mntget(nd->path.mnt); | ||
621 | unlock_rcu_walk(); | 621 | unlock_rcu_walk(); |
622 | } | 622 | } |
623 | 623 | ||
@@ -909,15 +909,15 @@ int follow_up(struct path *path) | |||
909 | struct mount *parent; | 909 | struct mount *parent; |
910 | struct dentry *mountpoint; | 910 | struct dentry *mountpoint; |
911 | 911 | ||
912 | br_read_lock(&vfsmount_lock); | 912 | read_seqlock_excl(&mount_lock); |
913 | parent = mnt->mnt_parent; | 913 | parent = mnt->mnt_parent; |
914 | if (parent == mnt) { | 914 | if (parent == mnt) { |
915 | br_read_unlock(&vfsmount_lock); | 915 | read_sequnlock_excl(&mount_lock); |
916 | return 0; | 916 | return 0; |
917 | } | 917 | } |
918 | mntget(&parent->mnt); | 918 | mntget(&parent->mnt); |
919 | mountpoint = dget(mnt->mnt_mountpoint); | 919 | mountpoint = dget(mnt->mnt_mountpoint); |
920 | br_read_unlock(&vfsmount_lock); | 920 | read_sequnlock_excl(&mount_lock); |
921 | dput(path->dentry); | 921 | dput(path->dentry); |
922 | path->dentry = mountpoint; | 922 | path->dentry = mountpoint; |
923 | mntput(path->mnt); | 923 | mntput(path->mnt); |
@@ -1048,8 +1048,8 @@ static int follow_managed(struct path *path, unsigned flags) | |||
1048 | 1048 | ||
1049 | /* Something is mounted on this dentry in another | 1049 | /* Something is mounted on this dentry in another |
1050 | * namespace and/or whatever was mounted there in this | 1050 | * namespace and/or whatever was mounted there in this |
1051 | * namespace got unmounted before we managed to get the | 1051 | * namespace got unmounted before lookup_mnt() could |
1052 | * vfsmount_lock */ | 1052 | * get it */ |
1053 | } | 1053 | } |
1054 | 1054 | ||
1055 | /* Handle an automount point */ | 1055 | /* Handle an automount point */ |
@@ -1864,6 +1864,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1864 | if (flags & LOOKUP_RCU) { | 1864 | if (flags & LOOKUP_RCU) { |
1865 | lock_rcu_walk(); | 1865 | lock_rcu_walk(); |
1866 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | 1866 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); |
1867 | nd->m_seq = read_seqbegin(&mount_lock); | ||
1867 | } else { | 1868 | } else { |
1868 | path_get(&nd->path); | 1869 | path_get(&nd->path); |
1869 | } | 1870 | } |
@@ -1872,6 +1873,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1872 | 1873 | ||
1873 | nd->root.mnt = NULL; | 1874 | nd->root.mnt = NULL; |
1874 | 1875 | ||
1876 | nd->m_seq = read_seqbegin(&mount_lock); | ||
1875 | if (*name=='/') { | 1877 | if (*name=='/') { |
1876 | if (flags & LOOKUP_RCU) { | 1878 | if (flags & LOOKUP_RCU) { |
1877 | lock_rcu_walk(); | 1879 | lock_rcu_walk(); |
diff --git a/fs/namespace.c b/fs/namespace.c index 500202ce10db..ac2ce8a766e1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(fs_kobj); | |||
53 | * It should be taken for write in all cases where the vfsmount | 53 | * It should be taken for write in all cases where the vfsmount |
54 | * tree or hash is modified or when a vfsmount structure is modified. | 54 | * tree or hash is modified or when a vfsmount structure is modified. |
55 | */ | 55 | */ |
56 | DEFINE_BRLOCK(vfsmount_lock); | 56 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); |
57 | 57 | ||
58 | static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) | 58 | static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) |
59 | { | 59 | { |
@@ -547,16 +547,38 @@ static void free_vfsmnt(struct mount *mnt) | |||
547 | kmem_cache_free(mnt_cache, mnt); | 547 | kmem_cache_free(mnt_cache, mnt); |
548 | } | 548 | } |
549 | 549 | ||
550 | /* call under rcu_read_lock */ | ||
551 | bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) | ||
552 | { | ||
553 | struct mount *mnt; | ||
554 | if (read_seqretry(&mount_lock, seq)) | ||
555 | return false; | ||
556 | if (bastard == NULL) | ||
557 | return true; | ||
558 | mnt = real_mount(bastard); | ||
559 | mnt_add_count(mnt, 1); | ||
560 | if (likely(!read_seqretry(&mount_lock, seq))) | ||
561 | return true; | ||
562 | if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { | ||
563 | mnt_add_count(mnt, -1); | ||
564 | return false; | ||
565 | } | ||
566 | rcu_read_unlock(); | ||
567 | mntput(bastard); | ||
568 | rcu_read_lock(); | ||
569 | return false; | ||
570 | } | ||
571 | |||
550 | /* | 572 | /* |
551 | * find the first mount at @dentry on vfsmount @mnt. | 573 | * find the first mount at @dentry on vfsmount @mnt. |
552 | * vfsmount_lock must be held for read or write. | 574 | * call under rcu_read_lock() |
553 | */ | 575 | */ |
554 | struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) | 576 | struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) |
555 | { | 577 | { |
556 | struct list_head *head = mount_hashtable + hash(mnt, dentry); | 578 | struct list_head *head = mount_hashtable + hash(mnt, dentry); |
557 | struct mount *p; | 579 | struct mount *p; |
558 | 580 | ||
559 | list_for_each_entry(p, head, mnt_hash) | 581 | list_for_each_entry_rcu(p, head, mnt_hash) |
560 | if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) | 582 | if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) |
561 | return p; | 583 | return p; |
562 | return NULL; | 584 | return NULL; |
@@ -564,7 +586,7 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) | |||
564 | 586 | ||
565 | /* | 587 | /* |
566 | * find the last mount at @dentry on vfsmount @mnt. | 588 | * find the last mount at @dentry on vfsmount @mnt. |
567 | * vfsmount_lock must be held for read or write. | 589 | * mount_lock must be held. |
568 | */ | 590 | */ |
569 | struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) | 591 | struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) |
570 | { | 592 | { |
@@ -596,17 +618,17 @@ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) | |||
596 | struct vfsmount *lookup_mnt(struct path *path) | 618 | struct vfsmount *lookup_mnt(struct path *path) |
597 | { | 619 | { |
598 | struct mount *child_mnt; | 620 | struct mount *child_mnt; |
621 | struct vfsmount *m; | ||
622 | unsigned seq; | ||
599 | 623 | ||
600 | br_read_lock(&vfsmount_lock); | 624 | rcu_read_lock(); |
601 | child_mnt = __lookup_mnt(path->mnt, path->dentry); | 625 | do { |
602 | if (child_mnt) { | 626 | seq = read_seqbegin(&mount_lock); |
603 | mnt_add_count(child_mnt, 1); | 627 | child_mnt = __lookup_mnt(path->mnt, path->dentry); |
604 | br_read_unlock(&vfsmount_lock); | 628 | m = child_mnt ? &child_mnt->mnt : NULL; |
605 | return &child_mnt->mnt; | 629 | } while (!legitimize_mnt(m, seq)); |
606 | } else { | 630 | rcu_read_unlock(); |
607 | br_read_unlock(&vfsmount_lock); | 631 | return m; |
608 | return NULL; | ||
609 | } | ||
610 | } | 632 | } |
611 | 633 | ||
612 | static struct mountpoint *new_mountpoint(struct dentry *dentry) | 634 | static struct mountpoint *new_mountpoint(struct dentry *dentry) |
@@ -874,38 +896,46 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, | |||
874 | return ERR_PTR(err); | 896 | return ERR_PTR(err); |
875 | } | 897 | } |
876 | 898 | ||
899 | static void delayed_free(struct rcu_head *head) | ||
900 | { | ||
901 | struct mount *mnt = container_of(head, struct mount, mnt_rcu); | ||
902 | kfree(mnt->mnt_devname); | ||
903 | #ifdef CONFIG_SMP | ||
904 | free_percpu(mnt->mnt_pcp); | ||
905 | #endif | ||
906 | kmem_cache_free(mnt_cache, mnt); | ||
907 | } | ||
908 | |||
877 | static void mntput_no_expire(struct mount *mnt) | 909 | static void mntput_no_expire(struct mount *mnt) |
878 | { | 910 | { |
879 | put_again: | 911 | put_again: |
880 | #ifdef CONFIG_SMP | 912 | rcu_read_lock(); |
881 | br_read_lock(&vfsmount_lock); | 913 | mnt_add_count(mnt, -1); |
882 | if (likely(mnt->mnt_ns)) { | 914 | if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ |
883 | /* shouldn't be the last one */ | 915 | rcu_read_unlock(); |
884 | mnt_add_count(mnt, -1); | ||
885 | br_read_unlock(&vfsmount_lock); | ||
886 | return; | 916 | return; |
887 | } | 917 | } |
888 | br_read_unlock(&vfsmount_lock); | ||
889 | |||
890 | lock_mount_hash(); | 918 | lock_mount_hash(); |
891 | mnt_add_count(mnt, -1); | ||
892 | if (mnt_get_count(mnt)) { | 919 | if (mnt_get_count(mnt)) { |
920 | rcu_read_unlock(); | ||
893 | unlock_mount_hash(); | 921 | unlock_mount_hash(); |
894 | return; | 922 | return; |
895 | } | 923 | } |
896 | #else | ||
897 | mnt_add_count(mnt, -1); | ||
898 | if (likely(mnt_get_count(mnt))) | ||
899 | return; | ||
900 | lock_mount_hash(); | ||
901 | #endif | ||
902 | if (unlikely(mnt->mnt_pinned)) { | 924 | if (unlikely(mnt->mnt_pinned)) { |
903 | mnt_add_count(mnt, mnt->mnt_pinned + 1); | 925 | mnt_add_count(mnt, mnt->mnt_pinned + 1); |
904 | mnt->mnt_pinned = 0; | 926 | mnt->mnt_pinned = 0; |
927 | rcu_read_unlock(); | ||
905 | unlock_mount_hash(); | 928 | unlock_mount_hash(); |
906 | acct_auto_close_mnt(&mnt->mnt); | 929 | acct_auto_close_mnt(&mnt->mnt); |
907 | goto put_again; | 930 | goto put_again; |
908 | } | 931 | } |
932 | if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { | ||
933 | rcu_read_unlock(); | ||
934 | unlock_mount_hash(); | ||
935 | return; | ||
936 | } | ||
937 | mnt->mnt.mnt_flags |= MNT_DOOMED; | ||
938 | rcu_read_unlock(); | ||
909 | 939 | ||
910 | list_del(&mnt->mnt_instance); | 940 | list_del(&mnt->mnt_instance); |
911 | unlock_mount_hash(); | 941 | unlock_mount_hash(); |
@@ -924,7 +954,8 @@ put_again: | |||
924 | fsnotify_vfsmount_delete(&mnt->mnt); | 954 | fsnotify_vfsmount_delete(&mnt->mnt); |
925 | dput(mnt->mnt.mnt_root); | 955 | dput(mnt->mnt.mnt_root); |
926 | deactivate_super(mnt->mnt.mnt_sb); | 956 | deactivate_super(mnt->mnt.mnt_sb); |
927 | free_vfsmnt(mnt); | 957 | mnt_free_id(mnt); |
958 | call_rcu(&mnt->mnt_rcu, delayed_free); | ||
928 | } | 959 | } |
929 | 960 | ||
930 | void mntput(struct vfsmount *mnt) | 961 | void mntput(struct vfsmount *mnt) |
@@ -1137,6 +1168,8 @@ static void namespace_unlock(void) | |||
1137 | list_splice_init(&unmounted, &head); | 1168 | list_splice_init(&unmounted, &head); |
1138 | up_write(&namespace_sem); | 1169 | up_write(&namespace_sem); |
1139 | 1170 | ||
1171 | synchronize_rcu(); | ||
1172 | |||
1140 | while (!list_empty(&head)) { | 1173 | while (!list_empty(&head)) { |
1141 | mnt = list_first_entry(&head, struct mount, mnt_hash); | 1174 | mnt = list_first_entry(&head, struct mount, mnt_hash); |
1142 | list_del_init(&mnt->mnt_hash); | 1175 | list_del_init(&mnt->mnt_hash); |
@@ -1152,10 +1185,13 @@ static inline void namespace_lock(void) | |||
1152 | } | 1185 | } |
1153 | 1186 | ||
1154 | /* | 1187 | /* |
1155 | * vfsmount lock must be held for write | 1188 | * mount_lock must be held |
1156 | * namespace_sem must be held for write | 1189 | * namespace_sem must be held for write |
1190 | * how = 0 => just this tree, don't propagate | ||
1191 | * how = 1 => propagate; we know that nobody else has reference to any victims | ||
1192 | * how = 2 => lazy umount | ||
1157 | */ | 1193 | */ |
1158 | void umount_tree(struct mount *mnt, int propagate) | 1194 | void umount_tree(struct mount *mnt, int how) |
1159 | { | 1195 | { |
1160 | LIST_HEAD(tmp_list); | 1196 | LIST_HEAD(tmp_list); |
1161 | struct mount *p; | 1197 | struct mount *p; |
@@ -1163,7 +1199,7 @@ void umount_tree(struct mount *mnt, int propagate) | |||
1163 | for (p = mnt; p; p = next_mnt(p, mnt)) | 1199 | for (p = mnt; p; p = next_mnt(p, mnt)) |
1164 | list_move(&p->mnt_hash, &tmp_list); | 1200 | list_move(&p->mnt_hash, &tmp_list); |
1165 | 1201 | ||
1166 | if (propagate) | 1202 | if (how) |
1167 | propagate_umount(&tmp_list); | 1203 | propagate_umount(&tmp_list); |
1168 | 1204 | ||
1169 | list_for_each_entry(p, &tmp_list, mnt_hash) { | 1205 | list_for_each_entry(p, &tmp_list, mnt_hash) { |
@@ -1171,6 +1207,8 @@ void umount_tree(struct mount *mnt, int propagate) | |||
1171 | list_del_init(&p->mnt_list); | 1207 | list_del_init(&p->mnt_list); |
1172 | __touch_mnt_namespace(p->mnt_ns); | 1208 | __touch_mnt_namespace(p->mnt_ns); |
1173 | p->mnt_ns = NULL; | 1209 | p->mnt_ns = NULL; |
1210 | if (how < 2) | ||
1211 | p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; | ||
1174 | list_del_init(&p->mnt_child); | 1212 | list_del_init(&p->mnt_child); |
1175 | if (mnt_has_parent(p)) { | 1213 | if (mnt_has_parent(p)) { |
1176 | put_mountpoint(p->mnt_mp); | 1214 | put_mountpoint(p->mnt_mp); |
@@ -1262,14 +1300,18 @@ static int do_umount(struct mount *mnt, int flags) | |||
1262 | lock_mount_hash(); | 1300 | lock_mount_hash(); |
1263 | event++; | 1301 | event++; |
1264 | 1302 | ||
1265 | if (!(flags & MNT_DETACH)) | 1303 | if (flags & MNT_DETACH) { |
1266 | shrink_submounts(mnt); | ||
1267 | |||
1268 | retval = -EBUSY; | ||
1269 | if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { | ||
1270 | if (!list_empty(&mnt->mnt_list)) | 1304 | if (!list_empty(&mnt->mnt_list)) |
1271 | umount_tree(mnt, 1); | 1305 | umount_tree(mnt, 2); |
1272 | retval = 0; | 1306 | retval = 0; |
1307 | } else { | ||
1308 | shrink_submounts(mnt); | ||
1309 | retval = -EBUSY; | ||
1310 | if (!propagate_mount_busy(mnt, 2)) { | ||
1311 | if (!list_empty(&mnt->mnt_list)) | ||
1312 | umount_tree(mnt, 1); | ||
1313 | retval = 0; | ||
1314 | } | ||
1273 | } | 1315 | } |
1274 | unlock_mount_hash(); | 1316 | unlock_mount_hash(); |
1275 | namespace_unlock(); | 1317 | namespace_unlock(); |
@@ -1955,7 +1997,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) | |||
1955 | struct mount *parent; | 1997 | struct mount *parent; |
1956 | int err; | 1998 | int err; |
1957 | 1999 | ||
1958 | mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); | 2000 | mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT); |
1959 | 2001 | ||
1960 | mp = lock_mount(path); | 2002 | mp = lock_mount(path); |
1961 | if (IS_ERR(mp)) | 2003 | if (IS_ERR(mp)) |
@@ -2172,7 +2214,7 @@ resume: | |||
2172 | * process a list of expirable mountpoints with the intent of discarding any | 2214 | * process a list of expirable mountpoints with the intent of discarding any |
2173 | * submounts of a specific parent mountpoint | 2215 | * submounts of a specific parent mountpoint |
2174 | * | 2216 | * |
2175 | * vfsmount_lock must be held for write | 2217 | * mount_lock must be held for write |
2176 | */ | 2218 | */ |
2177 | static void shrink_submounts(struct mount *mnt) | 2219 | static void shrink_submounts(struct mount *mnt) |
2178 | { | 2220 | { |
@@ -2558,7 +2600,7 @@ out_type: | |||
2558 | /* | 2600 | /* |
2559 | * Return true if path is reachable from root | 2601 | * Return true if path is reachable from root |
2560 | * | 2602 | * |
2561 | * namespace_sem or vfsmount_lock is held | 2603 | * namespace_sem or mount_lock is held |
2562 | */ | 2604 | */ |
2563 | bool is_path_reachable(struct mount *mnt, struct dentry *dentry, | 2605 | bool is_path_reachable(struct mount *mnt, struct dentry *dentry, |
2564 | const struct path *root) | 2606 | const struct path *root) |
@@ -2573,9 +2615,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, | |||
2573 | int path_is_under(struct path *path1, struct path *path2) | 2615 | int path_is_under(struct path *path1, struct path *path2) |
2574 | { | 2616 | { |
2575 | int res; | 2617 | int res; |
2576 | br_read_lock(&vfsmount_lock); | 2618 | read_seqlock_excl(&mount_lock); |
2577 | res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); | 2619 | res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); |
2578 | br_read_unlock(&vfsmount_lock); | 2620 | read_sequnlock_excl(&mount_lock); |
2579 | return res; | 2621 | return res; |
2580 | } | 2622 | } |
2581 | EXPORT_SYMBOL(path_is_under); | 2623 | EXPORT_SYMBOL(path_is_under); |
@@ -2748,8 +2790,6 @@ void __init mnt_init(void) | |||
2748 | for (u = 0; u < HASH_SIZE; u++) | 2790 | for (u = 0; u < HASH_SIZE; u++) |
2749 | INIT_LIST_HEAD(&mountpoint_hashtable[u]); | 2791 | INIT_LIST_HEAD(&mountpoint_hashtable[u]); |
2750 | 2792 | ||
2751 | br_lock_init(&vfsmount_lock); | ||
2752 | |||
2753 | err = sysfs_init(); | 2793 | err = sysfs_init(); |
2754 | if (err) | 2794 | if (err) |
2755 | printk(KERN_WARNING "%s: sysfs_init error: %d\n", | 2795 | printk(KERN_WARNING "%s: sysfs_init error: %d\n", |
@@ -2788,9 +2828,8 @@ void kern_unmount(struct vfsmount *mnt) | |||
2788 | { | 2828 | { |
2789 | /* release long term mount so mount point can be released */ | 2829 | /* release long term mount so mount point can be released */ |
2790 | if (!IS_ERR_OR_NULL(mnt)) { | 2830 | if (!IS_ERR_OR_NULL(mnt)) { |
2791 | lock_mount_hash(); | ||
2792 | real_mount(mnt)->mnt_ns = NULL; | 2831 | real_mount(mnt)->mnt_ns = NULL; |
2793 | unlock_mount_hash(); | 2832 | synchronize_rcu(); /* yecchhh... */ |
2794 | mntput(mnt); | 2833 | mntput(mnt); |
2795 | } | 2834 | } |
2796 | } | 2835 | } |
diff --git a/include/linux/mount.h b/include/linux/mount.h index 38cd98f112a0..371d346fa270 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h | |||
@@ -49,6 +49,8 @@ struct mnt_namespace; | |||
49 | 49 | ||
50 | #define MNT_LOCK_READONLY 0x400000 | 50 | #define MNT_LOCK_READONLY 0x400000 |
51 | #define MNT_LOCKED 0x800000 | 51 | #define MNT_LOCKED 0x800000 |
52 | #define MNT_DOOMED 0x1000000 | ||
53 | #define MNT_SYNC_UMOUNT 0x2000000 | ||
52 | 54 | ||
53 | struct vfsmount { | 55 | struct vfsmount { |
54 | struct dentry *mnt_root; /* root of the mounted tree */ | 56 | struct dentry *mnt_root; /* root of the mounted tree */ |
diff --git a/include/linux/namei.h b/include/linux/namei.h index 8e47bc7a1665..492de72560fa 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h | |||
@@ -16,7 +16,7 @@ struct nameidata { | |||
16 | struct path root; | 16 | struct path root; |
17 | struct inode *inode; /* path.dentry.d_inode */ | 17 | struct inode *inode; /* path.dentry.d_inode */ |
18 | unsigned int flags; | 18 | unsigned int flags; |
19 | unsigned seq; | 19 | unsigned seq, m_seq; |
20 | int last_type; | 20 | int last_type; |
21 | unsigned depth; | 21 | unsigned depth; |
22 | char *saved_names[MAX_NESTED_LINKS + 1]; | 22 | char *saved_names[MAX_NESTED_LINKS + 1]; |