diff options
Diffstat (limited to 'fs/namespace.c')
-rw-r--r-- | fs/namespace.c | 242 |
1 files changed, 199 insertions, 43 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 03b82350f020..3ddfd9046c44 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -138,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt) | |||
138 | mnt->mnt_group_id = 0; | 138 | mnt->mnt_group_id = 0; |
139 | } | 139 | } |
140 | 140 | ||
141 | /* | ||
142 | * vfsmount lock must be held for read | ||
143 | */ | ||
144 | static inline void mnt_add_count(struct vfsmount *mnt, int n) | ||
145 | { | ||
146 | #ifdef CONFIG_SMP | ||
147 | this_cpu_add(mnt->mnt_pcp->mnt_count, n); | ||
148 | #else | ||
149 | preempt_disable(); | ||
150 | mnt->mnt_count += n; | ||
151 | preempt_enable(); | ||
152 | #endif | ||
153 | } | ||
154 | |||
155 | static inline void mnt_set_count(struct vfsmount *mnt, int n) | ||
156 | { | ||
157 | #ifdef CONFIG_SMP | ||
158 | this_cpu_write(mnt->mnt_pcp->mnt_count, n); | ||
159 | #else | ||
160 | mnt->mnt_count = n; | ||
161 | #endif | ||
162 | } | ||
163 | |||
164 | /* | ||
165 | * vfsmount lock must be held for read | ||
166 | */ | ||
167 | static inline void mnt_inc_count(struct vfsmount *mnt) | ||
168 | { | ||
169 | mnt_add_count(mnt, 1); | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * vfsmount lock must be held for read | ||
174 | */ | ||
175 | static inline void mnt_dec_count(struct vfsmount *mnt) | ||
176 | { | ||
177 | mnt_add_count(mnt, -1); | ||
178 | } | ||
179 | |||
180 | /* | ||
181 | * vfsmount lock must be held for write | ||
182 | */ | ||
183 | unsigned int mnt_get_count(struct vfsmount *mnt) | ||
184 | { | ||
185 | #ifdef CONFIG_SMP | ||
186 | unsigned int count = atomic_read(&mnt->mnt_longrefs); | ||
187 | int cpu; | ||
188 | |||
189 | for_each_possible_cpu(cpu) { | ||
190 | count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; | ||
191 | } | ||
192 | |||
193 | return count; | ||
194 | #else | ||
195 | return mnt->mnt_count; | ||
196 | #endif | ||
197 | } | ||
198 | |||
141 | struct vfsmount *alloc_vfsmnt(const char *name) | 199 | struct vfsmount *alloc_vfsmnt(const char *name) |
142 | { | 200 | { |
143 | struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); | 201 | struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); |
@@ -154,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name) | |||
154 | goto out_free_id; | 212 | goto out_free_id; |
155 | } | 213 | } |
156 | 214 | ||
157 | atomic_set(&mnt->mnt_count, 1); | 215 | #ifdef CONFIG_SMP |
216 | mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); | ||
217 | if (!mnt->mnt_pcp) | ||
218 | goto out_free_devname; | ||
219 | |||
220 | atomic_set(&mnt->mnt_longrefs, 1); | ||
221 | #else | ||
222 | mnt->mnt_count = 1; | ||
223 | mnt->mnt_writers = 0; | ||
224 | #endif | ||
225 | |||
158 | INIT_LIST_HEAD(&mnt->mnt_hash); | 226 | INIT_LIST_HEAD(&mnt->mnt_hash); |
159 | INIT_LIST_HEAD(&mnt->mnt_child); | 227 | INIT_LIST_HEAD(&mnt->mnt_child); |
160 | INIT_LIST_HEAD(&mnt->mnt_mounts); | 228 | INIT_LIST_HEAD(&mnt->mnt_mounts); |
@@ -166,13 +234,6 @@ struct vfsmount *alloc_vfsmnt(const char *name) | |||
166 | #ifdef CONFIG_FSNOTIFY | 234 | #ifdef CONFIG_FSNOTIFY |
167 | INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); | 235 | INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); |
168 | #endif | 236 | #endif |
169 | #ifdef CONFIG_SMP | ||
170 | mnt->mnt_writers = alloc_percpu(int); | ||
171 | if (!mnt->mnt_writers) | ||
172 | goto out_free_devname; | ||
173 | #else | ||
174 | mnt->mnt_writers = 0; | ||
175 | #endif | ||
176 | } | 237 | } |
177 | return mnt; | 238 | return mnt; |
178 | 239 | ||
@@ -219,7 +280,7 @@ EXPORT_SYMBOL_GPL(__mnt_is_readonly); | |||
219 | static inline void mnt_inc_writers(struct vfsmount *mnt) | 280 | static inline void mnt_inc_writers(struct vfsmount *mnt) |
220 | { | 281 | { |
221 | #ifdef CONFIG_SMP | 282 | #ifdef CONFIG_SMP |
222 | (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; | 283 | this_cpu_inc(mnt->mnt_pcp->mnt_writers); |
223 | #else | 284 | #else |
224 | mnt->mnt_writers++; | 285 | mnt->mnt_writers++; |
225 | #endif | 286 | #endif |
@@ -228,7 +289,7 @@ static inline void mnt_inc_writers(struct vfsmount *mnt) | |||
228 | static inline void mnt_dec_writers(struct vfsmount *mnt) | 289 | static inline void mnt_dec_writers(struct vfsmount *mnt) |
229 | { | 290 | { |
230 | #ifdef CONFIG_SMP | 291 | #ifdef CONFIG_SMP |
231 | (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; | 292 | this_cpu_dec(mnt->mnt_pcp->mnt_writers); |
232 | #else | 293 | #else |
233 | mnt->mnt_writers--; | 294 | mnt->mnt_writers--; |
234 | #endif | 295 | #endif |
@@ -241,7 +302,7 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt) | |||
241 | int cpu; | 302 | int cpu; |
242 | 303 | ||
243 | for_each_possible_cpu(cpu) { | 304 | for_each_possible_cpu(cpu) { |
244 | count += *per_cpu_ptr(mnt->mnt_writers, cpu); | 305 | count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; |
245 | } | 306 | } |
246 | 307 | ||
247 | return count; | 308 | return count; |
@@ -418,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt) | |||
418 | kfree(mnt->mnt_devname); | 479 | kfree(mnt->mnt_devname); |
419 | mnt_free_id(mnt); | 480 | mnt_free_id(mnt); |
420 | #ifdef CONFIG_SMP | 481 | #ifdef CONFIG_SMP |
421 | free_percpu(mnt->mnt_writers); | 482 | free_percpu(mnt->mnt_pcp); |
422 | #endif | 483 | #endif |
423 | kmem_cache_free(mnt_cache, mnt); | 484 | kmem_cache_free(mnt_cache, mnt); |
424 | } | 485 | } |
@@ -652,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, | |||
652 | return NULL; | 713 | return NULL; |
653 | } | 714 | } |
654 | 715 | ||
655 | static inline void __mntput(struct vfsmount *mnt) | 716 | static inline void mntfree(struct vfsmount *mnt) |
656 | { | 717 | { |
657 | struct super_block *sb = mnt->mnt_sb; | 718 | struct super_block *sb = mnt->mnt_sb; |
719 | |||
658 | /* | 720 | /* |
659 | * This probably indicates that somebody messed | 721 | * This probably indicates that somebody messed |
660 | * up a mnt_want/drop_write() pair. If this | 722 | * up a mnt_want/drop_write() pair. If this |
@@ -662,8 +724,8 @@ static inline void __mntput(struct vfsmount *mnt) | |||
662 | * to make r/w->r/o transitions. | 724 | * to make r/w->r/o transitions. |
663 | */ | 725 | */ |
664 | /* | 726 | /* |
665 | * atomic_dec_and_lock() used to deal with ->mnt_count decrements | 727 | * The locking used to deal with mnt_count decrement provides barriers, |
666 | * provides barriers, so mnt_get_writers() below is safe. AV | 728 | * so mnt_get_writers() below is safe. |
667 | */ | 729 | */ |
668 | WARN_ON(mnt_get_writers(mnt)); | 730 | WARN_ON(mnt_get_writers(mnt)); |
669 | fsnotify_vfsmount_delete(mnt); | 731 | fsnotify_vfsmount_delete(mnt); |
@@ -672,28 +734,113 @@ static inline void __mntput(struct vfsmount *mnt) | |||
672 | deactivate_super(sb); | 734 | deactivate_super(sb); |
673 | } | 735 | } |
674 | 736 | ||
675 | void mntput_no_expire(struct vfsmount *mnt) | 737 | #ifdef CONFIG_SMP |
676 | { | 738 | static inline void __mntput(struct vfsmount *mnt, int longrefs) |
677 | repeat: | 739 | { |
678 | if (atomic_add_unless(&mnt->mnt_count, -1, 1)) | 740 | if (!longrefs) { |
679 | return; | 741 | put_again: |
742 | br_read_lock(vfsmount_lock); | ||
743 | if (likely(atomic_read(&mnt->mnt_longrefs))) { | ||
744 | mnt_dec_count(mnt); | ||
745 | br_read_unlock(vfsmount_lock); | ||
746 | return; | ||
747 | } | ||
748 | br_read_unlock(vfsmount_lock); | ||
749 | } else { | ||
750 | BUG_ON(!atomic_read(&mnt->mnt_longrefs)); | ||
751 | if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1)) | ||
752 | return; | ||
753 | } | ||
754 | |||
680 | br_write_lock(vfsmount_lock); | 755 | br_write_lock(vfsmount_lock); |
681 | if (!atomic_dec_and_test(&mnt->mnt_count)) { | 756 | if (!longrefs) |
757 | mnt_dec_count(mnt); | ||
758 | else | ||
759 | atomic_dec(&mnt->mnt_longrefs); | ||
760 | if (mnt_get_count(mnt)) { | ||
682 | br_write_unlock(vfsmount_lock); | 761 | br_write_unlock(vfsmount_lock); |
683 | return; | 762 | return; |
684 | } | 763 | } |
685 | if (likely(!mnt->mnt_pinned)) { | 764 | if (unlikely(mnt->mnt_pinned)) { |
765 | mnt_add_count(mnt, mnt->mnt_pinned + 1); | ||
766 | mnt->mnt_pinned = 0; | ||
686 | br_write_unlock(vfsmount_lock); | 767 | br_write_unlock(vfsmount_lock); |
687 | __mntput(mnt); | 768 | acct_auto_close_mnt(mnt); |
769 | goto put_again; | ||
770 | } | ||
771 | br_write_unlock(vfsmount_lock); | ||
772 | mntfree(mnt); | ||
773 | } | ||
774 | #else | ||
775 | static inline void __mntput(struct vfsmount *mnt, int longrefs) | ||
776 | { | ||
777 | put_again: | ||
778 | mnt_dec_count(mnt); | ||
779 | if (likely(mnt_get_count(mnt))) | ||
688 | return; | 780 | return; |
781 | br_write_lock(vfsmount_lock); | ||
782 | if (unlikely(mnt->mnt_pinned)) { | ||
783 | mnt_add_count(mnt, mnt->mnt_pinned + 1); | ||
784 | mnt->mnt_pinned = 0; | ||
785 | br_write_unlock(vfsmount_lock); | ||
786 | acct_auto_close_mnt(mnt); | ||
787 | goto put_again; | ||
689 | } | 788 | } |
690 | atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); | ||
691 | mnt->mnt_pinned = 0; | ||
692 | br_write_unlock(vfsmount_lock); | 789 | br_write_unlock(vfsmount_lock); |
693 | acct_auto_close_mnt(mnt); | 790 | mntfree(mnt); |
694 | goto repeat; | 791 | } |
792 | #endif | ||
793 | |||
794 | static void mntput_no_expire(struct vfsmount *mnt) | ||
795 | { | ||
796 | __mntput(mnt, 0); | ||
797 | } | ||
798 | |||
799 | void mntput(struct vfsmount *mnt) | ||
800 | { | ||
801 | if (mnt) { | ||
802 | /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ | ||
803 | if (unlikely(mnt->mnt_expiry_mark)) | ||
804 | mnt->mnt_expiry_mark = 0; | ||
805 | __mntput(mnt, 0); | ||
806 | } | ||
807 | } | ||
808 | EXPORT_SYMBOL(mntput); | ||
809 | |||
810 | struct vfsmount *mntget(struct vfsmount *mnt) | ||
811 | { | ||
812 | if (mnt) | ||
813 | mnt_inc_count(mnt); | ||
814 | return mnt; | ||
815 | } | ||
816 | EXPORT_SYMBOL(mntget); | ||
817 | |||
818 | void mntput_long(struct vfsmount *mnt) | ||
819 | { | ||
820 | #ifdef CONFIG_SMP | ||
821 | if (mnt) { | ||
822 | /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ | ||
823 | if (unlikely(mnt->mnt_expiry_mark)) | ||
824 | mnt->mnt_expiry_mark = 0; | ||
825 | __mntput(mnt, 1); | ||
826 | } | ||
827 | #else | ||
828 | mntput(mnt); | ||
829 | #endif | ||
695 | } | 830 | } |
696 | EXPORT_SYMBOL(mntput_no_expire); | 831 | EXPORT_SYMBOL(mntput_long); |
832 | |||
833 | struct vfsmount *mntget_long(struct vfsmount *mnt) | ||
834 | { | ||
835 | #ifdef CONFIG_SMP | ||
836 | if (mnt) | ||
837 | atomic_inc(&mnt->mnt_longrefs); | ||
838 | return mnt; | ||
839 | #else | ||
840 | return mntget(mnt); | ||
841 | #endif | ||
842 | } | ||
843 | EXPORT_SYMBOL(mntget_long); | ||
697 | 844 | ||
698 | void mnt_pin(struct vfsmount *mnt) | 845 | void mnt_pin(struct vfsmount *mnt) |
699 | { | 846 | { |
@@ -701,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt) | |||
701 | mnt->mnt_pinned++; | 848 | mnt->mnt_pinned++; |
702 | br_write_unlock(vfsmount_lock); | 849 | br_write_unlock(vfsmount_lock); |
703 | } | 850 | } |
704 | |||
705 | EXPORT_SYMBOL(mnt_pin); | 851 | EXPORT_SYMBOL(mnt_pin); |
706 | 852 | ||
707 | void mnt_unpin(struct vfsmount *mnt) | 853 | void mnt_unpin(struct vfsmount *mnt) |
708 | { | 854 | { |
709 | br_write_lock(vfsmount_lock); | 855 | br_write_lock(vfsmount_lock); |
710 | if (mnt->mnt_pinned) { | 856 | if (mnt->mnt_pinned) { |
711 | atomic_inc(&mnt->mnt_count); | 857 | mnt_inc_count(mnt); |
712 | mnt->mnt_pinned--; | 858 | mnt->mnt_pinned--; |
713 | } | 859 | } |
714 | br_write_unlock(vfsmount_lock); | 860 | br_write_unlock(vfsmount_lock); |
715 | } | 861 | } |
716 | |||
717 | EXPORT_SYMBOL(mnt_unpin); | 862 | EXPORT_SYMBOL(mnt_unpin); |
718 | 863 | ||
719 | static inline void mangle(struct seq_file *m, const char *s) | 864 | static inline void mangle(struct seq_file *m, const char *s) |
@@ -1008,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt) | |||
1008 | int minimum_refs = 0; | 1153 | int minimum_refs = 0; |
1009 | struct vfsmount *p; | 1154 | struct vfsmount *p; |
1010 | 1155 | ||
1011 | br_read_lock(vfsmount_lock); | 1156 | /* write lock needed for mnt_get_count */ |
1157 | br_write_lock(vfsmount_lock); | ||
1012 | for (p = mnt; p; p = next_mnt(p, mnt)) { | 1158 | for (p = mnt; p; p = next_mnt(p, mnt)) { |
1013 | actual_refs += atomic_read(&p->mnt_count); | 1159 | actual_refs += mnt_get_count(p); |
1014 | minimum_refs += 2; | 1160 | minimum_refs += 2; |
1015 | } | 1161 | } |
1016 | br_read_unlock(vfsmount_lock); | 1162 | br_write_unlock(vfsmount_lock); |
1017 | 1163 | ||
1018 | if (actual_refs > minimum_refs) | 1164 | if (actual_refs > minimum_refs) |
1019 | return 0; | 1165 | return 0; |
@@ -1040,10 +1186,10 @@ int may_umount(struct vfsmount *mnt) | |||
1040 | { | 1186 | { |
1041 | int ret = 1; | 1187 | int ret = 1; |
1042 | down_read(&namespace_sem); | 1188 | down_read(&namespace_sem); |
1043 | br_read_lock(vfsmount_lock); | 1189 | br_write_lock(vfsmount_lock); |
1044 | if (propagate_mount_busy(mnt, 2)) | 1190 | if (propagate_mount_busy(mnt, 2)) |
1045 | ret = 0; | 1191 | ret = 0; |
1046 | br_read_unlock(vfsmount_lock); | 1192 | br_write_unlock(vfsmount_lock); |
1047 | up_read(&namespace_sem); | 1193 | up_read(&namespace_sem); |
1048 | return ret; | 1194 | return ret; |
1049 | } | 1195 | } |
@@ -1070,7 +1216,7 @@ void release_mounts(struct list_head *head) | |||
1070 | dput(dentry); | 1216 | dput(dentry); |
1071 | mntput(m); | 1217 | mntput(m); |
1072 | } | 1218 | } |
1073 | mntput(mnt); | 1219 | mntput_long(mnt); |
1074 | } | 1220 | } |
1075 | } | 1221 | } |
1076 | 1222 | ||
@@ -1125,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags) | |||
1125 | flags & (MNT_FORCE | MNT_DETACH)) | 1271 | flags & (MNT_FORCE | MNT_DETACH)) |
1126 | return -EINVAL; | 1272 | return -EINVAL; |
1127 | 1273 | ||
1128 | if (atomic_read(&mnt->mnt_count) != 2) | 1274 | /* |
1275 | * probably don't strictly need the lock here if we examined | ||
1276 | * all race cases, but it's a slowpath. | ||
1277 | */ | ||
1278 | br_write_lock(vfsmount_lock); | ||
1279 | if (mnt_get_count(mnt) != 2) { | ||
1280 | br_write_unlock(vfsmount_lock); | |
1129 | return -EBUSY; | 1281 | return -EBUSY; |
1282 | } | ||
1283 | br_write_unlock(vfsmount_lock); | ||
1130 | 1284 | ||
1131 | if (!xchg(&mnt->mnt_expiry_mark, 1)) | 1285 | if (!xchg(&mnt->mnt_expiry_mark, 1)) |
1132 | return -EAGAIN; | 1286 | return -EAGAIN; |
@@ -1815,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, | |||
1815 | 1969 | ||
1816 | unlock: | 1970 | unlock: |
1817 | up_write(&namespace_sem); | 1971 | up_write(&namespace_sem); |
1818 | mntput(newmnt); | 1972 | mntput_long(newmnt); |
1819 | return err; | 1973 | return err; |
1820 | } | 1974 | } |
1821 | 1975 | ||
@@ -2148,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
2148 | if (fs) { | 2302 | if (fs) { |
2149 | if (p == fs->root.mnt) { | 2303 | if (p == fs->root.mnt) { |
2150 | rootmnt = p; | 2304 | rootmnt = p; |
2151 | fs->root.mnt = mntget(q); | 2305 | fs->root.mnt = mntget_long(q); |
2152 | } | 2306 | } |
2153 | if (p == fs->pwd.mnt) { | 2307 | if (p == fs->pwd.mnt) { |
2154 | pwdmnt = p; | 2308 | pwdmnt = p; |
2155 | fs->pwd.mnt = mntget(q); | 2309 | fs->pwd.mnt = mntget_long(q); |
2156 | } | 2310 | } |
2157 | } | 2311 | } |
2158 | p = next_mnt(p, mnt_ns->root); | 2312 | p = next_mnt(p, mnt_ns->root); |
@@ -2161,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
2161 | up_write(&namespace_sem); | 2315 | up_write(&namespace_sem); |
2162 | 2316 | ||
2163 | if (rootmnt) | 2317 | if (rootmnt) |
2164 | mntput(rootmnt); | 2318 | mntput_long(rootmnt); |
2165 | if (pwdmnt) | 2319 | if (pwdmnt) |
2166 | mntput(pwdmnt); | 2320 | mntput_long(pwdmnt); |
2167 | 2321 | ||
2168 | return new_ns; | 2322 | return new_ns; |
2169 | } | 2323 | } |
@@ -2350,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2350 | touch_mnt_namespace(current->nsproxy->mnt_ns); | 2504 | touch_mnt_namespace(current->nsproxy->mnt_ns); |
2351 | br_write_unlock(vfsmount_lock); | 2505 | br_write_unlock(vfsmount_lock); |
2352 | chroot_fs_refs(&root, &new); | 2506 | chroot_fs_refs(&root, &new); |
2507 | |||
2353 | error = 0; | 2508 | error = 0; |
2354 | path_put(&root_parent); | 2509 | path_put(&root_parent); |
2355 | path_put(&parent_path); | 2510 | path_put(&parent_path); |
@@ -2376,6 +2531,7 @@ static void __init init_mount_tree(void) | |||
2376 | mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); | 2531 | mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); |
2377 | if (IS_ERR(mnt)) | 2532 | if (IS_ERR(mnt)) |
2378 | panic("Can't create rootfs"); | 2533 | panic("Can't create rootfs"); |
2534 | |||
2379 | ns = create_mnt_ns(mnt); | 2535 | ns = create_mnt_ns(mnt); |
2380 | if (IS_ERR(ns)) | 2536 | if (IS_ERR(ns)) |
2381 | panic("Can't allocate initial namespace"); | 2537 | panic("Can't allocate initial namespace"); |