Diffstat (limited to 'fs/namespace.c')
-rw-r--r-- | fs/namespace.c | 516 |
1 file changed, 383 insertions, 133 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 7953c96a2071..678f7ce060f2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/quotaops.h> | 17 | #include <linux/quotaops.h> |
18 | #include <linux/acct.h> | 18 | #include <linux/acct.h> |
19 | #include <linux/capability.h> | 19 | #include <linux/capability.h> |
20 | #include <linux/cpumask.h> | ||
20 | #include <linux/module.h> | 21 | #include <linux/module.h> |
21 | #include <linux/sysfs.h> | 22 | #include <linux/sysfs.h> |
22 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
@@ -55,6 +56,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) | |||
55 | return tmp & (HASH_SIZE - 1); | 56 | return tmp & (HASH_SIZE - 1); |
56 | } | 57 | } |
57 | 58 | ||
59 | #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) | ||
60 | |||
58 | struct vfsmount *alloc_vfsmnt(const char *name) | 61 | struct vfsmount *alloc_vfsmnt(const char *name) |
59 | { | 62 | { |
60 | struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); | 63 | struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); |
@@ -68,6 +71,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) | |||
68 | INIT_LIST_HEAD(&mnt->mnt_share); | 71 | INIT_LIST_HEAD(&mnt->mnt_share); |
69 | INIT_LIST_HEAD(&mnt->mnt_slave_list); | 72 | INIT_LIST_HEAD(&mnt->mnt_slave_list); |
70 | INIT_LIST_HEAD(&mnt->mnt_slave); | 73 | INIT_LIST_HEAD(&mnt->mnt_slave); |
74 | atomic_set(&mnt->__mnt_writers, 0); | ||
71 | if (name) { | 75 | if (name) { |
72 | int size = strlen(name) + 1; | 76 | int size = strlen(name) + 1; |
73 | char *newname = kmalloc(size, GFP_KERNEL); | 77 | char *newname = kmalloc(size, GFP_KERNEL); |
@@ -80,6 +84,263 @@ struct vfsmount *alloc_vfsmnt(const char *name) | |||
80 | return mnt; | 84 | return mnt; |
81 | } | 85 | } |
82 | 86 | ||
87 | /* | ||
88 | * Most r/o checks on a fs are for operations that take | ||
89 | * discrete amounts of time, like a write() or unlink(). | ||
90 | * We must keep track of when those operations start | ||
91 | * (for permission checks) and when they end, so that | ||
92 | * we can determine when writes are able to occur to | ||
93 | * a filesystem. | ||
94 | */ | ||
95 | /* | ||
96 | * __mnt_is_readonly: check whether a mount is read-only | ||
97 | * @mnt: the mount to check for its write status | ||
98 | * | ||
99 | * This shouldn't be used directly outside of the VFS. | ||
100 | * It does not guarantee that the filesystem will stay | ||
101 | * r/w, just that it is right *now*. This can not and | ||
102 | * should not be used in place of IS_RDONLY(inode). | ||
103 | * mnt_want/drop_write() will _keep_ the filesystem | ||
104 | * r/w. | ||
105 | */ | ||
106 | int __mnt_is_readonly(struct vfsmount *mnt) | ||
107 | { | ||
108 | if (mnt->mnt_flags & MNT_READONLY) | ||
109 | return 1; | ||
110 | if (mnt->mnt_sb->s_flags & MS_RDONLY) | ||
111 | return 1; | ||
112 | return 0; | ||
113 | } | ||
114 | EXPORT_SYMBOL_GPL(__mnt_is_readonly); | ||
115 | |||
116 | struct mnt_writer { | ||
117 | /* | ||
118 | * If holding multiple instances of this lock, they | ||
119 | * must be ordered by cpu number. | ||
120 | */ | ||
121 | spinlock_t lock; | ||
122 | struct lock_class_key lock_class; /* compiles out with !lockdep */ | ||
123 | unsigned long count; | ||
124 | struct vfsmount *mnt; | ||
125 | } ____cacheline_aligned_in_smp; | ||
126 | static DEFINE_PER_CPU(struct mnt_writer, mnt_writers); | ||
127 | |||
128 | static int __init init_mnt_writers(void) | ||
129 | { | ||
130 | int cpu; | ||
131 | for_each_possible_cpu(cpu) { | ||
132 | struct mnt_writer *writer = &per_cpu(mnt_writers, cpu); | ||
133 | spin_lock_init(&writer->lock); | ||
134 | lockdep_set_class(&writer->lock, &writer->lock_class); | ||
135 | writer->count = 0; | ||
136 | } | ||
137 | return 0; | ||
138 | } | ||
139 | fs_initcall(init_mnt_writers); | ||
140 | |||
141 | static void unlock_mnt_writers(void) | ||
142 | { | ||
143 | int cpu; | ||
144 | struct mnt_writer *cpu_writer; | ||
145 | |||
146 | for_each_possible_cpu(cpu) { | ||
147 | cpu_writer = &per_cpu(mnt_writers, cpu); | ||
148 | spin_unlock(&cpu_writer->lock); | ||
149 | } | ||
150 | } | ||
151 | |||
152 | static inline void __clear_mnt_count(struct mnt_writer *cpu_writer) | ||
153 | { | ||
154 | if (!cpu_writer->mnt) | ||
155 | return; | ||
156 | /* | ||
157 | * This is in case anyone ever leaves an invalid, | ||
158 | * old ->mnt and a count of 0. | ||
159 | */ | ||
160 | if (!cpu_writer->count) | ||
161 | return; | ||
162 | atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers); | ||
163 | cpu_writer->count = 0; | ||
164 | } | ||
165 | /* | ||
166 | * must hold cpu_writer->lock | ||
167 | */ | ||
168 | static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer, | ||
169 | struct vfsmount *mnt) | ||
170 | { | ||
171 | if (cpu_writer->mnt == mnt) | ||
172 | return; | ||
173 | __clear_mnt_count(cpu_writer); | ||
174 | cpu_writer->mnt = mnt; | ||
175 | } | ||
176 | |||
177 | /* | ||
178 | * Most r/o checks on a fs are for operations that take | ||
179 | * discrete amounts of time, like a write() or unlink(). | ||
180 | * We must keep track of when those operations start | ||
181 | * (for permission checks) and when they end, so that | ||
182 | * we can determine when writes are able to occur to | ||
183 | * a filesystem. | ||
184 | */ | ||
185 | /** | ||
186 | * mnt_want_write - get write access to a mount | ||
187 | * @mnt: the mount on which to take a write | ||
188 | * | ||
189 | * This tells the low-level filesystem that a write is | ||
190 | * about to be performed to it, and makes sure that | ||
191 | * writes are allowed before returning success. When | ||
192 | * the write operation is finished, mnt_drop_write() | ||
193 | * must be called. This is effectively a refcount. | ||
194 | */ | ||
195 | int mnt_want_write(struct vfsmount *mnt) | ||
196 | { | ||
197 | int ret = 0; | ||
198 | struct mnt_writer *cpu_writer; | ||
199 | |||
200 | cpu_writer = &get_cpu_var(mnt_writers); | ||
201 | spin_lock(&cpu_writer->lock); | ||
202 | if (__mnt_is_readonly(mnt)) { | ||
203 | ret = -EROFS; | ||
204 | goto out; | ||
205 | } | ||
206 | use_cpu_writer_for_mount(cpu_writer, mnt); | ||
207 | cpu_writer->count++; | ||
208 | out: | ||
209 | spin_unlock(&cpu_writer->lock); | ||
210 | put_cpu_var(mnt_writers); | ||
211 | return ret; | ||
212 | } | ||
213 | EXPORT_SYMBOL_GPL(mnt_want_write); | ||
214 | |||
215 | static void lock_mnt_writers(void) | ||
216 | { | ||
217 | int cpu; | ||
218 | struct mnt_writer *cpu_writer; | ||
219 | |||
220 | for_each_possible_cpu(cpu) { | ||
221 | cpu_writer = &per_cpu(mnt_writers, cpu); | ||
222 | spin_lock(&cpu_writer->lock); | ||
223 | __clear_mnt_count(cpu_writer); | ||
224 | cpu_writer->mnt = NULL; | ||
225 | } | ||
226 | } | ||
227 | |||
228 | /* | ||
229 | * These per-cpu write counts are not guaranteed to have | ||
230 | * matched increments and decrements on any given cpu. | ||
231 | * A file open()ed for write on one cpu and close()d on | ||
232 | * another cpu will imbalance this count. Make sure it | ||
233 | * does not get too far out of whack. | ||
234 | */ | ||
235 | static void handle_write_count_underflow(struct vfsmount *mnt) | ||
236 | { | ||
237 | if (atomic_read(&mnt->__mnt_writers) >= | ||
238 | MNT_WRITER_UNDERFLOW_LIMIT) | ||
239 | return; | ||
240 | /* | ||
241 | * It isn't necessary to hold all of the locks | ||
242 | * at the same time, but doing it this way makes | ||
243 | * us share a lot more code. | ||
244 | */ | ||
245 | lock_mnt_writers(); | ||
246 | /* | ||
247 | * vfsmount_lock is for mnt_flags. | ||
248 | */ | ||
249 | spin_lock(&vfsmount_lock); | ||
250 | /* | ||
251 | * If coalescing the per-cpu writer counts did not | ||
252 | * get us back to a positive writer count, we have | ||
253 | * a bug. | ||
254 | */ | ||
255 | if ((atomic_read(&mnt->__mnt_writers) < 0) && | ||
256 | !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) { | ||
257 | printk(KERN_DEBUG "leak detected on mount(%p) writers " | ||
258 | "count: %d\n", | ||
259 | mnt, atomic_read(&mnt->__mnt_writers)); | ||
260 | WARN_ON(1); | ||
261 | /* use the flag to keep the dmesg spam down */ | ||
262 | mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT; | ||
263 | } | ||
264 | spin_unlock(&vfsmount_lock); | ||
265 | unlock_mnt_writers(); | ||
266 | } | ||
267 | |||
268 | /** | ||
269 | * mnt_drop_write - give up write access to a mount | ||
270 | * @mnt: the mount on which to give up write access | ||
271 | * | ||
272 | * Tells the low-level filesystem that we are done | ||
273 | * performing writes to it. Must be matched with | ||
274 | * mnt_want_write() call above. | ||
275 | */ | ||
276 | void mnt_drop_write(struct vfsmount *mnt) | ||
277 | { | ||
278 | int must_check_underflow = 0; | ||
279 | struct mnt_writer *cpu_writer; | ||
280 | |||
281 | cpu_writer = &get_cpu_var(mnt_writers); | ||
282 | spin_lock(&cpu_writer->lock); | ||
283 | |||
284 | use_cpu_writer_for_mount(cpu_writer, mnt); | ||
285 | if (cpu_writer->count > 0) { | ||
286 | cpu_writer->count--; | ||
287 | } else { | ||
288 | must_check_underflow = 1; | ||
289 | atomic_dec(&mnt->__mnt_writers); | ||
290 | } | ||
291 | |||
292 | spin_unlock(&cpu_writer->lock); | ||
293 | /* | ||
294 | * Logically, we could call this each time, | ||
295 | * but the __mnt_writers cacheline tends to | ||
296 | * be cold, and makes this expensive. | ||
297 | */ | ||
298 | if (must_check_underflow) | ||
299 | handle_write_count_underflow(mnt); | ||
300 | /* | ||
301 | * This could be done right after the spinlock | ||
302 | * is taken because the spinlock keeps us on | ||
303 | * the cpu, and disables preemption. However, | ||
304 | * putting it here bounds the amount that | ||
305 | * __mnt_writers can underflow. Without it, | ||
306 | * we could theoretically wrap __mnt_writers. | ||
307 | */ | ||
308 | put_cpu_var(mnt_writers); | ||
309 | } | ||
310 | EXPORT_SYMBOL_GPL(mnt_drop_write); | ||
311 | |||
312 | static int mnt_make_readonly(struct vfsmount *mnt) | ||
313 | { | ||
314 | int ret = 0; | ||
315 | |||
316 | lock_mnt_writers(); | ||
317 | /* | ||
318 | * With all the locks held, this value is stable | ||
319 | */ | ||
320 | if (atomic_read(&mnt->__mnt_writers) > 0) { | ||
321 | ret = -EBUSY; | ||
322 | goto out; | ||
323 | } | ||
324 | /* | ||
325 | * nobody can do a successful mnt_want_write() with all | ||
326 | * of the counts in MNT_DENIED_WRITE and the locks held. | ||
327 | */ | ||
328 | spin_lock(&vfsmount_lock); | ||
329 | if (!ret) | ||
330 | mnt->mnt_flags |= MNT_READONLY; | ||
331 | spin_unlock(&vfsmount_lock); | ||
332 | out: | ||
333 | unlock_mnt_writers(); | ||
334 | return ret; | ||
335 | } | ||
336 | |||
337 | static void __mnt_unmake_readonly(struct vfsmount *mnt) | ||
338 | { | ||
339 | spin_lock(&vfsmount_lock); | ||
340 | mnt->mnt_flags &= ~MNT_READONLY; | ||
341 | spin_unlock(&vfsmount_lock); | ||
342 | } | ||
343 | |||
83 | int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) | 344 | int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) |
84 | { | 345 | { |
85 | mnt->mnt_sb = sb; | 346 | mnt->mnt_sb = sb; |
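The hunk above establishes the write-tracking API: any operation that writes to a mount is expected to be bracketed by mnt_want_write() and mnt_drop_write(). A minimal caller sketch of that pairing follows; example_write_op() and do_modify_inode() are hypothetical names used purely for illustration and are not part of this patch.

/* Hypothetical caller showing the mnt_want_write()/mnt_drop_write() pairing. */
static int example_write_op(struct vfsmount *mnt, struct dentry *dentry)
{
	int err;

	err = mnt_want_write(mnt);	/* returns -EROFS if the mount is r/o */
	if (err)
		return err;
	err = do_modify_inode(dentry);	/* placeholder for the actual write */
	mnt_drop_write(mnt);		/* must balance the mnt_want_write() */
	return err;
}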
@@ -155,15 +416,15 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) | |||
155 | } | 416 | } |
156 | } | 417 | } |
157 | 418 | ||
158 | static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) | 419 | static void detach_mnt(struct vfsmount *mnt, struct path *old_path) |
159 | { | 420 | { |
160 | old_nd->path.dentry = mnt->mnt_mountpoint; | 421 | old_path->dentry = mnt->mnt_mountpoint; |
161 | old_nd->path.mnt = mnt->mnt_parent; | 422 | old_path->mnt = mnt->mnt_parent; |
162 | mnt->mnt_parent = mnt; | 423 | mnt->mnt_parent = mnt; |
163 | mnt->mnt_mountpoint = mnt->mnt_root; | 424 | mnt->mnt_mountpoint = mnt->mnt_root; |
164 | list_del_init(&mnt->mnt_child); | 425 | list_del_init(&mnt->mnt_child); |
165 | list_del_init(&mnt->mnt_hash); | 426 | list_del_init(&mnt->mnt_hash); |
166 | old_nd->path.dentry->d_mounted--; | 427 | old_path->dentry->d_mounted--; |
167 | } | 428 | } |
168 | 429 | ||
169 | void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, | 430 | void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, |
@@ -174,12 +435,12 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, | |||
174 | dentry->d_mounted++; | 435 | dentry->d_mounted++; |
175 | } | 436 | } |
176 | 437 | ||
177 | static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd) | 438 | static void attach_mnt(struct vfsmount *mnt, struct path *path) |
178 | { | 439 | { |
179 | mnt_set_mountpoint(nd->path.mnt, nd->path.dentry, mnt); | 440 | mnt_set_mountpoint(path->mnt, path->dentry, mnt); |
180 | list_add_tail(&mnt->mnt_hash, mount_hashtable + | 441 | list_add_tail(&mnt->mnt_hash, mount_hashtable + |
181 | hash(nd->path.mnt, nd->path.dentry)); | 442 | hash(path->mnt, path->dentry)); |
182 | list_add_tail(&mnt->mnt_child, &nd->path.mnt->mnt_mounts); | 443 | list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts); |
183 | } | 444 | } |
184 | 445 | ||
185 | /* | 446 | /* |
@@ -262,10 +523,8 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, | |||
262 | /* stick the duplicate mount on the same expiry list | 523 | /* stick the duplicate mount on the same expiry list |
263 | * as the original if that was on one */ | 524 | * as the original if that was on one */ |
264 | if (flag & CL_EXPIRE) { | 525 | if (flag & CL_EXPIRE) { |
265 | spin_lock(&vfsmount_lock); | ||
266 | if (!list_empty(&old->mnt_expire)) | 526 | if (!list_empty(&old->mnt_expire)) |
267 | list_add(&mnt->mnt_expire, &old->mnt_expire); | 527 | list_add(&mnt->mnt_expire, &old->mnt_expire); |
268 | spin_unlock(&vfsmount_lock); | ||
269 | } | 528 | } |
270 | } | 529 | } |
271 | return mnt; | 530 | return mnt; |
@@ -273,7 +532,36 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, | |||
273 | 532 | ||
274 | static inline void __mntput(struct vfsmount *mnt) | 533 | static inline void __mntput(struct vfsmount *mnt) |
275 | { | 534 | { |
535 | int cpu; | ||
276 | struct super_block *sb = mnt->mnt_sb; | 536 | struct super_block *sb = mnt->mnt_sb; |
537 | /* | ||
538 | * We don't have to hold all of the locks at the | ||
539 | * same time here because we know that we're the | ||
540 | * last reference to mnt and that no new writers | ||
541 | * can come in. | ||
542 | */ | ||
543 | for_each_possible_cpu(cpu) { | ||
544 | struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu); | ||
545 | if (cpu_writer->mnt != mnt) | ||
546 | continue; | ||
547 | spin_lock(&cpu_writer->lock); | ||
548 | atomic_add(cpu_writer->count, &mnt->__mnt_writers); | ||
549 | cpu_writer->count = 0; | ||
550 | /* | ||
551 | * Might as well do this so that no one | ||
552 | * ever sees the pointer and expects | ||
553 | * it to be valid. | ||
554 | */ | ||
555 | cpu_writer->mnt = NULL; | ||
556 | spin_unlock(&cpu_writer->lock); | ||
557 | } | ||
558 | /* | ||
559 | * This probably indicates that somebody messed | ||
560 | * up a mnt_want/drop_write() pair. If this | ||
561 | * happens, the filesystem was probably unable | ||
562 | * to make r/w->r/o transitions. | ||
563 | */ | ||
564 | WARN_ON(atomic_read(&mnt->__mnt_writers)); | ||
277 | dput(mnt->mnt_root); | 565 | dput(mnt->mnt_root); |
278 | free_vfsmnt(mnt); | 566 | free_vfsmnt(mnt); |
279 | deactivate_super(sb); | 567 | deactivate_super(sb); |
@@ -419,7 +707,7 @@ static int show_vfsmnt(struct seq_file *m, void *v) | |||
419 | seq_putc(m, '.'); | 707 | seq_putc(m, '.'); |
420 | mangle(m, mnt->mnt_sb->s_subtype); | 708 | mangle(m, mnt->mnt_sb->s_subtype); |
421 | } | 709 | } |
422 | seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); | 710 | seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); |
423 | for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { | 711 | for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { |
424 | if (mnt->mnt_sb->s_flags & fs_infop->flag) | 712 | if (mnt->mnt_sb->s_flags & fs_infop->flag) |
425 | seq_puts(m, fs_infop->str); | 713 | seq_puts(m, fs_infop->str); |
@@ -548,6 +836,7 @@ void release_mounts(struct list_head *head) | |||
548 | m = mnt->mnt_parent; | 836 | m = mnt->mnt_parent; |
549 | mnt->mnt_mountpoint = mnt->mnt_root; | 837 | mnt->mnt_mountpoint = mnt->mnt_root; |
550 | mnt->mnt_parent = mnt; | 838 | mnt->mnt_parent = mnt; |
839 | m->mnt_ghosts--; | ||
551 | spin_unlock(&vfsmount_lock); | 840 | spin_unlock(&vfsmount_lock); |
552 | dput(dentry); | 841 | dput(dentry); |
553 | mntput(m); | 842 | mntput(m); |
@@ -572,12 +861,16 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) | |||
572 | __touch_mnt_namespace(p->mnt_ns); | 861 | __touch_mnt_namespace(p->mnt_ns); |
573 | p->mnt_ns = NULL; | 862 | p->mnt_ns = NULL; |
574 | list_del_init(&p->mnt_child); | 863 | list_del_init(&p->mnt_child); |
575 | if (p->mnt_parent != p) | 864 | if (p->mnt_parent != p) { |
865 | p->mnt_parent->mnt_ghosts++; | ||
576 | p->mnt_mountpoint->d_mounted--; | 866 | p->mnt_mountpoint->d_mounted--; |
867 | } | ||
577 | change_mnt_propagation(p, MS_PRIVATE); | 868 | change_mnt_propagation(p, MS_PRIVATE); |
578 | } | 869 | } |
579 | } | 870 | } |
580 | 871 | ||
872 | static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts); | ||
873 | |||
581 | static int do_umount(struct vfsmount *mnt, int flags) | 874 | static int do_umount(struct vfsmount *mnt, int flags) |
582 | { | 875 | { |
583 | struct super_block *sb = mnt->mnt_sb; | 876 | struct super_block *sb = mnt->mnt_sb; |
@@ -650,6 +943,9 @@ static int do_umount(struct vfsmount *mnt, int flags) | |||
650 | spin_lock(&vfsmount_lock); | 943 | spin_lock(&vfsmount_lock); |
651 | event++; | 944 | event++; |
652 | 945 | ||
946 | if (!(flags & MNT_DETACH)) | ||
947 | shrink_submounts(mnt, &umount_list); | ||
948 | |||
653 | retval = -EBUSY; | 949 | retval = -EBUSY; |
654 | if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { | 950 | if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { |
655 | if (!list_empty(&mnt->mnt_list)) | 951 | if (!list_empty(&mnt->mnt_list)) |
@@ -744,7 +1040,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, | |||
744 | int flag) | 1040 | int flag) |
745 | { | 1041 | { |
746 | struct vfsmount *res, *p, *q, *r, *s; | 1042 | struct vfsmount *res, *p, *q, *r, *s; |
747 | struct nameidata nd; | 1043 | struct path path; |
748 | 1044 | ||
749 | if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) | 1045 | if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) |
750 | return NULL; | 1046 | return NULL; |
@@ -769,14 +1065,14 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, | |||
769 | q = q->mnt_parent; | 1065 | q = q->mnt_parent; |
770 | } | 1066 | } |
771 | p = s; | 1067 | p = s; |
772 | nd.path.mnt = q; | 1068 | path.mnt = q; |
773 | nd.path.dentry = p->mnt_mountpoint; | 1069 | path.dentry = p->mnt_mountpoint; |
774 | q = clone_mnt(p, p->mnt_root, flag); | 1070 | q = clone_mnt(p, p->mnt_root, flag); |
775 | if (!q) | 1071 | if (!q) |
776 | goto Enomem; | 1072 | goto Enomem; |
777 | spin_lock(&vfsmount_lock); | 1073 | spin_lock(&vfsmount_lock); |
778 | list_add_tail(&q->mnt_list, &res->mnt_list); | 1074 | list_add_tail(&q->mnt_list, &res->mnt_list); |
779 | attach_mnt(q, &nd); | 1075 | attach_mnt(q, &path); |
780 | spin_unlock(&vfsmount_lock); | 1076 | spin_unlock(&vfsmount_lock); |
781 | } | 1077 | } |
782 | } | 1078 | } |
@@ -876,11 +1172,11 @@ void drop_collected_mounts(struct vfsmount *mnt) | |||
876 | * in allocations. | 1172 | * in allocations. |
877 | */ | 1173 | */ |
878 | static int attach_recursive_mnt(struct vfsmount *source_mnt, | 1174 | static int attach_recursive_mnt(struct vfsmount *source_mnt, |
879 | struct nameidata *nd, struct nameidata *parent_nd) | 1175 | struct path *path, struct path *parent_path) |
880 | { | 1176 | { |
881 | LIST_HEAD(tree_list); | 1177 | LIST_HEAD(tree_list); |
882 | struct vfsmount *dest_mnt = nd->path.mnt; | 1178 | struct vfsmount *dest_mnt = path->mnt; |
883 | struct dentry *dest_dentry = nd->path.dentry; | 1179 | struct dentry *dest_dentry = path->dentry; |
884 | struct vfsmount *child, *p; | 1180 | struct vfsmount *child, *p; |
885 | 1181 | ||
886 | if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list)) | 1182 | if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list)) |
@@ -892,9 +1188,9 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, | |||
892 | } | 1188 | } |
893 | 1189 | ||
894 | spin_lock(&vfsmount_lock); | 1190 | spin_lock(&vfsmount_lock); |
895 | if (parent_nd) { | 1191 | if (parent_path) { |
896 | detach_mnt(source_mnt, parent_nd); | 1192 | detach_mnt(source_mnt, parent_path); |
897 | attach_mnt(source_mnt, nd); | 1193 | attach_mnt(source_mnt, path); |
898 | touch_mnt_namespace(current->nsproxy->mnt_ns); | 1194 | touch_mnt_namespace(current->nsproxy->mnt_ns); |
899 | } else { | 1195 | } else { |
900 | mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); | 1196 | mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); |
@@ -930,7 +1226,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) | |||
930 | 1226 | ||
931 | err = -ENOENT; | 1227 | err = -ENOENT; |
932 | if (IS_ROOT(nd->path.dentry) || !d_unhashed(nd->path.dentry)) | 1228 | if (IS_ROOT(nd->path.dentry) || !d_unhashed(nd->path.dentry)) |
933 | err = attach_recursive_mnt(mnt, nd, NULL); | 1229 | err = attach_recursive_mnt(mnt, &nd->path, NULL); |
934 | out_unlock: | 1230 | out_unlock: |
935 | mutex_unlock(&nd->path.dentry->d_inode->i_mutex); | 1231 | mutex_unlock(&nd->path.dentry->d_inode->i_mutex); |
936 | if (!err) | 1232 | if (!err) |
@@ -1013,6 +1309,23 @@ out: | |||
1013 | return err; | 1309 | return err; |
1014 | } | 1310 | } |
1015 | 1311 | ||
1312 | static int change_mount_flags(struct vfsmount *mnt, int ms_flags) | ||
1313 | { | ||
1314 | int error = 0; | ||
1315 | int readonly_request = 0; | ||
1316 | |||
1317 | if (ms_flags & MS_RDONLY) | ||
1318 | readonly_request = 1; | ||
1319 | if (readonly_request == __mnt_is_readonly(mnt)) | ||
1320 | return 0; | ||
1321 | |||
1322 | if (readonly_request) | ||
1323 | error = mnt_make_readonly(mnt); | ||
1324 | else | ||
1325 | __mnt_unmake_readonly(mnt); | ||
1326 | return error; | ||
1327 | } | ||
1328 | |||
1016 | /* | 1329 | /* |
1017 | * change filesystem flags. dir should be a physical root of filesystem. | 1330 | * change filesystem flags. dir should be a physical root of filesystem. |
1018 | * If you've mounted a non-root directory somewhere and want to do remount | 1331 | * If you've mounted a non-root directory somewhere and want to do remount |
@@ -1035,7 +1348,10 @@ static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags, | |||
1035 | return -EINVAL; | 1348 | return -EINVAL; |
1036 | 1349 | ||
1037 | down_write(&sb->s_umount); | 1350 | down_write(&sb->s_umount); |
1038 | err = do_remount_sb(sb, flags, data, 0); | 1351 | if (flags & MS_BIND) |
1352 | err = change_mount_flags(nd->path.mnt, flags); | ||
1353 | else | ||
1354 | err = do_remount_sb(sb, flags, data, 0); | ||
1039 | if (!err) | 1355 | if (!err) |
1040 | nd->path.mnt->mnt_flags = mnt_flags; | 1356 | nd->path.mnt->mnt_flags = mnt_flags; |
1041 | up_write(&sb->s_umount); | 1357 | up_write(&sb->s_umount); |
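With the MS_BIND branch added to do_remount() above, a remount request that carries MS_BIND only updates the per-mount flags through change_mount_flags() instead of calling do_remount_sb(). From userspace this is what enables read-only bind mounts; a small sketch of the corresponding mount(2) call (the target path argument is just an example) is shown below.

#include <sys/mount.h>

/* Flip an existing (bind) mount at "target" to read-only without
 * touching the underlying superblock. */
int make_mount_readonly(const char *target)
{
	return mount(NULL, target, NULL,
		     MS_REMOUNT | MS_BIND | MS_RDONLY, NULL);
}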
@@ -1059,7 +1375,8 @@ static inline int tree_contains_unbindable(struct vfsmount *mnt) | |||
1059 | */ | 1375 | */ |
1060 | static noinline int do_move_mount(struct nameidata *nd, char *old_name) | 1376 | static noinline int do_move_mount(struct nameidata *nd, char *old_name) |
1061 | { | 1377 | { |
1062 | struct nameidata old_nd, parent_nd; | 1378 | struct nameidata old_nd; |
1379 | struct path parent_path; | ||
1063 | struct vfsmount *p; | 1380 | struct vfsmount *p; |
1064 | int err = 0; | 1381 | int err = 0; |
1065 | if (!capable(CAP_SYS_ADMIN)) | 1382 | if (!capable(CAP_SYS_ADMIN)) |
@@ -1114,21 +1431,19 @@ static noinline int do_move_mount(struct nameidata *nd, char *old_name) | |||
1114 | if (p == old_nd.path.mnt) | 1431 | if (p == old_nd.path.mnt) |
1115 | goto out1; | 1432 | goto out1; |
1116 | 1433 | ||
1117 | err = attach_recursive_mnt(old_nd.path.mnt, nd, &parent_nd); | 1434 | err = attach_recursive_mnt(old_nd.path.mnt, &nd->path, &parent_path); |
1118 | if (err) | 1435 | if (err) |
1119 | goto out1; | 1436 | goto out1; |
1120 | 1437 | ||
1121 | spin_lock(&vfsmount_lock); | ||
1122 | /* if the mount is moved, it should no longer be expire | 1438 | /* if the mount is moved, it should no longer be expire |
1123 | * automatically */ | 1439 | * automatically */ |
1124 | list_del_init(&old_nd.path.mnt->mnt_expire); | 1440 | list_del_init(&old_nd.path.mnt->mnt_expire); |
1125 | spin_unlock(&vfsmount_lock); | ||
1126 | out1: | 1441 | out1: |
1127 | mutex_unlock(&nd->path.dentry->d_inode->i_mutex); | 1442 | mutex_unlock(&nd->path.dentry->d_inode->i_mutex); |
1128 | out: | 1443 | out: |
1129 | up_write(&namespace_sem); | 1444 | up_write(&namespace_sem); |
1130 | if (!err) | 1445 | if (!err) |
1131 | path_put(&parent_nd.path); | 1446 | path_put(&parent_path); |
1132 | path_put(&old_nd.path); | 1447 | path_put(&old_nd.path); |
1133 | return err; | 1448 | return err; |
1134 | } | 1449 | } |
@@ -1189,12 +1504,9 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, | |||
1189 | if ((err = graft_tree(newmnt, nd))) | 1504 | if ((err = graft_tree(newmnt, nd))) |
1190 | goto unlock; | 1505 | goto unlock; |
1191 | 1506 | ||
1192 | if (fslist) { | 1507 | if (fslist) /* add to the specified expiration list */ |
1193 | /* add to the specified expiration list */ | ||
1194 | spin_lock(&vfsmount_lock); | ||
1195 | list_add_tail(&newmnt->mnt_expire, fslist); | 1508 | list_add_tail(&newmnt->mnt_expire, fslist); |
1196 | spin_unlock(&vfsmount_lock); | 1509 | |
1197 | } | ||
1198 | up_write(&namespace_sem); | 1510 | up_write(&namespace_sem); |
1199 | return 0; | 1511 | return 0; |
1200 | 1512 | ||
@@ -1206,75 +1518,6 @@ unlock: | |||
1206 | 1518 | ||
1207 | EXPORT_SYMBOL_GPL(do_add_mount); | 1519 | EXPORT_SYMBOL_GPL(do_add_mount); |
1208 | 1520 | ||
1209 | static void expire_mount(struct vfsmount *mnt, struct list_head *mounts, | ||
1210 | struct list_head *umounts) | ||
1211 | { | ||
1212 | spin_lock(&vfsmount_lock); | ||
1213 | |||
1214 | /* | ||
1215 | * Check if mount is still attached, if not, let whoever holds it deal | ||
1216 | * with the sucker | ||
1217 | */ | ||
1218 | if (mnt->mnt_parent == mnt) { | ||
1219 | spin_unlock(&vfsmount_lock); | ||
1220 | return; | ||
1221 | } | ||
1222 | |||
1223 | /* | ||
1224 | * Check that it is still dead: the count should now be 2 - as | ||
1225 | * contributed by the vfsmount parent and the mntget above | ||
1226 | */ | ||
1227 | if (!propagate_mount_busy(mnt, 2)) { | ||
1228 | /* delete from the namespace */ | ||
1229 | touch_mnt_namespace(mnt->mnt_ns); | ||
1230 | list_del_init(&mnt->mnt_list); | ||
1231 | mnt->mnt_ns = NULL; | ||
1232 | umount_tree(mnt, 1, umounts); | ||
1233 | spin_unlock(&vfsmount_lock); | ||
1234 | } else { | ||
1235 | /* | ||
1236 | * Someone brought it back to life whilst we didn't have any | ||
1237 | * locks held so return it to the expiration list | ||
1238 | */ | ||
1239 | list_add_tail(&mnt->mnt_expire, mounts); | ||
1240 | spin_unlock(&vfsmount_lock); | ||
1241 | } | ||
1242 | } | ||
1243 | |||
1244 | /* | ||
1245 | * go through the vfsmounts we've just consigned to the graveyard to | ||
1246 | * - check that they're still dead | ||
1247 | * - delete the vfsmount from the appropriate namespace under lock | ||
1248 | * - dispose of the corpse | ||
1249 | */ | ||
1250 | static void expire_mount_list(struct list_head *graveyard, struct list_head *mounts) | ||
1251 | { | ||
1252 | struct mnt_namespace *ns; | ||
1253 | struct vfsmount *mnt; | ||
1254 | |||
1255 | while (!list_empty(graveyard)) { | ||
1256 | LIST_HEAD(umounts); | ||
1257 | mnt = list_first_entry(graveyard, struct vfsmount, mnt_expire); | ||
1258 | list_del_init(&mnt->mnt_expire); | ||
1259 | |||
1260 | /* don't do anything if the namespace is dead - all the | ||
1261 | * vfsmounts from it are going away anyway */ | ||
1262 | ns = mnt->mnt_ns; | ||
1263 | if (!ns || !ns->root) | ||
1264 | continue; | ||
1265 | get_mnt_ns(ns); | ||
1266 | |||
1267 | spin_unlock(&vfsmount_lock); | ||
1268 | down_write(&namespace_sem); | ||
1269 | expire_mount(mnt, mounts, &umounts); | ||
1270 | up_write(&namespace_sem); | ||
1271 | release_mounts(&umounts); | ||
1272 | mntput(mnt); | ||
1273 | put_mnt_ns(ns); | ||
1274 | spin_lock(&vfsmount_lock); | ||
1275 | } | ||
1276 | } | ||
1277 | |||
1278 | /* | 1521 | /* |
1279 | * process a list of expirable mountpoints with the intent of discarding any | 1522 | * process a list of expirable mountpoints with the intent of discarding any |
1280 | * mountpoints that aren't in use and haven't been touched since last we came | 1523 | * mountpoints that aren't in use and haven't been touched since last we came |
@@ -1284,10 +1527,12 @@ void mark_mounts_for_expiry(struct list_head *mounts) | |||
1284 | { | 1527 | { |
1285 | struct vfsmount *mnt, *next; | 1528 | struct vfsmount *mnt, *next; |
1286 | LIST_HEAD(graveyard); | 1529 | LIST_HEAD(graveyard); |
1530 | LIST_HEAD(umounts); | ||
1287 | 1531 | ||
1288 | if (list_empty(mounts)) | 1532 | if (list_empty(mounts)) |
1289 | return; | 1533 | return; |
1290 | 1534 | ||
1535 | down_write(&namespace_sem); | ||
1291 | spin_lock(&vfsmount_lock); | 1536 | spin_lock(&vfsmount_lock); |
1292 | 1537 | ||
1293 | /* extract from the expiration list every vfsmount that matches the | 1538 | /* extract from the expiration list every vfsmount that matches the |
@@ -1298,16 +1543,19 @@ void mark_mounts_for_expiry(struct list_head *mounts) | |||
1298 | */ | 1543 | */ |
1299 | list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { | 1544 | list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { |
1300 | if (!xchg(&mnt->mnt_expiry_mark, 1) || | 1545 | if (!xchg(&mnt->mnt_expiry_mark, 1) || |
1301 | atomic_read(&mnt->mnt_count) != 1) | 1546 | propagate_mount_busy(mnt, 1)) |
1302 | continue; | 1547 | continue; |
1303 | |||
1304 | mntget(mnt); | ||
1305 | list_move(&mnt->mnt_expire, &graveyard); | 1548 | list_move(&mnt->mnt_expire, &graveyard); |
1306 | } | 1549 | } |
1307 | 1550 | while (!list_empty(&graveyard)) { | |
1308 | expire_mount_list(&graveyard, mounts); | 1551 | mnt = list_first_entry(&graveyard, struct vfsmount, mnt_expire); |
1309 | 1552 | touch_mnt_namespace(mnt->mnt_ns); | |
1553 | umount_tree(mnt, 1, &umounts); | ||
1554 | } | ||
1310 | spin_unlock(&vfsmount_lock); | 1555 | spin_unlock(&vfsmount_lock); |
1556 | up_write(&namespace_sem); | ||
1557 | |||
1558 | release_mounts(&umounts); | ||
1311 | } | 1559 | } |
1312 | 1560 | ||
1313 | EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); | 1561 | EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); |
@@ -1343,7 +1591,6 @@ resume: | |||
1343 | } | 1591 | } |
1344 | 1592 | ||
1345 | if (!propagate_mount_busy(mnt, 1)) { | 1593 | if (!propagate_mount_busy(mnt, 1)) { |
1346 | mntget(mnt); | ||
1347 | list_move_tail(&mnt->mnt_expire, graveyard); | 1594 | list_move_tail(&mnt->mnt_expire, graveyard); |
1348 | found++; | 1595 | found++; |
1349 | } | 1596 | } |
@@ -1363,22 +1610,22 @@ resume: | |||
1363 | * process a list of expirable mountpoints with the intent of discarding any | 1610 | * process a list of expirable mountpoints with the intent of discarding any |
1364 | * submounts of a specific parent mountpoint | 1611 | * submounts of a specific parent mountpoint |
1365 | */ | 1612 | */ |
1366 | void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts) | 1613 | static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) |
1367 | { | 1614 | { |
1368 | LIST_HEAD(graveyard); | 1615 | LIST_HEAD(graveyard); |
1369 | int found; | 1616 | struct vfsmount *m; |
1370 | |||
1371 | spin_lock(&vfsmount_lock); | ||
1372 | 1617 | ||
1373 | /* extract submounts of 'mountpoint' from the expiration list */ | 1618 | /* extract submounts of 'mountpoint' from the expiration list */ |
1374 | while ((found = select_submounts(mountpoint, &graveyard)) != 0) | 1619 | while (select_submounts(mnt, &graveyard)) { |
1375 | expire_mount_list(&graveyard, mounts); | 1620 | while (!list_empty(&graveyard)) { |
1376 | 1621 | m = list_first_entry(&graveyard, struct vfsmount, | |
1377 | spin_unlock(&vfsmount_lock); | 1622 | mnt_expire); |
1623 | touch_mnt_namespace(mnt->mnt_ns); | ||
1624 | umount_tree(m, 1, umounts); | ||
1625 | } | ||
1626 | } | ||
1378 | } | 1627 | } |
1379 | 1628 | ||
1380 | EXPORT_SYMBOL_GPL(shrink_submounts); | ||
1381 | |||
1382 | /* | 1629 | /* |
1383 | * Some copy_from_user() implementations do not return the exact number of | 1630 | * Some copy_from_user() implementations do not return the exact number of |
1384 | * bytes remaining to copy on a fault. But copy_mount_options() requires that. | 1631 | * bytes remaining to copy on a fault. But copy_mount_options() requires that. |
@@ -1488,6 +1735,8 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, | |||
1488 | mnt_flags |= MNT_NODIRATIME; | 1735 | mnt_flags |= MNT_NODIRATIME; |
1489 | if (flags & MS_RELATIME) | 1736 | if (flags & MS_RELATIME) |
1490 | mnt_flags |= MNT_RELATIME; | 1737 | mnt_flags |= MNT_RELATIME; |
1738 | if (flags & MS_RDONLY) | ||
1739 | mnt_flags |= MNT_READONLY; | ||
1491 | 1740 | ||
1492 | flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | | 1741 | flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | |
1493 | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); | 1742 | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); |
@@ -1683,7 +1932,7 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) | |||
1683 | path_put(&old_pwd); | 1932 | path_put(&old_pwd); |
1684 | } | 1933 | } |
1685 | 1934 | ||
1686 | static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) | 1935 | static void chroot_fs_refs(struct path *old_root, struct path *new_root) |
1687 | { | 1936 | { |
1688 | struct task_struct *g, *p; | 1937 | struct task_struct *g, *p; |
1689 | struct fs_struct *fs; | 1938 | struct fs_struct *fs; |
@@ -1695,12 +1944,12 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) | |||
1695 | if (fs) { | 1944 | if (fs) { |
1696 | atomic_inc(&fs->count); | 1945 | atomic_inc(&fs->count); |
1697 | task_unlock(p); | 1946 | task_unlock(p); |
1698 | if (fs->root.dentry == old_nd->path.dentry | 1947 | if (fs->root.dentry == old_root->dentry |
1699 | && fs->root.mnt == old_nd->path.mnt) | 1948 | && fs->root.mnt == old_root->mnt) |
1700 | set_fs_root(fs, &new_nd->path); | 1949 | set_fs_root(fs, new_root); |
1701 | if (fs->pwd.dentry == old_nd->path.dentry | 1950 | if (fs->pwd.dentry == old_root->dentry |
1702 | && fs->pwd.mnt == old_nd->path.mnt) | 1951 | && fs->pwd.mnt == old_root->mnt) |
1703 | set_fs_pwd(fs, &new_nd->path); | 1952 | set_fs_pwd(fs, new_root); |
1704 | put_fs_struct(fs); | 1953 | put_fs_struct(fs); |
1705 | } else | 1954 | } else |
1706 | task_unlock(p); | 1955 | task_unlock(p); |
@@ -1737,7 +1986,8 @@ asmlinkage long sys_pivot_root(const char __user * new_root, | |||
1737 | const char __user * put_old) | 1986 | const char __user * put_old) |
1738 | { | 1987 | { |
1739 | struct vfsmount *tmp; | 1988 | struct vfsmount *tmp; |
1740 | struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; | 1989 | struct nameidata new_nd, old_nd, user_nd; |
1990 | struct path parent_path, root_parent; | ||
1741 | int error; | 1991 | int error; |
1742 | 1992 | ||
1743 | if (!capable(CAP_SYS_ADMIN)) | 1993 | if (!capable(CAP_SYS_ADMIN)) |
@@ -1811,19 +2061,19 @@ asmlinkage long sys_pivot_root(const char __user * new_root, | |||
1811 | goto out3; | 2061 | goto out3; |
1812 | } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) | 2062 | } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) |
1813 | goto out3; | 2063 | goto out3; |
1814 | detach_mnt(new_nd.path.mnt, &parent_nd); | 2064 | detach_mnt(new_nd.path.mnt, &parent_path); |
1815 | detach_mnt(user_nd.path.mnt, &root_parent); | 2065 | detach_mnt(user_nd.path.mnt, &root_parent); |
1816 | /* mount old root on put_old */ | 2066 | /* mount old root on put_old */ |
1817 | attach_mnt(user_nd.path.mnt, &old_nd); | 2067 | attach_mnt(user_nd.path.mnt, &old_nd.path); |
1818 | /* mount new_root on / */ | 2068 | /* mount new_root on / */ |
1819 | attach_mnt(new_nd.path.mnt, &root_parent); | 2069 | attach_mnt(new_nd.path.mnt, &root_parent); |
1820 | touch_mnt_namespace(current->nsproxy->mnt_ns); | 2070 | touch_mnt_namespace(current->nsproxy->mnt_ns); |
1821 | spin_unlock(&vfsmount_lock); | 2071 | spin_unlock(&vfsmount_lock); |
1822 | chroot_fs_refs(&user_nd, &new_nd); | 2072 | chroot_fs_refs(&user_nd.path, &new_nd.path); |
1823 | security_sb_post_pivotroot(&user_nd, &new_nd); | 2073 | security_sb_post_pivotroot(&user_nd, &new_nd); |
1824 | error = 0; | 2074 | error = 0; |
1825 | path_put(&root_parent.path); | 2075 | path_put(&root_parent); |
1826 | path_put(&parent_nd.path); | 2076 | path_put(&parent_path); |
1827 | out2: | 2077 | out2: |
1828 | mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); | 2078 | mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); |
1829 | up_write(&namespace_sem); | 2079 | up_write(&namespace_sem); |