aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namespace.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--fs/namespace.c697
1 files changed, 480 insertions, 217 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 2fa9fdf7d6f5..caa9187f67e5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -24,6 +24,7 @@
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <asm/unistd.h> 26#include <asm/unistd.h>
27#include "pnode.h"
27 28
28extern int __init init_rootfs(void); 29extern int __init init_rootfs(void);
29 30
@@ -37,33 +38,39 @@ static inline int sysfs_init(void)
37#endif 38#endif
38 39
39/* spinlock for vfsmount related operations, inplace of dcache_lock */ 40/* spinlock for vfsmount related operations, inplace of dcache_lock */
40 __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); 41__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
42
43static int event;
41 44
42static struct list_head *mount_hashtable; 45static struct list_head *mount_hashtable;
43static int hash_mask __read_mostly, hash_bits __read_mostly; 46static int hash_mask __read_mostly, hash_bits __read_mostly;
44static kmem_cache_t *mnt_cache; 47static kmem_cache_t *mnt_cache;
48static struct rw_semaphore namespace_sem;
45 49
46static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) 50static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
47{ 51{
48 unsigned long tmp = ((unsigned long) mnt / L1_CACHE_BYTES); 52 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
49 tmp += ((unsigned long) dentry / L1_CACHE_BYTES); 53 tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
50 tmp = tmp + (tmp >> hash_bits); 54 tmp = tmp + (tmp >> hash_bits);
51 return tmp & hash_mask; 55 return tmp & hash_mask;
52} 56}
53 57
54struct vfsmount *alloc_vfsmnt(const char *name) 58struct vfsmount *alloc_vfsmnt(const char *name)
55{ 59{
56 struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL); 60 struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
57 if (mnt) { 61 if (mnt) {
58 memset(mnt, 0, sizeof(struct vfsmount)); 62 memset(mnt, 0, sizeof(struct vfsmount));
59 atomic_set(&mnt->mnt_count,1); 63 atomic_set(&mnt->mnt_count, 1);
60 INIT_LIST_HEAD(&mnt->mnt_hash); 64 INIT_LIST_HEAD(&mnt->mnt_hash);
61 INIT_LIST_HEAD(&mnt->mnt_child); 65 INIT_LIST_HEAD(&mnt->mnt_child);
62 INIT_LIST_HEAD(&mnt->mnt_mounts); 66 INIT_LIST_HEAD(&mnt->mnt_mounts);
63 INIT_LIST_HEAD(&mnt->mnt_list); 67 INIT_LIST_HEAD(&mnt->mnt_list);
64 INIT_LIST_HEAD(&mnt->mnt_expire); 68 INIT_LIST_HEAD(&mnt->mnt_expire);
69 INIT_LIST_HEAD(&mnt->mnt_share);
70 INIT_LIST_HEAD(&mnt->mnt_slave_list);
71 INIT_LIST_HEAD(&mnt->mnt_slave);
65 if (name) { 72 if (name) {
66 int size = strlen(name)+1; 73 int size = strlen(name) + 1;
67 char *newname = kmalloc(size, GFP_KERNEL); 74 char *newname = kmalloc(size, GFP_KERNEL);
68 if (newname) { 75 if (newname) {
69 memcpy(newname, name, size); 76 memcpy(newname, name, size);
@@ -81,36 +88,65 @@ void free_vfsmnt(struct vfsmount *mnt)
81} 88}
82 89
83/* 90/*
84 * Now, lookup_mnt increments the ref count before returning 91 * find the first or last mount at @dentry on vfsmount @mnt depending on
85 * the vfsmount struct. 92 * @dir. If @dir is set return the first mount else return the last mount.
86 */ 93 */
87struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) 94struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
95 int dir)
88{ 96{
89 struct list_head * head = mount_hashtable + hash(mnt, dentry); 97 struct list_head *head = mount_hashtable + hash(mnt, dentry);
90 struct list_head * tmp = head; 98 struct list_head *tmp = head;
91 struct vfsmount *p, *found = NULL; 99 struct vfsmount *p, *found = NULL;
92 100
93 spin_lock(&vfsmount_lock);
94 for (;;) { 101 for (;;) {
95 tmp = tmp->next; 102 tmp = dir ? tmp->next : tmp->prev;
96 p = NULL; 103 p = NULL;
97 if (tmp == head) 104 if (tmp == head)
98 break; 105 break;
99 p = list_entry(tmp, struct vfsmount, mnt_hash); 106 p = list_entry(tmp, struct vfsmount, mnt_hash);
100 if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) { 107 if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
101 found = mntget(p); 108 found = p;
102 break; 109 break;
103 } 110 }
104 } 111 }
105 spin_unlock(&vfsmount_lock);
106 return found; 112 return found;
107} 113}
108 114
115/*
116 * lookup_mnt increments the ref count before returning
117 * the vfsmount struct.
118 */
119struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
120{
121 struct vfsmount *child_mnt;
122 spin_lock(&vfsmount_lock);
123 if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
124 mntget(child_mnt);
125 spin_unlock(&vfsmount_lock);
126 return child_mnt;
127}
128
109static inline int check_mnt(struct vfsmount *mnt) 129static inline int check_mnt(struct vfsmount *mnt)
110{ 130{
111 return mnt->mnt_namespace == current->namespace; 131 return mnt->mnt_namespace == current->namespace;
112} 132}
113 133
134static void touch_namespace(struct namespace *ns)
135{
136 if (ns) {
137 ns->event = ++event;
138 wake_up_interruptible(&ns->poll);
139 }
140}
141
142static void __touch_namespace(struct namespace *ns)
143{
144 if (ns && ns->event != event) {
145 ns->event = event;
146 wake_up_interruptible(&ns->poll);
147 }
148}
149
114static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) 150static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
115{ 151{
116 old_nd->dentry = mnt->mnt_mountpoint; 152 old_nd->dentry = mnt->mnt_mountpoint;
@@ -122,13 +158,43 @@ static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
122 old_nd->dentry->d_mounted--; 158 old_nd->dentry->d_mounted--;
123} 159}
124 160
161void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
162 struct vfsmount *child_mnt)
163{
164 child_mnt->mnt_parent = mntget(mnt);
165 child_mnt->mnt_mountpoint = dget(dentry);
166 dentry->d_mounted++;
167}
168
125static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd) 169static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
126{ 170{
127 mnt->mnt_parent = mntget(nd->mnt); 171 mnt_set_mountpoint(nd->mnt, nd->dentry, mnt);
128 mnt->mnt_mountpoint = dget(nd->dentry); 172 list_add_tail(&mnt->mnt_hash, mount_hashtable +
129 list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry)); 173 hash(nd->mnt, nd->dentry));
130 list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts); 174 list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
131 nd->dentry->d_mounted++; 175}
176
177/*
178 * the caller must hold vfsmount_lock
179 */
180static void commit_tree(struct vfsmount *mnt)
181{
182 struct vfsmount *parent = mnt->mnt_parent;
183 struct vfsmount *m;
184 LIST_HEAD(head);
185 struct namespace *n = parent->mnt_namespace;
186
187 BUG_ON(parent == mnt);
188
189 list_add_tail(&head, &mnt->mnt_list);
190 list_for_each_entry(m, &head, mnt_list)
191 m->mnt_namespace = n;
192 list_splice(&head, n->list.prev);
193
194 list_add_tail(&mnt->mnt_hash, mount_hashtable +
195 hash(parent, mnt->mnt_mountpoint));
196 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
197 touch_namespace(n);
132} 198}
133 199
134static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root) 200static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
@@ -147,8 +213,18 @@ static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
147 return list_entry(next, struct vfsmount, mnt_child); 213 return list_entry(next, struct vfsmount, mnt_child);
148} 214}
149 215
150static struct vfsmount * 216static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
151clone_mnt(struct vfsmount *old, struct dentry *root) 217{
218 struct list_head *prev = p->mnt_mounts.prev;
219 while (prev != &p->mnt_mounts) {
220 p = list_entry(prev, struct vfsmount, mnt_child);
221 prev = p->mnt_mounts.prev;
222 }
223 return p;
224}
225
226static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
227 int flag)
152{ 228{
153 struct super_block *sb = old->mnt_sb; 229 struct super_block *sb = old->mnt_sb;
154 struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname); 230 struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
@@ -160,19 +236,34 @@ clone_mnt(struct vfsmount *old, struct dentry *root)
160 mnt->mnt_root = dget(root); 236 mnt->mnt_root = dget(root);
161 mnt->mnt_mountpoint = mnt->mnt_root; 237 mnt->mnt_mountpoint = mnt->mnt_root;
162 mnt->mnt_parent = mnt; 238 mnt->mnt_parent = mnt;
163 mnt->mnt_namespace = current->namespace; 239
240 if (flag & CL_SLAVE) {
241 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
242 mnt->mnt_master = old;
243 CLEAR_MNT_SHARED(mnt);
244 } else {
245 if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
246 list_add(&mnt->mnt_share, &old->mnt_share);
247 if (IS_MNT_SLAVE(old))
248 list_add(&mnt->mnt_slave, &old->mnt_slave);
249 mnt->mnt_master = old->mnt_master;
250 }
251 if (flag & CL_MAKE_SHARED)
252 set_mnt_shared(mnt);
164 253
165 /* stick the duplicate mount on the same expiry list 254 /* stick the duplicate mount on the same expiry list
166 * as the original if that was on one */ 255 * as the original if that was on one */
167 spin_lock(&vfsmount_lock); 256 if (flag & CL_EXPIRE) {
168 if (!list_empty(&old->mnt_expire)) 257 spin_lock(&vfsmount_lock);
169 list_add(&mnt->mnt_expire, &old->mnt_expire); 258 if (!list_empty(&old->mnt_expire))
170 spin_unlock(&vfsmount_lock); 259 list_add(&mnt->mnt_expire, &old->mnt_expire);
260 spin_unlock(&vfsmount_lock);
261 }
171 } 262 }
172 return mnt; 263 return mnt;
173} 264}
174 265
175void __mntput(struct vfsmount *mnt) 266static inline void __mntput(struct vfsmount *mnt)
176{ 267{
177 struct super_block *sb = mnt->mnt_sb; 268 struct super_block *sb = mnt->mnt_sb;
178 dput(mnt->mnt_root); 269 dput(mnt->mnt_root);
@@ -180,7 +271,46 @@ void __mntput(struct vfsmount *mnt)
180 deactivate_super(sb); 271 deactivate_super(sb);
181} 272}
182 273
183EXPORT_SYMBOL(__mntput); 274void mntput_no_expire(struct vfsmount *mnt)
275{
276repeat:
277 if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
278 if (likely(!mnt->mnt_pinned)) {
279 spin_unlock(&vfsmount_lock);
280 __mntput(mnt);
281 return;
282 }
283 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
284 mnt->mnt_pinned = 0;
285 spin_unlock(&vfsmount_lock);
286 acct_auto_close_mnt(mnt);
287 security_sb_umount_close(mnt);
288 goto repeat;
289 }
290}
291
292EXPORT_SYMBOL(mntput_no_expire);
293
294void mnt_pin(struct vfsmount *mnt)
295{
296 spin_lock(&vfsmount_lock);
297 mnt->mnt_pinned++;
298 spin_unlock(&vfsmount_lock);
299}
300
301EXPORT_SYMBOL(mnt_pin);
302
303void mnt_unpin(struct vfsmount *mnt)
304{
305 spin_lock(&vfsmount_lock);
306 if (mnt->mnt_pinned) {
307 atomic_inc(&mnt->mnt_count);
308 mnt->mnt_pinned--;
309 }
310 spin_unlock(&vfsmount_lock);
311}
312
313EXPORT_SYMBOL(mnt_unpin);
184 314
185/* iterator */ 315/* iterator */
186static void *m_start(struct seq_file *m, loff_t *pos) 316static void *m_start(struct seq_file *m, loff_t *pos)
@@ -189,7 +319,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
189 struct list_head *p; 319 struct list_head *p;
190 loff_t l = *pos; 320 loff_t l = *pos;
191 321
192 down_read(&n->sem); 322 down_read(&namespace_sem);
193 list_for_each(p, &n->list) 323 list_for_each(p, &n->list)
194 if (!l--) 324 if (!l--)
195 return list_entry(p, struct vfsmount, mnt_list); 325 return list_entry(p, struct vfsmount, mnt_list);
@@ -201,13 +331,12 @@ static void *m_next(struct seq_file *m, void *v, loff_t *pos)
201 struct namespace *n = m->private; 331 struct namespace *n = m->private;
202 struct list_head *p = ((struct vfsmount *)v)->mnt_list.next; 332 struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
203 (*pos)++; 333 (*pos)++;
204 return p==&n->list ? NULL : list_entry(p, struct vfsmount, mnt_list); 334 return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
205} 335}
206 336
207static void m_stop(struct seq_file *m, void *v) 337static void m_stop(struct seq_file *m, void *v)
208{ 338{
209 struct namespace *n = m->private; 339 up_read(&namespace_sem);
210 up_read(&n->sem);
211} 340}
212 341
213static inline void mangle(struct seq_file *m, const char *s) 342static inline void mangle(struct seq_file *m, const char *s)
@@ -275,35 +404,14 @@ struct seq_operations mounts_op = {
275 */ 404 */
276int may_umount_tree(struct vfsmount *mnt) 405int may_umount_tree(struct vfsmount *mnt)
277{ 406{
278 struct list_head *next; 407 int actual_refs = 0;
279 struct vfsmount *this_parent = mnt; 408 int minimum_refs = 0;
280 int actual_refs; 409 struct vfsmount *p;
281 int minimum_refs;
282 410
283 spin_lock(&vfsmount_lock); 411 spin_lock(&vfsmount_lock);
284 actual_refs = atomic_read(&mnt->mnt_count); 412 for (p = mnt; p; p = next_mnt(p, mnt)) {
285 minimum_refs = 2;
286repeat:
287 next = this_parent->mnt_mounts.next;
288resume:
289 while (next != &this_parent->mnt_mounts) {
290 struct vfsmount *p = list_entry(next, struct vfsmount, mnt_child);
291
292 next = next->next;
293
294 actual_refs += atomic_read(&p->mnt_count); 413 actual_refs += atomic_read(&p->mnt_count);
295 minimum_refs += 2; 414 minimum_refs += 2;
296
297 if (!list_empty(&p->mnt_mounts)) {
298 this_parent = p;
299 goto repeat;
300 }
301 }
302
303 if (this_parent != mnt) {
304 next = this_parent->mnt_child.next;
305 this_parent = this_parent->mnt_parent;
306 goto resume;
307 } 415 }
308 spin_unlock(&vfsmount_lock); 416 spin_unlock(&vfsmount_lock);
309 417
@@ -330,45 +438,67 @@ EXPORT_SYMBOL(may_umount_tree);
330 */ 438 */
331int may_umount(struct vfsmount *mnt) 439int may_umount(struct vfsmount *mnt)
332{ 440{
333 if (atomic_read(&mnt->mnt_count) > 2) 441 int ret = 0;
334 return -EBUSY; 442 spin_lock(&vfsmount_lock);
335 return 0; 443 if (propagate_mount_busy(mnt, 2))
444 ret = -EBUSY;
445 spin_unlock(&vfsmount_lock);
446 return ret;
336} 447}
337 448
338EXPORT_SYMBOL(may_umount); 449EXPORT_SYMBOL(may_umount);
339 450
340static void umount_tree(struct vfsmount *mnt) 451void release_mounts(struct list_head *head)
452{
453 struct vfsmount *mnt;
454 while(!list_empty(head)) {
455 mnt = list_entry(head->next, struct vfsmount, mnt_hash);
456 list_del_init(&mnt->mnt_hash);
457 if (mnt->mnt_parent != mnt) {
458 struct dentry *dentry;
459 struct vfsmount *m;
460 spin_lock(&vfsmount_lock);
461 dentry = mnt->mnt_mountpoint;
462 m = mnt->mnt_parent;
463 mnt->mnt_mountpoint = mnt->mnt_root;
464 mnt->mnt_parent = mnt;
465 spin_unlock(&vfsmount_lock);
466 dput(dentry);
467 mntput(m);
468 }
469 mntput(mnt);
470 }
471}
472
473void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
341{ 474{
342 struct vfsmount *p; 475 struct vfsmount *p;
343 LIST_HEAD(kill);
344 476
345 for (p = mnt; p; p = next_mnt(p, mnt)) { 477 for (p = mnt; p; p = next_mnt(p, mnt)) {
346 list_del(&p->mnt_list); 478 list_del(&p->mnt_hash);
347 list_add(&p->mnt_list, &kill); 479 list_add(&p->mnt_hash, kill);
348 p->mnt_namespace = NULL;
349 } 480 }
350 481
351 while (!list_empty(&kill)) { 482 if (propagate)
352 mnt = list_entry(kill.next, struct vfsmount, mnt_list); 483 propagate_umount(kill);
353 list_del_init(&mnt->mnt_list); 484
354 list_del_init(&mnt->mnt_expire); 485 list_for_each_entry(p, kill, mnt_hash) {
355 if (mnt->mnt_parent == mnt) { 486 list_del_init(&p->mnt_expire);
356 spin_unlock(&vfsmount_lock); 487 list_del_init(&p->mnt_list);
357 } else { 488 __touch_namespace(p->mnt_namespace);
358 struct nameidata old_nd; 489 p->mnt_namespace = NULL;
359 detach_mnt(mnt, &old_nd); 490 list_del_init(&p->mnt_child);
360 spin_unlock(&vfsmount_lock); 491 if (p->mnt_parent != p)
361 path_release(&old_nd); 492 mnt->mnt_mountpoint->d_mounted--;
362 } 493 change_mnt_propagation(p, MS_PRIVATE);
363 mntput(mnt);
364 spin_lock(&vfsmount_lock);
365 } 494 }
366} 495}
367 496
368static int do_umount(struct vfsmount *mnt, int flags) 497static int do_umount(struct vfsmount *mnt, int flags)
369{ 498{
370 struct super_block * sb = mnt->mnt_sb; 499 struct super_block *sb = mnt->mnt_sb;
371 int retval; 500 int retval;
501 LIST_HEAD(umount_list);
372 502
373 retval = security_sb_umount(mnt, flags); 503 retval = security_sb_umount(mnt, flags);
374 if (retval) 504 if (retval)
@@ -403,7 +533,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
403 */ 533 */
404 534
405 lock_kernel(); 535 lock_kernel();
406 if( (flags&MNT_FORCE) && sb->s_op->umount_begin) 536 if ((flags & MNT_FORCE) && sb->s_op->umount_begin)
407 sb->s_op->umount_begin(sb); 537 sb->s_op->umount_begin(sb);
408 unlock_kernel(); 538 unlock_kernel();
409 539
@@ -432,29 +562,21 @@ static int do_umount(struct vfsmount *mnt, int flags)
432 return retval; 562 return retval;
433 } 563 }
434 564
435 down_write(&current->namespace->sem); 565 down_write(&namespace_sem);
436 spin_lock(&vfsmount_lock); 566 spin_lock(&vfsmount_lock);
567 event++;
437 568
438 if (atomic_read(&sb->s_active) == 1) {
439 /* last instance - try to be smart */
440 spin_unlock(&vfsmount_lock);
441 lock_kernel();
442 DQUOT_OFF(sb);
443 acct_auto_close(sb);
444 unlock_kernel();
445 security_sb_umount_close(mnt);
446 spin_lock(&vfsmount_lock);
447 }
448 retval = -EBUSY; 569 retval = -EBUSY;
449 if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) { 570 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
450 if (!list_empty(&mnt->mnt_list)) 571 if (!list_empty(&mnt->mnt_list))
451 umount_tree(mnt); 572 umount_tree(mnt, 1, &umount_list);
452 retval = 0; 573 retval = 0;
453 } 574 }
454 spin_unlock(&vfsmount_lock); 575 spin_unlock(&vfsmount_lock);
455 if (retval) 576 if (retval)
456 security_sb_umount_busy(mnt); 577 security_sb_umount_busy(mnt);
457 up_write(&current->namespace->sem); 578 up_write(&namespace_sem);
579 release_mounts(&umount_list);
458 return retval; 580 return retval;
459} 581}
460 582
@@ -494,12 +616,11 @@ out:
494#ifdef __ARCH_WANT_SYS_OLDUMOUNT 616#ifdef __ARCH_WANT_SYS_OLDUMOUNT
495 617
496/* 618/*
497 * The 2.0 compatible umount. No flags. 619 * The 2.0 compatible umount. No flags.
498 */ 620 */
499
500asmlinkage long sys_oldumount(char __user * name) 621asmlinkage long sys_oldumount(char __user * name)
501{ 622{
502 return sys_umount(name,0); 623 return sys_umount(name, 0);
503} 624}
504 625
505#endif 626#endif
@@ -522,8 +643,7 @@ static int mount_is_safe(struct nameidata *nd)
522#endif 643#endif
523} 644}
524 645
525static int 646static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
526lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
527{ 647{
528 while (1) { 648 while (1) {
529 if (d == dentry) 649 if (d == dentry)
@@ -534,12 +654,16 @@ lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
534 } 654 }
535} 655}
536 656
537static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry) 657struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
658 int flag)
538{ 659{
539 struct vfsmount *res, *p, *q, *r, *s; 660 struct vfsmount *res, *p, *q, *r, *s;
540 struct nameidata nd; 661 struct nameidata nd;
541 662
542 res = q = clone_mnt(mnt, dentry); 663 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
664 return NULL;
665
666 res = q = clone_mnt(mnt, dentry, flag);
543 if (!q) 667 if (!q)
544 goto Enomem; 668 goto Enomem;
545 q->mnt_mountpoint = mnt->mnt_mountpoint; 669 q->mnt_mountpoint = mnt->mnt_mountpoint;
@@ -550,6 +674,10 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
550 continue; 674 continue;
551 675
552 for (s = r; s; s = next_mnt(s, r)) { 676 for (s = r; s; s = next_mnt(s, r)) {
677 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
678 s = skip_mnt_tree(s);
679 continue;
680 }
553 while (p != s->mnt_parent) { 681 while (p != s->mnt_parent) {
554 p = p->mnt_parent; 682 p = p->mnt_parent;
555 q = q->mnt_parent; 683 q = q->mnt_parent;
@@ -557,7 +685,7 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
557 p = s; 685 p = s;
558 nd.mnt = q; 686 nd.mnt = q;
559 nd.dentry = p->mnt_mountpoint; 687 nd.dentry = p->mnt_mountpoint;
560 q = clone_mnt(p, p->mnt_root); 688 q = clone_mnt(p, p->mnt_root, flag);
561 if (!q) 689 if (!q)
562 goto Enomem; 690 goto Enomem;
563 spin_lock(&vfsmount_lock); 691 spin_lock(&vfsmount_lock);
@@ -567,15 +695,114 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
567 } 695 }
568 } 696 }
569 return res; 697 return res;
570 Enomem: 698Enomem:
571 if (res) { 699 if (res) {
700 LIST_HEAD(umount_list);
572 spin_lock(&vfsmount_lock); 701 spin_lock(&vfsmount_lock);
573 umount_tree(res); 702 umount_tree(res, 0, &umount_list);
574 spin_unlock(&vfsmount_lock); 703 spin_unlock(&vfsmount_lock);
704 release_mounts(&umount_list);
575 } 705 }
576 return NULL; 706 return NULL;
577} 707}
578 708
709/*
710 * @source_mnt : mount tree to be attached
711 * @nd : place the mount tree @source_mnt is attached
712 * @parent_nd : if non-null, detach the source_mnt from its parent and
713 * store the parent mount and mountpoint dentry.
714 * (done when source_mnt is moved)
715 *
716 * NOTE: in the table below explains the semantics when a source mount
717 * of a given type is attached to a destination mount of a given type.
718 * ---------------------------------------------------------------------------
719 * | BIND MOUNT OPERATION |
720 * |**************************************************************************
721 * | source-->| shared | private | slave | unbindable |
722 * | dest | | | | |
723 * | | | | | | |
724 * | v | | | | |
725 * |**************************************************************************
726 * | shared | shared (++) | shared (+) | shared(+++)| invalid |
727 * | | | | | |
728 * |non-shared| shared (+) | private | slave (*) | invalid |
729 * ***************************************************************************
730 * A bind operation clones the source mount and mounts the clone on the
731 * destination mount.
732 *
733 * (++) the cloned mount is propagated to all the mounts in the propagation
734 * tree of the destination mount and the cloned mount is added to
735 * the peer group of the source mount.
736 * (+) the cloned mount is created under the destination mount and is marked
737 * as shared. The cloned mount is added to the peer group of the source
738 * mount.
739 * (+++) the mount is propagated to all the mounts in the propagation tree
740 * of the destination mount and the cloned mount is made slave
741 * of the same master as that of the source mount. The cloned mount
742 * is marked as 'shared and slave'.
743 * (*) the cloned mount is made a slave of the same master as that of the
744 * source mount.
745 *
746 * ---------------------------------------------------------------------------
747 * | MOVE MOUNT OPERATION |
748 * |**************************************************************************
749 * | source-->| shared | private | slave | unbindable |
750 * | dest | | | | |
751 * | | | | | | |
752 * | v | | | | |
753 * |**************************************************************************
754 * | shared | shared (+) | shared (+) | shared(+++) | invalid |
755 * | | | | | |
756 * |non-shared| shared (+*) | private | slave (*) | unbindable |
757 * ***************************************************************************
758 *
759 * (+) the mount is moved to the destination. And is then propagated to
760 * all the mounts in the propagation tree of the destination mount.
761 * (+*) the mount is moved to the destination.
762 * (+++) the mount is moved to the destination and is then propagated to
763 * all the mounts belonging to the destination mount's propagation tree.
764 * the mount is marked as 'shared and slave'.
765 * (*) the mount continues to be a slave at the new location.
766 *
767 * if the source mount is a tree, the operations explained above is
768 * applied to each mount in the tree.
769 * Must be called without spinlocks held, since this function can sleep
770 * in allocations.
771 */
772static int attach_recursive_mnt(struct vfsmount *source_mnt,
773 struct nameidata *nd, struct nameidata *parent_nd)
774{
775 LIST_HEAD(tree_list);
776 struct vfsmount *dest_mnt = nd->mnt;
777 struct dentry *dest_dentry = nd->dentry;
778 struct vfsmount *child, *p;
779
780 if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
781 return -EINVAL;
782
783 if (IS_MNT_SHARED(dest_mnt)) {
784 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
785 set_mnt_shared(p);
786 }
787
788 spin_lock(&vfsmount_lock);
789 if (parent_nd) {
790 detach_mnt(source_mnt, parent_nd);
791 attach_mnt(source_mnt, nd);
792 touch_namespace(current->namespace);
793 } else {
794 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
795 commit_tree(source_mnt);
796 }
797
798 list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
799 list_del_init(&child->mnt_hash);
800 commit_tree(child);
801 }
802 spin_unlock(&vfsmount_lock);
803 return 0;
804}
805
579static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) 806static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
580{ 807{
581 int err; 808 int err;
@@ -596,17 +823,8 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
596 goto out_unlock; 823 goto out_unlock;
597 824
598 err = -ENOENT; 825 err = -ENOENT;
599 spin_lock(&vfsmount_lock); 826 if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
600 if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) { 827 err = attach_recursive_mnt(mnt, nd, NULL);
601 struct list_head head;
602
603 attach_mnt(mnt, nd);
604 list_add_tail(&head, &mnt->mnt_list);
605 list_splice(&head, current->namespace->list.prev);
606 mntget(mnt);
607 err = 0;
608 }
609 spin_unlock(&vfsmount_lock);
610out_unlock: 828out_unlock:
611 up(&nd->dentry->d_inode->i_sem); 829 up(&nd->dentry->d_inode->i_sem);
612 if (!err) 830 if (!err)
@@ -615,6 +833,27 @@ out_unlock:
615} 833}
616 834
617/* 835/*
836 * recursively change the type of the mountpoint.
837 */
838static int do_change_type(struct nameidata *nd, int flag)
839{
840 struct vfsmount *m, *mnt = nd->mnt;
841 int recurse = flag & MS_REC;
842 int type = flag & ~MS_REC;
843
844 if (nd->dentry != nd->mnt->mnt_root)
845 return -EINVAL;
846
847 down_write(&namespace_sem);
848 spin_lock(&vfsmount_lock);
849 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
850 change_mnt_propagation(m, type);
851 spin_unlock(&vfsmount_lock);
852 up_write(&namespace_sem);
853 return 0;
854}
855
856/*
618 * do loopback mount. 857 * do loopback mount.
619 */ 858 */
620static int do_loopback(struct nameidata *nd, char *old_name, int recurse) 859static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
@@ -630,32 +869,34 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
630 if (err) 869 if (err)
631 return err; 870 return err;
632 871
633 down_write(&current->namespace->sem); 872 down_write(&namespace_sem);
634 err = -EINVAL; 873 err = -EINVAL;
635 if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) { 874 if (IS_MNT_UNBINDABLE(old_nd.mnt))
636 err = -ENOMEM; 875 goto out;
637 if (recurse)
638 mnt = copy_tree(old_nd.mnt, old_nd.dentry);
639 else
640 mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
641 }
642 876
643 if (mnt) { 877 if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
644 /* stop bind mounts from expiring */ 878 goto out;
879
880 err = -ENOMEM;
881 if (recurse)
882 mnt = copy_tree(old_nd.mnt, old_nd.dentry, 0);
883 else
884 mnt = clone_mnt(old_nd.mnt, old_nd.dentry, 0);
885
886 if (!mnt)
887 goto out;
888
889 err = graft_tree(mnt, nd);
890 if (err) {
891 LIST_HEAD(umount_list);
645 spin_lock(&vfsmount_lock); 892 spin_lock(&vfsmount_lock);
646 list_del_init(&mnt->mnt_expire); 893 umount_tree(mnt, 0, &umount_list);
647 spin_unlock(&vfsmount_lock); 894 spin_unlock(&vfsmount_lock);
648 895 release_mounts(&umount_list);
649 err = graft_tree(mnt, nd);
650 if (err) {
651 spin_lock(&vfsmount_lock);
652 umount_tree(mnt);
653 spin_unlock(&vfsmount_lock);
654 } else
655 mntput(mnt);
656 } 896 }
657 897
658 up_write(&current->namespace->sem); 898out:
899 up_write(&namespace_sem);
659 path_release(&old_nd); 900 path_release(&old_nd);
660 return err; 901 return err;
661} 902}
@@ -665,12 +906,11 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
665 * If you've mounted a non-root directory somewhere and want to do remount 906 * If you've mounted a non-root directory somewhere and want to do remount
666 * on it - tough luck. 907 * on it - tough luck.
667 */ 908 */
668
669static int do_remount(struct nameidata *nd, int flags, int mnt_flags, 909static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
670 void *data) 910 void *data)
671{ 911{
672 int err; 912 int err;
673 struct super_block * sb = nd->mnt->mnt_sb; 913 struct super_block *sb = nd->mnt->mnt_sb;
674 914
675 if (!capable(CAP_SYS_ADMIN)) 915 if (!capable(CAP_SYS_ADMIN))
676 return -EPERM; 916 return -EPERM;
@@ -684,13 +924,23 @@ static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
684 down_write(&sb->s_umount); 924 down_write(&sb->s_umount);
685 err = do_remount_sb(sb, flags, data, 0); 925 err = do_remount_sb(sb, flags, data, 0);
686 if (!err) 926 if (!err)
687 nd->mnt->mnt_flags=mnt_flags; 927 nd->mnt->mnt_flags = mnt_flags;
688 up_write(&sb->s_umount); 928 up_write(&sb->s_umount);
689 if (!err) 929 if (!err)
690 security_sb_post_remount(nd->mnt, flags, data); 930 security_sb_post_remount(nd->mnt, flags, data);
691 return err; 931 return err;
692} 932}
693 933
934static inline int tree_contains_unbindable(struct vfsmount *mnt)
935{
936 struct vfsmount *p;
937 for (p = mnt; p; p = next_mnt(p, mnt)) {
938 if (IS_MNT_UNBINDABLE(p))
939 return 1;
940 }
941 return 0;
942}
943
694static int do_move_mount(struct nameidata *nd, char *old_name) 944static int do_move_mount(struct nameidata *nd, char *old_name)
695{ 945{
696 struct nameidata old_nd, parent_nd; 946 struct nameidata old_nd, parent_nd;
@@ -704,8 +954,8 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
704 if (err) 954 if (err)
705 return err; 955 return err;
706 956
707 down_write(&current->namespace->sem); 957 down_write(&namespace_sem);
708 while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) 958 while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
709 ; 959 ;
710 err = -EINVAL; 960 err = -EINVAL;
711 if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt)) 961 if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
@@ -716,39 +966,47 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
716 if (IS_DEADDIR(nd->dentry->d_inode)) 966 if (IS_DEADDIR(nd->dentry->d_inode))
717 goto out1; 967 goto out1;
718 968
719 spin_lock(&vfsmount_lock);
720 if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry)) 969 if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
721 goto out2; 970 goto out1;
722 971
723 err = -EINVAL; 972 err = -EINVAL;
724 if (old_nd.dentry != old_nd.mnt->mnt_root) 973 if (old_nd.dentry != old_nd.mnt->mnt_root)
725 goto out2; 974 goto out1;
726 975
727 if (old_nd.mnt == old_nd.mnt->mnt_parent) 976 if (old_nd.mnt == old_nd.mnt->mnt_parent)
728 goto out2; 977 goto out1;
729 978
730 if (S_ISDIR(nd->dentry->d_inode->i_mode) != 979 if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
731 S_ISDIR(old_nd.dentry->d_inode->i_mode)) 980 S_ISDIR(old_nd.dentry->d_inode->i_mode))
732 goto out2; 981 goto out1;
733 982 /*
983 * Don't move a mount residing in a shared parent.
984 */
985 if (old_nd.mnt->mnt_parent && IS_MNT_SHARED(old_nd.mnt->mnt_parent))
986 goto out1;
987 /*
988 * Don't move a mount tree containing unbindable mounts to a destination
989 * mount which is shared.
990 */
991 if (IS_MNT_SHARED(nd->mnt) && tree_contains_unbindable(old_nd.mnt))
992 goto out1;
734 err = -ELOOP; 993 err = -ELOOP;
735 for (p = nd->mnt; p->mnt_parent!=p; p = p->mnt_parent) 994 for (p = nd->mnt; p->mnt_parent != p; p = p->mnt_parent)
736 if (p == old_nd.mnt) 995 if (p == old_nd.mnt)
737 goto out2; 996 goto out1;
738 err = 0;
739 997
740 detach_mnt(old_nd.mnt, &parent_nd); 998 if ((err = attach_recursive_mnt(old_nd.mnt, nd, &parent_nd)))
741 attach_mnt(old_nd.mnt, nd); 999 goto out1;
742 1000
1001 spin_lock(&vfsmount_lock);
743 /* if the mount is moved, it should no longer be expire 1002 /* if the mount is moved, it should no longer be expire
744 * automatically */ 1003 * automatically */
745 list_del_init(&old_nd.mnt->mnt_expire); 1004 list_del_init(&old_nd.mnt->mnt_expire);
746out2:
747 spin_unlock(&vfsmount_lock); 1005 spin_unlock(&vfsmount_lock);
748out1: 1006out1:
749 up(&nd->dentry->d_inode->i_sem); 1007 up(&nd->dentry->d_inode->i_sem);
750out: 1008out:
751 up_write(&current->namespace->sem); 1009 up_write(&namespace_sem);
752 if (!err) 1010 if (!err)
753 path_release(&parent_nd); 1011 path_release(&parent_nd);
754 path_release(&old_nd); 1012 path_release(&old_nd);
@@ -787,9 +1045,9 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
787{ 1045{
788 int err; 1046 int err;
789 1047
790 down_write(&current->namespace->sem); 1048 down_write(&namespace_sem);
791 /* Something was mounted here while we slept */ 1049 /* Something was mounted here while we slept */
792 while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) 1050 while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
793 ; 1051 ;
794 err = -EINVAL; 1052 err = -EINVAL;
795 if (!check_mnt(nd->mnt)) 1053 if (!check_mnt(nd->mnt))
@@ -806,25 +1064,28 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
806 goto unlock; 1064 goto unlock;
807 1065
808 newmnt->mnt_flags = mnt_flags; 1066 newmnt->mnt_flags = mnt_flags;
809 newmnt->mnt_namespace = current->namespace; 1067 if ((err = graft_tree(newmnt, nd)))
810 err = graft_tree(newmnt, nd); 1068 goto unlock;
811 1069
812 if (err == 0 && fslist) { 1070 if (fslist) {
813 /* add to the specified expiration list */ 1071 /* add to the specified expiration list */
814 spin_lock(&vfsmount_lock); 1072 spin_lock(&vfsmount_lock);
815 list_add_tail(&newmnt->mnt_expire, fslist); 1073 list_add_tail(&newmnt->mnt_expire, fslist);
816 spin_unlock(&vfsmount_lock); 1074 spin_unlock(&vfsmount_lock);
817 } 1075 }
1076 up_write(&namespace_sem);
1077 return 0;
818 1078
819unlock: 1079unlock:
820 up_write(&current->namespace->sem); 1080 up_write(&namespace_sem);
821 mntput(newmnt); 1081 mntput(newmnt);
822 return err; 1082 return err;
823} 1083}
824 1084
825EXPORT_SYMBOL_GPL(do_add_mount); 1085EXPORT_SYMBOL_GPL(do_add_mount);
826 1086
827static void expire_mount(struct vfsmount *mnt, struct list_head *mounts) 1087static void expire_mount(struct vfsmount *mnt, struct list_head *mounts,
1088 struct list_head *umounts)
828{ 1089{
829 spin_lock(&vfsmount_lock); 1090 spin_lock(&vfsmount_lock);
830 1091
@@ -841,27 +1102,13 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
841 * Check that it is still dead: the count should now be 2 - as 1102 * Check that it is still dead: the count should now be 2 - as
842 * contributed by the vfsmount parent and the mntget above 1103 * contributed by the vfsmount parent and the mntget above
843 */ 1104 */
844 if (atomic_read(&mnt->mnt_count) == 2) { 1105 if (!propagate_mount_busy(mnt, 2)) {
845 struct nameidata old_nd;
846
847 /* delete from the namespace */ 1106 /* delete from the namespace */
1107 touch_namespace(mnt->mnt_namespace);
848 list_del_init(&mnt->mnt_list); 1108 list_del_init(&mnt->mnt_list);
849 mnt->mnt_namespace = NULL; 1109 mnt->mnt_namespace = NULL;
850 detach_mnt(mnt, &old_nd); 1110 umount_tree(mnt, 1, umounts);
851 spin_unlock(&vfsmount_lock); 1111 spin_unlock(&vfsmount_lock);
852 path_release(&old_nd);
853
854 /*
855 * Now lay it to rest if this was the last ref on the superblock
856 */
857 if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
858 /* last instance - try to be smart */
859 lock_kernel();
860 DQUOT_OFF(mnt->mnt_sb);
861 acct_auto_close(mnt->mnt_sb);
862 unlock_kernel();
863 }
864 mntput(mnt);
865 } else { 1112 } else {
866 /* 1113 /*
867 * Someone brought it back to life whilst we didn't have any 1114 * Someone brought it back to life whilst we didn't have any
@@ -910,6 +1157,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
910 * - dispose of the corpse 1157 * - dispose of the corpse
911 */ 1158 */
912 while (!list_empty(&graveyard)) { 1159 while (!list_empty(&graveyard)) {
1160 LIST_HEAD(umounts);
913 mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire); 1161 mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire);
914 list_del_init(&mnt->mnt_expire); 1162 list_del_init(&mnt->mnt_expire);
915 1163
@@ -921,13 +1169,12 @@ void mark_mounts_for_expiry(struct list_head *mounts)
921 get_namespace(namespace); 1169 get_namespace(namespace);
922 1170
923 spin_unlock(&vfsmount_lock); 1171 spin_unlock(&vfsmount_lock);
924 down_write(&namespace->sem); 1172 down_write(&namespace_sem);
925 expire_mount(mnt, mounts); 1173 expire_mount(mnt, mounts, &umounts);
926 up_write(&namespace->sem); 1174 up_write(&namespace_sem);
927 1175 release_mounts(&umounts);
928 mntput(mnt); 1176 mntput(mnt);
929 put_namespace(namespace); 1177 put_namespace(namespace);
930
931 spin_lock(&vfsmount_lock); 1178 spin_lock(&vfsmount_lock);
932 } 1179 }
933 1180
@@ -942,8 +1189,8 @@ EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
942 * Note that this function differs from copy_from_user() in that it will oops 1189 * Note that this function differs from copy_from_user() in that it will oops
943 * on bad values of `to', rather than returning a short copy. 1190 * on bad values of `to', rather than returning a short copy.
944 */ 1191 */
945static long 1192static long exact_copy_from_user(void *to, const void __user * from,
946exact_copy_from_user(void *to, const void __user *from, unsigned long n) 1193 unsigned long n)
947{ 1194{
948 char *t = to; 1195 char *t = to;
949 const char __user *f = from; 1196 const char __user *f = from;
@@ -964,12 +1211,12 @@ exact_copy_from_user(void *to, const void __user *from, unsigned long n)
964 return n; 1211 return n;
965} 1212}
966 1213
967int copy_mount_options(const void __user *data, unsigned long *where) 1214int copy_mount_options(const void __user * data, unsigned long *where)
968{ 1215{
969 int i; 1216 int i;
970 unsigned long page; 1217 unsigned long page;
971 unsigned long size; 1218 unsigned long size;
972 1219
973 *where = 0; 1220 *where = 0;
974 if (!data) 1221 if (!data)
975 return 0; 1222 return 0;
@@ -988,7 +1235,7 @@ int copy_mount_options(const void __user *data, unsigned long *where)
988 1235
989 i = size - exact_copy_from_user((void *)page, data, size); 1236 i = size - exact_copy_from_user((void *)page, data, size);
990 if (!i) { 1237 if (!i) {
991 free_page(page); 1238 free_page(page);
992 return -EFAULT; 1239 return -EFAULT;
993 } 1240 }
994 if (i != PAGE_SIZE) 1241 if (i != PAGE_SIZE)
@@ -1011,7 +1258,7 @@ int copy_mount_options(const void __user *data, unsigned long *where)
1011 * Therefore, if this magic number is present, it carries no information 1258 * Therefore, if this magic number is present, it carries no information
1012 * and must be discarded. 1259 * and must be discarded.
1013 */ 1260 */
1014long do_mount(char * dev_name, char * dir_name, char *type_page, 1261long do_mount(char *dev_name, char *dir_name, char *type_page,
1015 unsigned long flags, void *data_page) 1262 unsigned long flags, void *data_page)
1016{ 1263{
1017 struct nameidata nd; 1264 struct nameidata nd;
@@ -1039,7 +1286,7 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
1039 mnt_flags |= MNT_NODEV; 1286 mnt_flags |= MNT_NODEV;
1040 if (flags & MS_NOEXEC) 1287 if (flags & MS_NOEXEC)
1041 mnt_flags |= MNT_NOEXEC; 1288 mnt_flags |= MNT_NOEXEC;
1042 flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE); 1289 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE);
1043 1290
1044 /* ... and get the mountpoint */ 1291 /* ... and get the mountpoint */
1045 retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); 1292 retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
@@ -1055,6 +1302,8 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
1055 data_page); 1302 data_page);
1056 else if (flags & MS_BIND) 1303 else if (flags & MS_BIND)
1057 retval = do_loopback(&nd, dev_name, flags & MS_REC); 1304 retval = do_loopback(&nd, dev_name, flags & MS_REC);
1305 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1306 retval = do_change_type(&nd, flags);
1058 else if (flags & MS_MOVE) 1307 else if (flags & MS_MOVE)
1059 retval = do_move_mount(&nd, dev_name); 1308 retval = do_move_mount(&nd, dev_name);
1060 else 1309 else
@@ -1091,14 +1340,16 @@ int copy_namespace(int flags, struct task_struct *tsk)
1091 goto out; 1340 goto out;
1092 1341
1093 atomic_set(&new_ns->count, 1); 1342 atomic_set(&new_ns->count, 1);
1094 init_rwsem(&new_ns->sem);
1095 INIT_LIST_HEAD(&new_ns->list); 1343 INIT_LIST_HEAD(&new_ns->list);
1344 init_waitqueue_head(&new_ns->poll);
1345 new_ns->event = 0;
1096 1346
1097 down_write(&tsk->namespace->sem); 1347 down_write(&namespace_sem);
1098 /* First pass: copy the tree topology */ 1348 /* First pass: copy the tree topology */
1099 new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root); 1349 new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root,
1350 CL_COPY_ALL | CL_EXPIRE);
1100 if (!new_ns->root) { 1351 if (!new_ns->root) {
1101 up_write(&tsk->namespace->sem); 1352 up_write(&namespace_sem);
1102 kfree(new_ns); 1353 kfree(new_ns);
1103 goto out; 1354 goto out;
1104 } 1355 }
@@ -1132,7 +1383,7 @@ int copy_namespace(int flags, struct task_struct *tsk)
1132 p = next_mnt(p, namespace->root); 1383 p = next_mnt(p, namespace->root);
1133 q = next_mnt(q, new_ns->root); 1384 q = next_mnt(q, new_ns->root);
1134 } 1385 }
1135 up_write(&tsk->namespace->sem); 1386 up_write(&namespace_sem);
1136 1387
1137 tsk->namespace = new_ns; 1388 tsk->namespace = new_ns;
1138 1389
@@ -1161,7 +1412,7 @@ asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
1161 unsigned long dev_page; 1412 unsigned long dev_page;
1162 char *dir_page; 1413 char *dir_page;
1163 1414
1164 retval = copy_mount_options (type, &type_page); 1415 retval = copy_mount_options(type, &type_page);
1165 if (retval < 0) 1416 if (retval < 0)
1166 return retval; 1417 return retval;
1167 1418
@@ -1170,17 +1421,17 @@ asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
1170 if (IS_ERR(dir_page)) 1421 if (IS_ERR(dir_page))
1171 goto out1; 1422 goto out1;
1172 1423
1173 retval = copy_mount_options (dev_name, &dev_page); 1424 retval = copy_mount_options(dev_name, &dev_page);
1174 if (retval < 0) 1425 if (retval < 0)
1175 goto out2; 1426 goto out2;
1176 1427
1177 retval = copy_mount_options (data, &data_page); 1428 retval = copy_mount_options(data, &data_page);
1178 if (retval < 0) 1429 if (retval < 0)
1179 goto out3; 1430 goto out3;
1180 1431
1181 lock_kernel(); 1432 lock_kernel();
1182 retval = do_mount((char*)dev_page, dir_page, (char*)type_page, 1433 retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
1183 flags, (void*)data_page); 1434 flags, (void *)data_page);
1184 unlock_kernel(); 1435 unlock_kernel();
1185 free_page(data_page); 1436 free_page(data_page);
1186 1437
@@ -1249,9 +1500,11 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
1249 if (fs) { 1500 if (fs) {
1250 atomic_inc(&fs->count); 1501 atomic_inc(&fs->count);
1251 task_unlock(p); 1502 task_unlock(p);
1252 if (fs->root==old_nd->dentry&&fs->rootmnt==old_nd->mnt) 1503 if (fs->root == old_nd->dentry
1504 && fs->rootmnt == old_nd->mnt)
1253 set_fs_root(fs, new_nd->mnt, new_nd->dentry); 1505 set_fs_root(fs, new_nd->mnt, new_nd->dentry);
1254 if (fs->pwd==old_nd->dentry&&fs->pwdmnt==old_nd->mnt) 1506 if (fs->pwd == old_nd->dentry
1507 && fs->pwdmnt == old_nd->mnt)
1255 set_fs_pwd(fs, new_nd->mnt, new_nd->dentry); 1508 set_fs_pwd(fs, new_nd->mnt, new_nd->dentry);
1256 put_fs_struct(fs); 1509 put_fs_struct(fs);
1257 } else 1510 } else
@@ -1281,8 +1534,8 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
1281 * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root 1534 * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
1282 * first. 1535 * first.
1283 */ 1536 */
1284 1537asmlinkage long sys_pivot_root(const char __user * new_root,
1285asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *put_old) 1538 const char __user * put_old)
1286{ 1539{
1287 struct vfsmount *tmp; 1540 struct vfsmount *tmp;
1288 struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; 1541 struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
@@ -1293,14 +1546,15 @@ asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *p
1293 1546
1294 lock_kernel(); 1547 lock_kernel();
1295 1548
1296 error = __user_walk(new_root, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd); 1549 error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
1550 &new_nd);
1297 if (error) 1551 if (error)
1298 goto out0; 1552 goto out0;
1299 error = -EINVAL; 1553 error = -EINVAL;
1300 if (!check_mnt(new_nd.mnt)) 1554 if (!check_mnt(new_nd.mnt))
1301 goto out1; 1555 goto out1;
1302 1556
1303 error = __user_walk(put_old, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd); 1557 error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd);
1304 if (error) 1558 if (error)
1305 goto out1; 1559 goto out1;
1306 1560
@@ -1314,9 +1568,13 @@ asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *p
1314 user_nd.mnt = mntget(current->fs->rootmnt); 1568 user_nd.mnt = mntget(current->fs->rootmnt);
1315 user_nd.dentry = dget(current->fs->root); 1569 user_nd.dentry = dget(current->fs->root);
1316 read_unlock(&current->fs->lock); 1570 read_unlock(&current->fs->lock);
1317 down_write(&current->namespace->sem); 1571 down_write(&namespace_sem);
1318 down(&old_nd.dentry->d_inode->i_sem); 1572 down(&old_nd.dentry->d_inode->i_sem);
1319 error = -EINVAL; 1573 error = -EINVAL;
1574 if (IS_MNT_SHARED(old_nd.mnt) ||
1575 IS_MNT_SHARED(new_nd.mnt->mnt_parent) ||
1576 IS_MNT_SHARED(user_nd.mnt->mnt_parent))
1577 goto out2;
1320 if (!check_mnt(user_nd.mnt)) 1578 if (!check_mnt(user_nd.mnt))
1321 goto out2; 1579 goto out2;
1322 error = -ENOENT; 1580 error = -ENOENT;
@@ -1356,6 +1614,7 @@ asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *p
1356 detach_mnt(user_nd.mnt, &root_parent); 1614 detach_mnt(user_nd.mnt, &root_parent);
1357 attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */ 1615 attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */
1358 attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */ 1616 attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
1617 touch_namespace(current->namespace);
1359 spin_unlock(&vfsmount_lock); 1618 spin_unlock(&vfsmount_lock);
1360 chroot_fs_refs(&user_nd, &new_nd); 1619 chroot_fs_refs(&user_nd, &new_nd);
1361 security_sb_post_pivotroot(&user_nd, &new_nd); 1620 security_sb_post_pivotroot(&user_nd, &new_nd);
@@ -1364,7 +1623,7 @@ asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *p
1364 path_release(&parent_nd); 1623 path_release(&parent_nd);
1365out2: 1624out2:
1366 up(&old_nd.dentry->d_inode->i_sem); 1625 up(&old_nd.dentry->d_inode->i_sem);
1367 up_write(&current->namespace->sem); 1626 up_write(&namespace_sem);
1368 path_release(&user_nd); 1627 path_release(&user_nd);
1369 path_release(&old_nd); 1628 path_release(&old_nd);
1370out1: 1629out1:
@@ -1391,7 +1650,8 @@ static void __init init_mount_tree(void)
1391 panic("Can't allocate initial namespace"); 1650 panic("Can't allocate initial namespace");
1392 atomic_set(&namespace->count, 1); 1651 atomic_set(&namespace->count, 1);
1393 INIT_LIST_HEAD(&namespace->list); 1652 INIT_LIST_HEAD(&namespace->list);
1394 init_rwsem(&namespace->sem); 1653 init_waitqueue_head(&namespace->poll);
1654 namespace->event = 0;
1395 list_add(&mnt->mnt_list, &namespace->list); 1655 list_add(&mnt->mnt_list, &namespace->list);
1396 namespace->root = mnt; 1656 namespace->root = mnt;
1397 mnt->mnt_namespace = namespace; 1657 mnt->mnt_namespace = namespace;
@@ -1414,11 +1674,12 @@ void __init mnt_init(unsigned long mempages)
1414 unsigned int nr_hash; 1674 unsigned int nr_hash;
1415 int i; 1675 int i;
1416 1676
1677 init_rwsem(&namespace_sem);
1678
1417 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount), 1679 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
1418 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1680 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
1419 1681
1420 mount_hashtable = (struct list_head *) 1682 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
1421 __get_free_page(GFP_ATOMIC);
1422 1683
1423 if (!mount_hashtable) 1684 if (!mount_hashtable)
1424 panic("Failed to allocate mount hash table\n"); 1685 panic("Failed to allocate mount hash table\n");
@@ -1440,7 +1701,7 @@ void __init mnt_init(unsigned long mempages)
1440 * from the number of bits we can fit. 1701 * from the number of bits we can fit.
1441 */ 1702 */
1442 nr_hash = 1UL << hash_bits; 1703 nr_hash = 1UL << hash_bits;
1443 hash_mask = nr_hash-1; 1704 hash_mask = nr_hash - 1;
1444 1705
1445 printk("Mount-cache hash table entries: %d\n", nr_hash); 1706 printk("Mount-cache hash table entries: %d\n", nr_hash);
1446 1707
@@ -1460,12 +1721,14 @@ void __init mnt_init(unsigned long mempages)
1460void __put_namespace(struct namespace *namespace) 1721void __put_namespace(struct namespace *namespace)
1461{ 1722{
1462 struct vfsmount *root = namespace->root; 1723 struct vfsmount *root = namespace->root;
1724 LIST_HEAD(umount_list);
1463 namespace->root = NULL; 1725 namespace->root = NULL;
1464 spin_unlock(&vfsmount_lock); 1726 spin_unlock(&vfsmount_lock);
1465 down_write(&namespace->sem); 1727 down_write(&namespace_sem);
1466 spin_lock(&vfsmount_lock); 1728 spin_lock(&vfsmount_lock);
1467 umount_tree(root); 1729 umount_tree(root, 0, &umount_list);
1468 spin_unlock(&vfsmount_lock); 1730 spin_unlock(&vfsmount_lock);
1469 up_write(&namespace->sem); 1731 up_write(&namespace_sem);
1732 release_mounts(&umount_list);
1470 kfree(namespace); 1733 kfree(namespace);
1471} 1734}