Diffstat (limited to 'fs/namespace.c')
-rw-r--r--  fs/namespace.c  417
1 file changed, 206 insertions(+), 211 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 134d494158d9..3dc283fd4716 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -42,6 +42,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
 static int event;
 static DEFINE_IDA(mnt_id_ida);
 static DEFINE_IDA(mnt_group_ida);
+static int mnt_id_start = 0;
+static int mnt_group_start = 1;
 
 static struct list_head *mount_hashtable __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
@@ -69,7 +71,9 @@ static int mnt_alloc_id(struct vfsmount *mnt)
 retry:
 	ida_pre_get(&mnt_id_ida, GFP_KERNEL);
 	spin_lock(&vfsmount_lock);
-	res = ida_get_new(&mnt_id_ida, &mnt->mnt_id);
+	res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
+	if (!res)
+		mnt_id_start = mnt->mnt_id + 1;
 	spin_unlock(&vfsmount_lock);
 	if (res == -EAGAIN)
 		goto retry;
@@ -79,8 +83,11 @@ retry:
 
 static void mnt_free_id(struct vfsmount *mnt)
 {
+	int id = mnt->mnt_id;
 	spin_lock(&vfsmount_lock);
-	ida_remove(&mnt_id_ida, mnt->mnt_id);
+	ida_remove(&mnt_id_ida, id);
+	if (mnt_id_start > id)
+		mnt_id_start = id;
 	spin_unlock(&vfsmount_lock);
 }
 
@@ -91,10 +98,18 @@ static void mnt_free_id(struct vfsmount *mnt)
  */
 static int mnt_alloc_group_id(struct vfsmount *mnt)
 {
+	int res;
+
 	if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
 		return -ENOMEM;
 
-	return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id);
+	res = ida_get_new_above(&mnt_group_ida,
+				mnt_group_start,
+				&mnt->mnt_group_id);
+	if (!res)
+		mnt_group_start = mnt->mnt_group_id + 1;
+
+	return res;
 }
 
 /*
@@ -102,7 +117,10 @@ static int mnt_alloc_group_id(struct vfsmount *mnt)
  */
 void mnt_release_group_id(struct vfsmount *mnt)
 {
-	ida_remove(&mnt_group_ida, mnt->mnt_group_id);
+	int id = mnt->mnt_group_id;
+	ida_remove(&mnt_group_ida, id);
+	if (mnt_group_start > id)
+		mnt_group_start = id;
 	mnt->mnt_group_id = 0;
 }
 
@@ -131,10 +149,20 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&mnt->mnt_share);
 		INIT_LIST_HEAD(&mnt->mnt_slave_list);
 		INIT_LIST_HEAD(&mnt->mnt_slave);
-		atomic_set(&mnt->__mnt_writers, 0);
+#ifdef CONFIG_SMP
+		mnt->mnt_writers = alloc_percpu(int);
+		if (!mnt->mnt_writers)
+			goto out_free_devname;
+#else
+		mnt->mnt_writers = 0;
+#endif
 	}
 	return mnt;
 
+#ifdef CONFIG_SMP
+out_free_devname:
+	kfree(mnt->mnt_devname);
+#endif
 out_free_id:
 	mnt_free_id(mnt);
 out_free_cache:
@@ -171,65 +199,38 @@ int __mnt_is_readonly(struct vfsmount *mnt)
 }
 EXPORT_SYMBOL_GPL(__mnt_is_readonly);
 
-struct mnt_writer {
-	/*
-	 * If holding multiple instances of this lock, they
-	 * must be ordered by cpu number.
-	 */
-	spinlock_t lock;
-	struct lock_class_key lock_class; /* compiles out with !lockdep */
-	unsigned long count;
-	struct vfsmount *mnt;
-} ____cacheline_aligned_in_smp;
-static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
+static inline void inc_mnt_writers(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
+#else
+	mnt->mnt_writers++;
+#endif
+}
 
-static int __init init_mnt_writers(void)
-{
-	int cpu;
-	for_each_possible_cpu(cpu) {
-		struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
-		spin_lock_init(&writer->lock);
-		lockdep_set_class(&writer->lock, &writer->lock_class);
-		writer->count = 0;
-	}
-	return 0;
+static inline void dec_mnt_writers(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
+#else
+	mnt->mnt_writers--;
+#endif
 }
-fs_initcall(init_mnt_writers);
 
-static void unlock_mnt_writers(void)
+static unsigned int count_mnt_writers(struct vfsmount *mnt)
 {
+#ifdef CONFIG_SMP
+	unsigned int count = 0;
 	int cpu;
-	struct mnt_writer *cpu_writer;
 
 	for_each_possible_cpu(cpu) {
-		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_unlock(&cpu_writer->lock);
+		count += *per_cpu_ptr(mnt->mnt_writers, cpu);
 	}
-}
 
-static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
-{
-	if (!cpu_writer->mnt)
-		return;
-	/*
-	 * This is in case anyone ever leaves an invalid,
-	 * old ->mnt and a count of 0.
-	 */
-	if (!cpu_writer->count)
-		return;
-	atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
-	cpu_writer->count = 0;
-}
- /*
- * must hold cpu_writer->lock
- */
-static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
-					  struct vfsmount *mnt)
-{
-	if (cpu_writer->mnt == mnt)
-		return;
-	__clear_mnt_count(cpu_writer);
-	cpu_writer->mnt = mnt;
+	return count;
+#else
+	return mnt->mnt_writers;
+#endif
 }
 
 /*
@@ -253,74 +254,73 @@ static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
 int mnt_want_write(struct vfsmount *mnt)
 {
 	int ret = 0;
-	struct mnt_writer *cpu_writer;
 
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
+	preempt_disable();
+	inc_mnt_writers(mnt);
+	/*
+	 * The store to inc_mnt_writers must be visible before we pass
+	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
+	 * incremented count after it has set MNT_WRITE_HOLD.
+	 */
+	smp_mb();
+	while (mnt->mnt_flags & MNT_WRITE_HOLD)
+		cpu_relax();
+	/*
+	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
+	 * be set to match its requirements. So we must not load that until
+	 * MNT_WRITE_HOLD is cleared.
+	 */
+	smp_rmb();
 	if (__mnt_is_readonly(mnt)) {
+		dec_mnt_writers(mnt);
 		ret = -EROFS;
 		goto out;
 	}
-	use_cpu_writer_for_mount(cpu_writer, mnt);
-	cpu_writer->count++;
 out:
-	spin_unlock(&cpu_writer->lock);
-	put_cpu_var(mnt_writers);
+	preempt_enable();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mnt_want_write);
 
-static void lock_mnt_writers(void)
-{
-	int cpu;
-	struct mnt_writer *cpu_writer;
-
-	for_each_possible_cpu(cpu) {
-		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_lock(&cpu_writer->lock);
-		__clear_mnt_count(cpu_writer);
-		cpu_writer->mnt = NULL;
-	}
+/**
+ * mnt_clone_write - get write access to a mount
+ * @mnt: the mount on which to take a write
+ *
+ * This is effectively like mnt_want_write, except
+ * it must only be used to take an extra write reference
+ * on a mountpoint that we already know has a write reference
+ * on it. This allows some optimisation.
+ *
+ * After finished, mnt_drop_write must be called as usual to
+ * drop the reference.
+ */
+int mnt_clone_write(struct vfsmount *mnt)
+{
+	/* superblock may be r/o */
+	if (__mnt_is_readonly(mnt))
+		return -EROFS;
+	preempt_disable();
+	inc_mnt_writers(mnt);
+	preempt_enable();
+	return 0;
 }
+EXPORT_SYMBOL_GPL(mnt_clone_write);
 
-/*
- * These per-cpu write counts are not guaranteed to have
- * matched increments and decrements on any given cpu.
- * A file open()ed for write on one cpu and close()d on
- * another cpu will imbalance this count. Make sure it
- * does not get too far out of whack.
+/**
+ * mnt_want_write_file - get write access to a file's mount
+ * @file: the file who's mount on which to take a write
+ *
+ * This is like mnt_want_write, but it takes a file and can
+ * do some optimisations if the file is open for write already
  */
-static void handle_write_count_underflow(struct vfsmount *mnt)
+int mnt_want_write_file(struct file *file)
 {
-	if (atomic_read(&mnt->__mnt_writers) >=
-	    MNT_WRITER_UNDERFLOW_LIMIT)
-		return;
-	/*
-	 * It isn't necessary to hold all of the locks
-	 * at the same time, but doing it this way makes
-	 * us share a lot more code.
-	 */
-	lock_mnt_writers();
-	/*
-	 * vfsmount_lock is for mnt_flags.
-	 */
-	spin_lock(&vfsmount_lock);
-	/*
-	 * If coalescing the per-cpu writer counts did not
-	 * get us back to a positive writer count, we have
-	 * a bug.
-	 */
-	if ((atomic_read(&mnt->__mnt_writers) < 0) &&
-	    !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
-		WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
-				"count: %d\n",
-			mnt, atomic_read(&mnt->__mnt_writers));
-		/* use the flag to keep the dmesg spam down */
-		mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
-	}
-	spin_unlock(&vfsmount_lock);
-	unlock_mnt_writers();
+	if (!(file->f_mode & FMODE_WRITE))
+		return mnt_want_write(file->f_path.mnt);
+	else
+		return mnt_clone_write(file->f_path.mnt);
 }
+EXPORT_SYMBOL_GPL(mnt_want_write_file);
 
 /**
  * mnt_drop_write - give up write access to a mount
@@ -332,37 +332,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt)
  */
 void mnt_drop_write(struct vfsmount *mnt)
 {
-	int must_check_underflow = 0;
-	struct mnt_writer *cpu_writer;
-
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
-
-	use_cpu_writer_for_mount(cpu_writer, mnt);
-	if (cpu_writer->count > 0) {
-		cpu_writer->count--;
-	} else {
-		must_check_underflow = 1;
-		atomic_dec(&mnt->__mnt_writers);
-	}
-
-	spin_unlock(&cpu_writer->lock);
-	/*
-	 * Logically, we could call this each time,
-	 * but the __mnt_writers cacheline tends to
-	 * be cold, and makes this expensive.
-	 */
-	if (must_check_underflow)
-		handle_write_count_underflow(mnt);
-	/*
-	 * This could be done right after the spinlock
-	 * is taken because the spinlock keeps us on
-	 * the cpu, and disables preemption. However,
-	 * putting it here bounds the amount that
-	 * __mnt_writers can underflow. Without it,
-	 * we could theoretically wrap __mnt_writers.
-	 */
-	put_cpu_var(mnt_writers);
+	preempt_disable();
+	dec_mnt_writers(mnt);
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(mnt_drop_write);
 
@@ -370,24 +342,41 @@ static int mnt_make_readonly(struct vfsmount *mnt)
 {
 	int ret = 0;
 
-	lock_mnt_writers();
+	spin_lock(&vfsmount_lock);
+	mnt->mnt_flags |= MNT_WRITE_HOLD;
 	/*
-	 * With all the locks held, this value is stable
+	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
+	 * should be visible before we do.
 	 */
-	if (atomic_read(&mnt->__mnt_writers) > 0) {
-		ret = -EBUSY;
-		goto out;
-	}
+	smp_mb();
+
 	/*
-	 * nobody can do a successful mnt_want_write() with all
-	 * of the counts in MNT_DENIED_WRITE and the locks held.
+	 * With writers on hold, if this value is zero, then there are
+	 * definitely no active writers (although held writers may subsequently
+	 * increment the count, they'll have to wait, and decrement it after
+	 * seeing MNT_READONLY).
+	 *
+	 * It is OK to have counter incremented on one CPU and decremented on
+	 * another: the sum will add up correctly. The danger would be when we
+	 * sum up each counter, if we read a counter before it is incremented,
+	 * but then read another CPU's count which it has been subsequently
+	 * decremented from -- we would see more decrements than we should.
+	 * MNT_WRITE_HOLD protects against this scenario, because
+	 * mnt_want_write first increments count, then smp_mb, then spins on
+	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
+	 * we're counting up here.
 	 */
-	spin_lock(&vfsmount_lock);
-	if (!ret)
+	if (count_mnt_writers(mnt) > 0)
+		ret = -EBUSY;
+	else
 		mnt->mnt_flags |= MNT_READONLY;
+	/*
+	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
+	 * that become unheld will see MNT_READONLY.
+	 */
+	smp_wmb();
+	mnt->mnt_flags &= ~MNT_WRITE_HOLD;
 	spin_unlock(&vfsmount_lock);
-out:
-	unlock_mnt_writers();
 	return ret;
 }
 
@@ -410,6 +399,9 @@ void free_vfsmnt(struct vfsmount *mnt)
 {
 	kfree(mnt->mnt_devname);
 	mnt_free_id(mnt);
+#ifdef CONFIG_SMP
+	free_percpu(mnt->mnt_writers);
+#endif
 	kmem_cache_free(mnt_cache, mnt);
 }
 
@@ -442,11 +434,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
  * lookup_mnt increments the ref count before returning
  * the vfsmount struct.
  */
-struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *lookup_mnt(struct path *path)
 {
 	struct vfsmount *child_mnt;
 	spin_lock(&vfsmount_lock);
-	if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
+	if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
 		mntget(child_mnt);
 	spin_unlock(&vfsmount_lock);
 	return child_mnt;
@@ -604,38 +596,18 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
 
 static inline void __mntput(struct vfsmount *mnt)
 {
-	int cpu;
 	struct super_block *sb = mnt->mnt_sb;
 	/*
-	 * We don't have to hold all of the locks at the
-	 * same time here because we know that we're the
-	 * last reference to mnt and that no new writers
-	 * can come in.
-	 */
-	for_each_possible_cpu(cpu) {
-		struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_lock(&cpu_writer->lock);
-		if (cpu_writer->mnt != mnt) {
-			spin_unlock(&cpu_writer->lock);
-			continue;
-		}
-		atomic_add(cpu_writer->count, &mnt->__mnt_writers);
-		cpu_writer->count = 0;
-		/*
-		 * Might as well do this so that no one
-		 * ever sees the pointer and expects
-		 * it to be valid.
-		 */
-		cpu_writer->mnt = NULL;
-		spin_unlock(&cpu_writer->lock);
-	}
-	/*
 	 * This probably indicates that somebody messed
 	 * up a mnt_want/drop_write() pair. If this
 	 * happens, the filesystem was probably unable
 	 * to make r/w->r/o transitions.
 	 */
-	WARN_ON(atomic_read(&mnt->__mnt_writers));
+	/*
+	 * atomic_dec_and_lock() used to deal with ->mnt_count decrements
+	 * provides barriers, so count_mnt_writers() below is safe. AV
+	 */
+	WARN_ON(count_mnt_writers(mnt));
 	dput(mnt->mnt_root);
 	free_vfsmnt(mnt);
 	deactivate_super(sb);
@@ -1106,11 +1078,8 @@ static int do_umount(struct vfsmount *mnt, int flags)
 		 * we just try to remount it readonly.
 		 */
 		down_write(&sb->s_umount);
-		if (!(sb->s_flags & MS_RDONLY)) {
-			lock_kernel();
+		if (!(sb->s_flags & MS_RDONLY))
 			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
-			unlock_kernel();
-		}
 		up_write(&sb->s_umount);
 		return retval;
 	}
@@ -1253,11 +1222,11 @@ Enomem:
 	return NULL;
 }
 
-struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *collect_mounts(struct path *path)
 {
 	struct vfsmount *tree;
 	down_write(&namespace_sem);
-	tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE);
+	tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE);
 	up_write(&namespace_sem);
 	return tree;
 }
@@ -1430,7 +1399,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
 		goto out_unlock;
 
 	err = -ENOENT;
-	if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry))
+	if (!d_unlinked(path->dentry))
 		err = attach_recursive_mnt(mnt, path, NULL);
 out_unlock:
 	mutex_unlock(&path->dentry->d_inode->i_mutex);
@@ -1601,7 +1570,7 @@ static int do_move_mount(struct path *path, char *old_name)
 
 	down_write(&namespace_sem);
 	while (d_mountpoint(path->dentry) &&
-	       follow_down(&path->mnt, &path->dentry))
+	       follow_down(path))
 		;
 	err = -EINVAL;
 	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
@@ -1612,7 +1581,7 @@ static int do_move_mount(struct path *path, char *old_name)
 	if (IS_DEADDIR(path->dentry->d_inode))
 		goto out1;
 
-	if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry))
+	if (d_unlinked(path->dentry))
 		goto out1;
 
 	err = -EINVAL;
@@ -1676,7 +1645,9 @@ static int do_new_mount(struct path *path, char *type, int flags,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	lock_kernel();
 	mnt = do_kern_mount(type, flags, name, data);
+	unlock_kernel();
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
@@ -1695,10 +1666,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
 	down_write(&namespace_sem);
 	/* Something was mounted here while we slept */
 	while (d_mountpoint(path->dentry) &&
-	       follow_down(&path->mnt, &path->dentry))
+	       follow_down(path))
 		;
 	err = -EINVAL;
-	if (!check_mnt(path->mnt))
+	if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
 		goto unlock;
 
 	/* Refuse the same filesystem on the same mount point */
@@ -1984,6 +1955,21 @@ dput_out:
 	return retval;
 }
 
+static struct mnt_namespace *alloc_mnt_ns(void)
+{
+	struct mnt_namespace *new_ns;
+
+	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
+	if (!new_ns)
+		return ERR_PTR(-ENOMEM);
+	atomic_set(&new_ns->count, 1);
+	new_ns->root = NULL;
+	INIT_LIST_HEAD(&new_ns->list);
+	init_waitqueue_head(&new_ns->poll);
+	new_ns->event = 0;
+	return new_ns;
+}
+
 /*
  * Allocate a new namespace structure and populate it with contents
  * copied from the namespace of the passed in task structure.
@@ -1995,14 +1981,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
 	struct vfsmount *p, *q;
 
-	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
-	if (!new_ns)
-		return ERR_PTR(-ENOMEM);
-
-	atomic_set(&new_ns->count, 1);
-	INIT_LIST_HEAD(&new_ns->list);
-	init_waitqueue_head(&new_ns->poll);
-	new_ns->event = 0;
+	new_ns = alloc_mnt_ns();
+	if (IS_ERR(new_ns))
+		return new_ns;
 
 	down_write(&namespace_sem);
 	/* First pass: copy the tree topology */
@@ -2066,6 +2047,24 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 	return new_ns;
 }
 
+/**
+ * create_mnt_ns - creates a private namespace and adds a root filesystem
+ * @mnt: pointer to the new root filesystem mountpoint
+ */
+struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+{
+	struct mnt_namespace *new_ns;
+
+	new_ns = alloc_mnt_ns();
+	if (!IS_ERR(new_ns)) {
+		mnt->mnt_ns = new_ns;
+		new_ns->root = mnt;
+		list_add(&new_ns->list, &new_ns->root->mnt_list);
+	}
+	return new_ns;
+}
+EXPORT_SYMBOL(create_mnt_ns);
+
 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 		char __user *, type, unsigned long, flags, void __user *, data)
 {
@@ -2092,10 +2091,8 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 	if (retval < 0)
 		goto out3;
 
-	lock_kernel();
 	retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
 			  flags, (void *)data_page);
-	unlock_kernel();
 	free_page(data_page);
 
 out3:
@@ -2175,9 +2172,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	error = -ENOENT;
 	if (IS_DEADDIR(new.dentry->d_inode))
 		goto out2;
-	if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
+	if (d_unlinked(new.dentry))
 		goto out2;
-	if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
+	if (d_unlinked(old.dentry))
 		goto out2;
 	error = -EBUSY;
 	if (new.mnt == root.mnt ||
@@ -2243,16 +2240,9 @@ static void __init init_mount_tree(void)
 	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
 	if (IS_ERR(mnt))
 		panic("Can't create rootfs");
-	ns = kmalloc(sizeof(*ns), GFP_KERNEL);
-	if (!ns)
+	ns = create_mnt_ns(mnt);
+	if (IS_ERR(ns))
 		panic("Can't allocate initial namespace");
-	atomic_set(&ns->count, 1);
-	INIT_LIST_HEAD(&ns->list);
-	init_waitqueue_head(&ns->poll);
-	ns->event = 0;
-	list_add(&mnt->mnt_list, &ns->list);
-	ns->root = mnt;
-	mnt->mnt_ns = ns;
 
 	init_task.nsproxy->mnt_ns = ns;
 	get_mnt_ns(ns);
@@ -2295,10 +2285,14 @@ void __init mnt_init(void)
 	init_mount_tree();
 }
 
-void __put_mnt_ns(struct mnt_namespace *ns)
+void put_mnt_ns(struct mnt_namespace *ns)
 {
-	struct vfsmount *root = ns->root;
+	struct vfsmount *root;
 	LIST_HEAD(umount_list);
+
+	if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock))
+		return;
+	root = ns->root;
 	ns->root = NULL;
 	spin_unlock(&vfsmount_lock);
 	down_write(&namespace_sem);
@@ -2309,3 +2303,4 @@ void __put_mnt_ns(struct mnt_namespace *ns)
 	release_mounts(&umount_list);
 	kfree(ns);
 }
+EXPORT_SYMBOL(put_mnt_ns);
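
The core technique in the hunks above is a split writer count: mnt_want_write()/mnt_drop_write() touch only the local CPU's counter, and mnt_make_readonly() sets MNT_WRITE_HOLD, then sums every counter. The following stand-alone user-space sketch is not part of the patch and not kernel code; all names (mount_sketch, NSLOTS, want_write, ...) are invented for illustration, threads/CPUs are replaced by explicit "slots", and the kernel's memory barriers and spinlocks are approximated with C11 atomics.

/* Illustrative sketch only -- NOT from the patch above. Build with -std=c11. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NSLOTS 4				/* stand-in for the number of CPUs */

struct mount_sketch {
	atomic_int  writers[NSLOTS];		/* per-slot counts; one slot may go negative */
	atomic_bool write_hold;			/* analogue of MNT_WRITE_HOLD */
	atomic_bool readonly;			/* analogue of MNT_READONLY */
};

/* analogue of mnt_want_write(): bump the local slot, then check read-only */
static int want_write(struct mount_sketch *m, int slot)
{
	atomic_fetch_add(&m->writers[slot], 1);
	while (atomic_load(&m->write_hold))	/* wait out a read-only transition */
		;
	if (atomic_load(&m->readonly)) {
		atomic_fetch_sub(&m->writers[slot], 1);
		return -1;			/* -EROFS in the kernel */
	}
	return 0;
}

/* analogue of mnt_drop_write(): any slot may decrement */
static void drop_write(struct mount_sketch *m, int slot)
{
	atomic_fetch_sub(&m->writers[slot], 1);
}

/* analogue of mnt_make_readonly(): hold new writers, then sum all slots */
static int make_readonly(struct mount_sketch *m)
{
	int sum = 0;

	atomic_store(&m->write_hold, true);
	for (int i = 0; i < NSLOTS; i++)
		sum += atomic_load(&m->writers[i]);
	if (sum > 0) {
		atomic_store(&m->write_hold, false);
		return -1;			/* -EBUSY: active writers exist */
	}
	atomic_store(&m->readonly, true);
	atomic_store(&m->write_hold, false);
	return 0;
}

int main(void)
{
	static struct mount_sketch m;		/* zero-initialized */

	want_write(&m, 0);			/* write taken on slot 0 */
	drop_write(&m, 3);			/* ...and dropped from slot 3 */
	printf("remount ro: %d\n", make_readonly(&m));	/* prints 0: the sum is zero */
	return 0;
}

The sketch makes the same point as the long comment the patch adds to mnt_make_readonly(): individual slots may be imbalanced (slot 0 ends at +1 and slot 3 at -1 above), but the sum is exact, and the hold flag keeps that sum from racing with writers that are incrementing concurrently.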