Diffstat (limited to 'fs/namespace.c')
-rw-r--r-- | fs/namespace.c | 419 |
1 file changed, 208 insertions, 211 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 134d494158d9..7230787d18b0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/seq_file.h> | 22 | #include <linux/seq_file.h> |
23 | #include <linux/mnt_namespace.h> | 23 | #include <linux/mnt_namespace.h> |
24 | #include <linux/namei.h> | 24 | #include <linux/namei.h> |
25 | #include <linux/nsproxy.h> | ||
25 | #include <linux/security.h> | 26 | #include <linux/security.h> |
26 | #include <linux/mount.h> | 27 | #include <linux/mount.h> |
27 | #include <linux/ramfs.h> | 28 | #include <linux/ramfs.h> |
@@ -42,6 +43,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); | |||
42 | static int event; | 43 | static int event; |
43 | static DEFINE_IDA(mnt_id_ida); | 44 | static DEFINE_IDA(mnt_id_ida); |
44 | static DEFINE_IDA(mnt_group_ida); | 45 | static DEFINE_IDA(mnt_group_ida); |
46 | static int mnt_id_start = 0; | ||
47 | static int mnt_group_start = 1; | ||
45 | 48 | ||
46 | static struct list_head *mount_hashtable __read_mostly; | 49 | static struct list_head *mount_hashtable __read_mostly; |
47 | static struct kmem_cache *mnt_cache __read_mostly; | 50 | static struct kmem_cache *mnt_cache __read_mostly; |
@@ -69,7 +72,9 @@ static int mnt_alloc_id(struct vfsmount *mnt) | |||
69 | retry: | 72 | retry: |
70 | ida_pre_get(&mnt_id_ida, GFP_KERNEL); | 73 | ida_pre_get(&mnt_id_ida, GFP_KERNEL); |
71 | spin_lock(&vfsmount_lock); | 74 | spin_lock(&vfsmount_lock); |
72 | res = ida_get_new(&mnt_id_ida, &mnt->mnt_id); | 75 | res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); |
76 | if (!res) | ||
77 | mnt_id_start = mnt->mnt_id + 1; | ||
73 | spin_unlock(&vfsmount_lock); | 78 | spin_unlock(&vfsmount_lock); |
74 | if (res == -EAGAIN) | 79 | if (res == -EAGAIN) |
75 | goto retry; | 80 | goto retry; |
@@ -79,8 +84,11 @@ retry: | |||
79 | 84 | ||
80 | static void mnt_free_id(struct vfsmount *mnt) | 85 | static void mnt_free_id(struct vfsmount *mnt) |
81 | { | 86 | { |
87 | int id = mnt->mnt_id; | ||
82 | spin_lock(&vfsmount_lock); | 88 | spin_lock(&vfsmount_lock); |
83 | ida_remove(&mnt_id_ida, mnt->mnt_id); | 89 | ida_remove(&mnt_id_ida, id); |
90 | if (mnt_id_start > id) | ||
91 | mnt_id_start = id; | ||
84 | spin_unlock(&vfsmount_lock); | 92 | spin_unlock(&vfsmount_lock); |
85 | } | 93 | } |
86 | 94 | ||
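
The two hunks above (and the matching mnt_group_start hunks that follow) switch from ida_get_new() to ida_get_new_above() with a rolling hint: mnt_id_start remembers where the next search should begin, and freeing a lower ID pulls the hint back down so low mount IDs get reused first. Below is a standalone userspace sketch of that hint pattern, with a plain bitmap standing in for the kernel IDA; alloc_id, free_id and MAX_IDS are illustrative names, not kernel API.

/*
 * Illustrative sketch (not kernel code) of the "rolling start hint":
 * allocate the lowest free ID at or above a remembered hint, and pull
 * the hint back down when a lower ID is freed.
 */
#include <stdio.h>

#define MAX_IDS 64

static unsigned char used[MAX_IDS];	/* stand-in for the IDA */
static int id_start;			/* analogous to mnt_id_start */

static int alloc_id(void)
{
	for (int id = id_start; id < MAX_IDS; id++) {
		if (!used[id]) {
			used[id] = 1;
			id_start = id + 1;	/* next search starts here */
			return id;
		}
	}
	return -1;				/* -ENOSPC in the kernel */
}

static void free_id(int id)
{
	used[id] = 0;
	if (id_start > id)			/* reuse low IDs first */
		id_start = id;
}

int main(void)
{
	int a = alloc_id(), b = alloc_id(), c = alloc_id();
	printf("allocated %d %d %d\n", a, b, c);	/* 0 1 2 */
	free_id(b);
	printf("reallocated %d\n", alloc_id());		/* 1: hint pulled back */
	return 0;
}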
@@ -91,10 +99,18 @@ static void mnt_free_id(struct vfsmount *mnt) | |||
91 | */ | 99 | */ |
92 | static int mnt_alloc_group_id(struct vfsmount *mnt) | 100 | static int mnt_alloc_group_id(struct vfsmount *mnt) |
93 | { | 101 | { |
102 | int res; | ||
103 | |||
94 | if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL)) | 104 | if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL)) |
95 | return -ENOMEM; | 105 | return -ENOMEM; |
96 | 106 | ||
97 | return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id); | 107 | res = ida_get_new_above(&mnt_group_ida, |
108 | mnt_group_start, | ||
109 | &mnt->mnt_group_id); | ||
110 | if (!res) | ||
111 | mnt_group_start = mnt->mnt_group_id + 1; | ||
112 | |||
113 | return res; | ||
98 | } | 114 | } |
99 | 115 | ||
100 | /* | 116 | /* |
@@ -102,7 +118,10 @@ static int mnt_alloc_group_id(struct vfsmount *mnt) | |||
102 | */ | 118 | */ |
103 | void mnt_release_group_id(struct vfsmount *mnt) | 119 | void mnt_release_group_id(struct vfsmount *mnt) |
104 | { | 120 | { |
105 | ida_remove(&mnt_group_ida, mnt->mnt_group_id); | 121 | int id = mnt->mnt_group_id; |
122 | ida_remove(&mnt_group_ida, id); | ||
123 | if (mnt_group_start > id) | ||
124 | mnt_group_start = id; | ||
106 | mnt->mnt_group_id = 0; | 125 | mnt->mnt_group_id = 0; |
107 | } | 126 | } |
108 | 127 | ||
@@ -131,10 +150,20 @@ struct vfsmount *alloc_vfsmnt(const char *name) | |||
131 | INIT_LIST_HEAD(&mnt->mnt_share); | 150 | INIT_LIST_HEAD(&mnt->mnt_share); |
132 | INIT_LIST_HEAD(&mnt->mnt_slave_list); | 151 | INIT_LIST_HEAD(&mnt->mnt_slave_list); |
133 | INIT_LIST_HEAD(&mnt->mnt_slave); | 152 | INIT_LIST_HEAD(&mnt->mnt_slave); |
134 | atomic_set(&mnt->__mnt_writers, 0); | 153 | #ifdef CONFIG_SMP |
154 | mnt->mnt_writers = alloc_percpu(int); | ||
155 | if (!mnt->mnt_writers) | ||
156 | goto out_free_devname; | ||
157 | #else | ||
158 | mnt->mnt_writers = 0; | ||
159 | #endif | ||
135 | } | 160 | } |
136 | return mnt; | 161 | return mnt; |
137 | 162 | ||
163 | #ifdef CONFIG_SMP | ||
164 | out_free_devname: | ||
165 | kfree(mnt->mnt_devname); | ||
166 | #endif | ||
138 | out_free_id: | 167 | out_free_id: |
139 | mnt_free_id(mnt); | 168 | mnt_free_id(mnt); |
140 | out_free_cache: | 169 | out_free_cache: |
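
With CONFIG_SMP, alloc_vfsmnt() now allocates a per-cpu writer counter and, if that fails, unwinds through the new out_free_devname label before falling into the existing out_free_id path. A minimal userspace sketch of that layered goto-unwind error handling follows; struct mount_like and alloc_mount_like are illustrative names, not the kernel's.

/*
 * Each label frees exactly what was already allocated, in reverse order,
 * mirroring the out_free_devname -> out_free_id shape of the patch above.
 */
#include <stdlib.h>
#include <string.h>

struct mount_like {
	char *devname;
	int  *writers;		/* stands in for the per-cpu counters */
};

static struct mount_like *alloc_mount_like(const char *name)
{
	struct mount_like *m = calloc(1, sizeof(*m));
	if (!m)
		goto out;

	m->devname = strdup(name);
	if (!m->devname)
		goto out_free_mount;

	m->writers = calloc(4, sizeof(*m->writers));
	if (!m->writers)
		goto out_free_devname;

	return m;

out_free_devname:
	free(m->devname);
out_free_mount:
	free(m);
out:
	return NULL;
}

int main(void)
{
	struct mount_like *m = alloc_mount_like("tmpfs");
	if (m) {
		free(m->writers);
		free(m->devname);
		free(m);
	}
	return 0;
}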
@@ -171,65 +200,38 @@ int __mnt_is_readonly(struct vfsmount *mnt) | |||
171 | } | 200 | } |
172 | EXPORT_SYMBOL_GPL(__mnt_is_readonly); | 201 | EXPORT_SYMBOL_GPL(__mnt_is_readonly); |
173 | 202 | ||
174 | struct mnt_writer { | 203 | static inline void inc_mnt_writers(struct vfsmount *mnt) |
175 | /* | 204 | { |
176 | * If holding multiple instances of this lock, they | 205 | #ifdef CONFIG_SMP |
177 | * must be ordered by cpu number. | 206 | (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; |
178 | */ | 207 | #else |
179 | spinlock_t lock; | 208 | mnt->mnt_writers++; |
180 | struct lock_class_key lock_class; /* compiles out with !lockdep */ | 209 | #endif |
181 | unsigned long count; | 210 | } |
182 | struct vfsmount *mnt; | ||
183 | } ____cacheline_aligned_in_smp; | ||
184 | static DEFINE_PER_CPU(struct mnt_writer, mnt_writers); | ||
185 | 211 | ||
186 | static int __init init_mnt_writers(void) | 212 | static inline void dec_mnt_writers(struct vfsmount *mnt) |
187 | { | 213 | { |
188 | int cpu; | 214 | #ifdef CONFIG_SMP |
189 | for_each_possible_cpu(cpu) { | 215 | (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; |
190 | struct mnt_writer *writer = &per_cpu(mnt_writers, cpu); | 216 | #else |
191 | spin_lock_init(&writer->lock); | 217 | mnt->mnt_writers--; |
192 | lockdep_set_class(&writer->lock, &writer->lock_class); | 218 | #endif |
193 | writer->count = 0; | ||
194 | } | ||
195 | return 0; | ||
196 | } | 219 | } |
197 | fs_initcall(init_mnt_writers); | ||
198 | 220 | ||
199 | static void unlock_mnt_writers(void) | 221 | static unsigned int count_mnt_writers(struct vfsmount *mnt) |
200 | { | 222 | { |
223 | #ifdef CONFIG_SMP | ||
224 | unsigned int count = 0; | ||
201 | int cpu; | 225 | int cpu; |
202 | struct mnt_writer *cpu_writer; | ||
203 | 226 | ||
204 | for_each_possible_cpu(cpu) { | 227 | for_each_possible_cpu(cpu) { |
205 | cpu_writer = &per_cpu(mnt_writers, cpu); | 228 | count += *per_cpu_ptr(mnt->mnt_writers, cpu); |
206 | spin_unlock(&cpu_writer->lock); | ||
207 | } | 229 | } |
208 | } | ||
209 | 230 | ||
210 | static inline void __clear_mnt_count(struct mnt_writer *cpu_writer) | 231 | return count; |
211 | { | 232 | #else |
212 | if (!cpu_writer->mnt) | 233 | return mnt->mnt_writers; |
213 | return; | 234 | #endif |
214 | /* | ||
215 | * This is in case anyone ever leaves an invalid, | ||
216 | * old ->mnt and a count of 0. | ||
217 | */ | ||
218 | if (!cpu_writer->count) | ||
219 | return; | ||
220 | atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers); | ||
221 | cpu_writer->count = 0; | ||
222 | } | ||
223 | /* | ||
224 | * must hold cpu_writer->lock | ||
225 | */ | ||
226 | static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer, | ||
227 | struct vfsmount *mnt) | ||
228 | { | ||
229 | if (cpu_writer->mnt == mnt) | ||
230 | return; | ||
231 | __clear_mnt_count(cpu_writer); | ||
232 | cpu_writer->mnt = mnt; | ||
233 | } | 235 | } |
234 | 236 | ||
235 | /* | 237 | /* |
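
The writer count is now distributed: inc_mnt_writers()/dec_mnt_writers() touch only the local CPU's slot, and count_mnt_writers() sums all slots. Because a file opened for write on one CPU may be closed on another, an individual slot can go negative; only the sum is meaningful. A userspace sketch of that behaviour, with a fixed NR_CPUS array in place of alloc_percpu() and illustrative function names:

#include <stdio.h>

#define NR_CPUS 4

static long writers[NR_CPUS];		/* stand-in for alloc_percpu(int) */

static void inc_writers(int cpu)  { writers[cpu]++; }
static void dec_writers(int cpu)  { writers[cpu]--; }

static long count_writers(void)
{
	long sum = 0;
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		sum += writers[cpu];
	return sum;
}

int main(void)
{
	inc_writers(0);			/* open for write ran on CPU 0 */
	inc_writers(2);
	dec_writers(3);			/* matching close ran on CPU 3 */
	printf("slot3=%ld total=%ld\n", writers[3], count_writers()); /* -1 1 */
	return 0;
}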
@@ -253,74 +255,74 @@ static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer, | |||
253 | int mnt_want_write(struct vfsmount *mnt) | 255 | int mnt_want_write(struct vfsmount *mnt) |
254 | { | 256 | { |
255 | int ret = 0; | 257 | int ret = 0; |
256 | struct mnt_writer *cpu_writer; | ||
257 | 258 | ||
258 | cpu_writer = &get_cpu_var(mnt_writers); | 259 | preempt_disable(); |
259 | spin_lock(&cpu_writer->lock); | 260 | inc_mnt_writers(mnt); |
261 | /* | ||
262 | * The store done by inc_mnt_writers must be visible before we enter | ||
263 | * the MNT_WRITE_HOLD loop below, so that the slowpath can see our | ||
264 | * incremented count after it has set MNT_WRITE_HOLD. | ||
265 | */ | ||
266 | smp_mb(); | ||
267 | while (mnt->mnt_flags & MNT_WRITE_HOLD) | ||
268 | cpu_relax(); | ||
269 | /* | ||
270 | * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will | ||
271 | * be set to match its requirements. So we must not load that until | ||
272 | * MNT_WRITE_HOLD is cleared. | ||
273 | */ | ||
274 | smp_rmb(); | ||
260 | if (__mnt_is_readonly(mnt)) { | 275 | if (__mnt_is_readonly(mnt)) { |
276 | dec_mnt_writers(mnt); | ||
261 | ret = -EROFS; | 277 | ret = -EROFS; |
262 | goto out; | 278 | goto out; |
263 | } | 279 | } |
264 | use_cpu_writer_for_mount(cpu_writer, mnt); | ||
265 | cpu_writer->count++; | ||
266 | out: | 280 | out: |
267 | spin_unlock(&cpu_writer->lock); | 281 | preempt_enable(); |
268 | put_cpu_var(mnt_writers); | ||
269 | return ret; | 282 | return ret; |
270 | } | 283 | } |
271 | EXPORT_SYMBOL_GPL(mnt_want_write); | 284 | EXPORT_SYMBOL_GPL(mnt_want_write); |
272 | 285 | ||
273 | static void lock_mnt_writers(void) | 286 | /** |
274 | { | 287 | * mnt_clone_write - get write access to a mount |
275 | int cpu; | 288 | * @mnt: the mount on which to take a write |
276 | struct mnt_writer *cpu_writer; | 289 | * |
277 | 290 | * This is effectively like mnt_want_write, except | |
278 | for_each_possible_cpu(cpu) { | 291 | * it must only be used to take an extra write reference |
279 | cpu_writer = &per_cpu(mnt_writers, cpu); | 292 | * on a mountpoint that we already know has a write reference |
280 | spin_lock(&cpu_writer->lock); | 293 | * on it. This allows some optimisation. |
281 | __clear_mnt_count(cpu_writer); | 294 | * |
282 | cpu_writer->mnt = NULL; | 295 | * When finished, mnt_drop_write must be called as usual to |
283 | } | 296 | * drop the reference. |
297 | */ | ||
298 | int mnt_clone_write(struct vfsmount *mnt) | ||
299 | { | ||
300 | /* superblock may be r/o */ | ||
301 | if (__mnt_is_readonly(mnt)) | ||
302 | return -EROFS; | ||
303 | preempt_disable(); | ||
304 | inc_mnt_writers(mnt); | ||
305 | preempt_enable(); | ||
306 | return 0; | ||
284 | } | 307 | } |
308 | EXPORT_SYMBOL_GPL(mnt_clone_write); | ||
285 | 309 | ||
286 | /* | 310 | /** |
287 | * These per-cpu write counts are not guaranteed to have | 311 | * mnt_want_write_file - get write access to a file's mount |
288 | * matched increments and decrements on any given cpu. | 312 | * @file: the file who's mount on which to take a write |
289 | * A file open()ed for write on one cpu and close()d on | 313 | * @file: the file whose mount to take a write on |
290 | * another cpu will imbalance this count. Make sure it | 314 | * This is like mnt_want_write, but it takes a file and can |
291 | * does not get too far out of whack. | 315 | * do some optimisations if the file is open for write already |
292 | */ | 316 | */ |
293 | static void handle_write_count_underflow(struct vfsmount *mnt) | 317 | int mnt_want_write_file(struct file *file) |
294 | { | 318 | { |
295 | if (atomic_read(&mnt->__mnt_writers) >= | 319 | struct inode *inode = file->f_dentry->d_inode; |
296 | MNT_WRITER_UNDERFLOW_LIMIT) | 320 | if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) |
297 | return; | 321 | return mnt_want_write(file->f_path.mnt); |
298 | /* | 322 | else |
299 | * It isn't necessary to hold all of the locks | 323 | return mnt_clone_write(file->f_path.mnt); |
300 | * at the same time, but doing it this way makes | ||
301 | * us share a lot more code. | ||
302 | */ | ||
303 | lock_mnt_writers(); | ||
304 | /* | ||
305 | * vfsmount_lock is for mnt_flags. | ||
306 | */ | ||
307 | spin_lock(&vfsmount_lock); | ||
308 | /* | ||
309 | * If coalescing the per-cpu writer counts did not | ||
310 | * get us back to a positive writer count, we have | ||
311 | * a bug. | ||
312 | */ | ||
313 | if ((atomic_read(&mnt->__mnt_writers) < 0) && | ||
314 | !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) { | ||
315 | WARN(1, KERN_DEBUG "leak detected on mount(%p) writers " | ||
316 | "count: %d\n", | ||
317 | mnt, atomic_read(&mnt->__mnt_writers)); | ||
318 | /* use the flag to keep the dmesg spam down */ | ||
319 | mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT; | ||
320 | } | ||
321 | spin_unlock(&vfsmount_lock); | ||
322 | unlock_mnt_writers(); | ||
323 | } | 324 | } |
325 | EXPORT_SYMBOL_GPL(mnt_want_write_file); | ||
324 | 326 | ||
325 | /** | 327 | /** |
326 | * mnt_drop_write - give up write access to a mount | 328 | * mnt_drop_write - give up write access to a mount |
@@ -332,37 +334,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt) | |||
332 | */ | 334 | */ |
333 | void mnt_drop_write(struct vfsmount *mnt) | 335 | void mnt_drop_write(struct vfsmount *mnt) |
334 | { | 336 | { |
335 | int must_check_underflow = 0; | 337 | preempt_disable(); |
336 | struct mnt_writer *cpu_writer; | 338 | dec_mnt_writers(mnt); |
337 | 339 | preempt_enable(); | |
338 | cpu_writer = &get_cpu_var(mnt_writers); | ||
339 | spin_lock(&cpu_writer->lock); | ||
340 | |||
341 | use_cpu_writer_for_mount(cpu_writer, mnt); | ||
342 | if (cpu_writer->count > 0) { | ||
343 | cpu_writer->count--; | ||
344 | } else { | ||
345 | must_check_underflow = 1; | ||
346 | atomic_dec(&mnt->__mnt_writers); | ||
347 | } | ||
348 | |||
349 | spin_unlock(&cpu_writer->lock); | ||
350 | /* | ||
351 | * Logically, we could call this each time, | ||
352 | * but the __mnt_writers cacheline tends to | ||
353 | * be cold, and makes this expensive. | ||
354 | */ | ||
355 | if (must_check_underflow) | ||
356 | handle_write_count_underflow(mnt); | ||
357 | /* | ||
358 | * This could be done right after the spinlock | ||
359 | * is taken because the spinlock keeps us on | ||
360 | * the cpu, and disables preemption. However, | ||
361 | * putting it here bounds the amount that | ||
362 | * __mnt_writers can underflow. Without it, | ||
363 | * we could theoretically wrap __mnt_writers. | ||
364 | */ | ||
365 | put_cpu_var(mnt_writers); | ||
366 | } | 340 | } |
367 | EXPORT_SYMBOL_GPL(mnt_drop_write); | 341 | EXPORT_SYMBOL_GPL(mnt_drop_write); |
368 | 342 | ||
@@ -370,24 +344,41 @@ static int mnt_make_readonly(struct vfsmount *mnt) | |||
370 | { | 344 | { |
371 | int ret = 0; | 345 | int ret = 0; |
372 | 346 | ||
373 | lock_mnt_writers(); | 347 | spin_lock(&vfsmount_lock); |
348 | mnt->mnt_flags |= MNT_WRITE_HOLD; | ||
374 | /* | 349 | /* |
375 | * With all the locks held, this value is stable | 350 | * After storing MNT_WRITE_HOLD, we'll read the counters. This store |
351 | * should be visible before we do. | ||
376 | */ | 352 | */ |
377 | if (atomic_read(&mnt->__mnt_writers) > 0) { | 353 | smp_mb(); |
378 | ret = -EBUSY; | 354 | |
379 | goto out; | ||
380 | } | ||
381 | /* | 355 | /* |
382 | * nobody can do a successful mnt_want_write() with all | 356 | * With writers on hold, if this value is zero, then there are |
383 | * of the counts in MNT_DENIED_WRITE and the locks held. | 357 | * definitely no active writers (although held writers may subsequently |
358 | * increment the count, they'll have to wait, and decrement it after | ||
359 | * seeing MNT_READONLY). | ||
360 | * | ||
361 | * It is OK to have counter incremented on one CPU and decremented on | ||
362 | * another: the sum will add up correctly. The danger would be when we | ||
363 | * sum up each counter, if we read a counter before it is incremented, | ||
364 | * but then read another CPU's count which it has been subsequently | ||
365 | * decremented from -- we would see more decrements than we should. | ||
366 | * MNT_WRITE_HOLD protects against this scenario, because | ||
367 | * mnt_want_write first increments count, then smp_mb, then spins on | ||
368 | * MNT_WRITE_HOLD, so it can't be decremented by another CPU while | ||
369 | * we're counting up here. | ||
384 | */ | 370 | */ |
385 | spin_lock(&vfsmount_lock); | 371 | if (count_mnt_writers(mnt) > 0) |
386 | if (!ret) | 372 | ret = -EBUSY; |
373 | else | ||
387 | mnt->mnt_flags |= MNT_READONLY; | 374 | mnt->mnt_flags |= MNT_READONLY; |
375 | /* | ||
376 | * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers | ||
377 | * that become unheld will see MNT_READONLY. | ||
378 | */ | ||
379 | smp_wmb(); | ||
380 | mnt->mnt_flags &= ~MNT_WRITE_HOLD; | ||
388 | spin_unlock(&vfsmount_lock); | 381 | spin_unlock(&vfsmount_lock); |
389 | out: | ||
390 | unlock_mnt_writers(); | ||
391 | return ret; | 382 | return ret; |
392 | } | 383 | } |
393 | 384 | ||
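
The two hunks above form a lock-free handshake: mnt_want_write() increments its per-cpu count, issues a full barrier, then spins while MNT_WRITE_HOLD is set; mnt_make_readonly() sets MNT_WRITE_HOLD, issues a barrier, sums the counters, and only flips MNT_READONLY before dropping the hold. The single-file C11 sketch below models that handshake under simplifying assumptions: one atomic counter stands in for the per-cpu sum, sequentially consistent atomics stand in for the explicit smp_mb()/smp_rmb()/smp_wmb() pairs, vfsmount_lock and preemption control are omitted, and want_write/drop_write/make_readonly are illustrative names rather than the kernel functions.

#include <stdatomic.h>
#include <stdio.h>

#define MNT_READONLY	0x1
#define MNT_WRITE_HOLD	0x2

static _Atomic unsigned int mnt_flags;
static atomic_int mnt_writers;		/* sum of the per-cpu counters */

static int want_write(void)
{
	atomic_fetch_add(&mnt_writers, 1);	/* publish the new writer first */
	while (atomic_load(&mnt_flags) & MNT_WRITE_HOLD)
		;				/* cpu_relax() in the kernel */
	if (atomic_load(&mnt_flags) & MNT_READONLY) {
		atomic_fetch_sub(&mnt_writers, 1);
		return -1;			/* -EROFS */
	}
	return 0;
}

static void drop_write(void)
{
	atomic_fetch_sub(&mnt_writers, 1);
}

static int make_readonly(void)
{
	int ret = 0;

	atomic_fetch_or(&mnt_flags, MNT_WRITE_HOLD);	/* hold new writers */
	if (atomic_load(&mnt_writers) > 0)		/* any writer already in? */
		ret = -1;				/* -EBUSY */
	else
		atomic_fetch_or(&mnt_flags, MNT_READONLY);
	atomic_fetch_and(&mnt_flags, ~MNT_WRITE_HOLD);	/* release held writers */
	return ret;
}

int main(void)
{
	if (want_write() == 0) {
		printf("remount r/o with a writer: %d (expect -1)\n", make_readonly());
		drop_write();
	}
	printf("remount r/o, no writers:    %d (expect 0)\n", make_readonly());
	printf("want_write on r/o mount:    %d (expect -1)\n", want_write());
	return 0;
}

The kernel version relaxes this to the minimum ordering actually needed (a full barrier before the spin, a read barrier before the readonly check, a write barrier before clearing the hold), which is exactly what the comments in the hunks above document.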
@@ -410,6 +401,9 @@ void free_vfsmnt(struct vfsmount *mnt) | |||
410 | { | 401 | { |
411 | kfree(mnt->mnt_devname); | 402 | kfree(mnt->mnt_devname); |
412 | mnt_free_id(mnt); | 403 | mnt_free_id(mnt); |
404 | #ifdef CONFIG_SMP | ||
405 | free_percpu(mnt->mnt_writers); | ||
406 | #endif | ||
413 | kmem_cache_free(mnt_cache, mnt); | 407 | kmem_cache_free(mnt_cache, mnt); |
414 | } | 408 | } |
415 | 409 | ||
@@ -442,11 +436,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, | |||
442 | * lookup_mnt increments the ref count before returning | 436 | * lookup_mnt increments the ref count before returning |
443 | * the vfsmount struct. | 437 | * the vfsmount struct. |
444 | */ | 438 | */ |
445 | struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) | 439 | struct vfsmount *lookup_mnt(struct path *path) |
446 | { | 440 | { |
447 | struct vfsmount *child_mnt; | 441 | struct vfsmount *child_mnt; |
448 | spin_lock(&vfsmount_lock); | 442 | spin_lock(&vfsmount_lock); |
449 | if ((child_mnt = __lookup_mnt(mnt, dentry, 1))) | 443 | if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) |
450 | mntget(child_mnt); | 444 | mntget(child_mnt); |
451 | spin_unlock(&vfsmount_lock); | 445 | spin_unlock(&vfsmount_lock); |
452 | return child_mnt; | 446 | return child_mnt; |
@@ -604,38 +598,18 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, | |||
604 | 598 | ||
605 | static inline void __mntput(struct vfsmount *mnt) | 599 | static inline void __mntput(struct vfsmount *mnt) |
606 | { | 600 | { |
607 | int cpu; | ||
608 | struct super_block *sb = mnt->mnt_sb; | 601 | struct super_block *sb = mnt->mnt_sb; |
609 | /* | 602 | /* |
610 | * We don't have to hold all of the locks at the | ||
611 | * same time here because we know that we're the | ||
612 | * last reference to mnt and that no new writers | ||
613 | * can come in. | ||
614 | */ | ||
615 | for_each_possible_cpu(cpu) { | ||
616 | struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu); | ||
617 | spin_lock(&cpu_writer->lock); | ||
618 | if (cpu_writer->mnt != mnt) { | ||
619 | spin_unlock(&cpu_writer->lock); | ||
620 | continue; | ||
621 | } | ||
622 | atomic_add(cpu_writer->count, &mnt->__mnt_writers); | ||
623 | cpu_writer->count = 0; | ||
624 | /* | ||
625 | * Might as well do this so that no one | ||
626 | * ever sees the pointer and expects | ||
627 | * it to be valid. | ||
628 | */ | ||
629 | cpu_writer->mnt = NULL; | ||
630 | spin_unlock(&cpu_writer->lock); | ||
631 | } | ||
632 | /* | ||
633 | * This probably indicates that somebody messed | 603 | * This probably indicates that somebody messed |
634 | * up a mnt_want/drop_write() pair. If this | 604 | * up a mnt_want/drop_write() pair. If this |
635 | * happens, the filesystem was probably unable | 605 | * happens, the filesystem was probably unable |
636 | * to make r/w->r/o transitions. | 606 | * to make r/w->r/o transitions. |
637 | */ | 607 | */ |
638 | WARN_ON(atomic_read(&mnt->__mnt_writers)); | 608 | /* |
609 | * atomic_dec_and_lock() used to deal with ->mnt_count decrements | ||
610 | * provides barriers, so count_mnt_writers() below is safe. AV | ||
611 | */ | ||
612 | WARN_ON(count_mnt_writers(mnt)); | ||
639 | dput(mnt->mnt_root); | 613 | dput(mnt->mnt_root); |
640 | free_vfsmnt(mnt); | 614 | free_vfsmnt(mnt); |
641 | deactivate_super(sb); | 615 | deactivate_super(sb); |
@@ -1106,11 +1080,8 @@ static int do_umount(struct vfsmount *mnt, int flags) | |||
1106 | * we just try to remount it readonly. | 1080 | * we just try to remount it readonly. |
1107 | */ | 1081 | */ |
1108 | down_write(&sb->s_umount); | 1082 | down_write(&sb->s_umount); |
1109 | if (!(sb->s_flags & MS_RDONLY)) { | 1083 | if (!(sb->s_flags & MS_RDONLY)) |
1110 | lock_kernel(); | ||
1111 | retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); | 1084 | retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); |
1112 | unlock_kernel(); | ||
1113 | } | ||
1114 | up_write(&sb->s_umount); | 1085 | up_write(&sb->s_umount); |
1115 | return retval; | 1086 | return retval; |
1116 | } | 1087 | } |
@@ -1253,11 +1224,11 @@ Enomem: | |||
1253 | return NULL; | 1224 | return NULL; |
1254 | } | 1225 | } |
1255 | 1226 | ||
1256 | struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry) | 1227 | struct vfsmount *collect_mounts(struct path *path) |
1257 | { | 1228 | { |
1258 | struct vfsmount *tree; | 1229 | struct vfsmount *tree; |
1259 | down_write(&namespace_sem); | 1230 | down_write(&namespace_sem); |
1260 | tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE); | 1231 | tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE); |
1261 | up_write(&namespace_sem); | 1232 | up_write(&namespace_sem); |
1262 | return tree; | 1233 | return tree; |
1263 | } | 1234 | } |
@@ -1430,7 +1401,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) | |||
1430 | goto out_unlock; | 1401 | goto out_unlock; |
1431 | 1402 | ||
1432 | err = -ENOENT; | 1403 | err = -ENOENT; |
1433 | if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry)) | 1404 | if (!d_unlinked(path->dentry)) |
1434 | err = attach_recursive_mnt(mnt, path, NULL); | 1405 | err = attach_recursive_mnt(mnt, path, NULL); |
1435 | out_unlock: | 1406 | out_unlock: |
1436 | mutex_unlock(&path->dentry->d_inode->i_mutex); | 1407 | mutex_unlock(&path->dentry->d_inode->i_mutex); |
@@ -1601,7 +1572,7 @@ static int do_move_mount(struct path *path, char *old_name) | |||
1601 | 1572 | ||
1602 | down_write(&namespace_sem); | 1573 | down_write(&namespace_sem); |
1603 | while (d_mountpoint(path->dentry) && | 1574 | while (d_mountpoint(path->dentry) && |
1604 | follow_down(&path->mnt, &path->dentry)) | 1575 | follow_down(path)) |
1605 | ; | 1576 | ; |
1606 | err = -EINVAL; | 1577 | err = -EINVAL; |
1607 | if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) | 1578 | if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) |
@@ -1612,7 +1583,7 @@ static int do_move_mount(struct path *path, char *old_name) | |||
1612 | if (IS_DEADDIR(path->dentry->d_inode)) | 1583 | if (IS_DEADDIR(path->dentry->d_inode)) |
1613 | goto out1; | 1584 | goto out1; |
1614 | 1585 | ||
1615 | if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry)) | 1586 | if (d_unlinked(path->dentry)) |
1616 | goto out1; | 1587 | goto out1; |
1617 | 1588 | ||
1618 | err = -EINVAL; | 1589 | err = -EINVAL; |
@@ -1676,7 +1647,9 @@ static int do_new_mount(struct path *path, char *type, int flags, | |||
1676 | if (!capable(CAP_SYS_ADMIN)) | 1647 | if (!capable(CAP_SYS_ADMIN)) |
1677 | return -EPERM; | 1648 | return -EPERM; |
1678 | 1649 | ||
1650 | lock_kernel(); | ||
1679 | mnt = do_kern_mount(type, flags, name, data); | 1651 | mnt = do_kern_mount(type, flags, name, data); |
1652 | unlock_kernel(); | ||
1680 | if (IS_ERR(mnt)) | 1653 | if (IS_ERR(mnt)) |
1681 | return PTR_ERR(mnt); | 1654 | return PTR_ERR(mnt); |
1682 | 1655 | ||
@@ -1695,10 +1668,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, | |||
1695 | down_write(&namespace_sem); | 1668 | down_write(&namespace_sem); |
1696 | /* Something was mounted here while we slept */ | 1669 | /* Something was mounted here while we slept */ |
1697 | while (d_mountpoint(path->dentry) && | 1670 | while (d_mountpoint(path->dentry) && |
1698 | follow_down(&path->mnt, &path->dentry)) | 1671 | follow_down(path)) |
1699 | ; | 1672 | ; |
1700 | err = -EINVAL; | 1673 | err = -EINVAL; |
1701 | if (!check_mnt(path->mnt)) | 1674 | if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) |
1702 | goto unlock; | 1675 | goto unlock; |
1703 | 1676 | ||
1704 | /* Refuse the same filesystem on the same mount point */ | 1677 | /* Refuse the same filesystem on the same mount point */ |
@@ -1984,6 +1957,21 @@ dput_out: | |||
1984 | return retval; | 1957 | return retval; |
1985 | } | 1958 | } |
1986 | 1959 | ||
1960 | static struct mnt_namespace *alloc_mnt_ns(void) | ||
1961 | { | ||
1962 | struct mnt_namespace *new_ns; | ||
1963 | |||
1964 | new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); | ||
1965 | if (!new_ns) | ||
1966 | return ERR_PTR(-ENOMEM); | ||
1967 | atomic_set(&new_ns->count, 1); | ||
1968 | new_ns->root = NULL; | ||
1969 | INIT_LIST_HEAD(&new_ns->list); | ||
1970 | init_waitqueue_head(&new_ns->poll); | ||
1971 | new_ns->event = 0; | ||
1972 | return new_ns; | ||
1973 | } | ||
1974 | |||
1987 | /* | 1975 | /* |
1988 | * Allocate a new namespace structure and populate it with contents | 1976 | * Allocate a new namespace structure and populate it with contents |
1989 | * copied from the namespace of the passed in task structure. | 1977 | * copied from the namespace of the passed in task structure. |
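
alloc_mnt_ns() reports failure by encoding the errno in the returned pointer (ERR_PTR(-ENOMEM)), so callers such as dup_mnt_ns() and init_mount_tree() only need an IS_ERR() check. The sketch below reimplements that convention in plain userspace C for illustration; struct mnt_ns and alloc_ns are stand-ins, and MAX_ERRNO mirrors the kernel's 4095 upper bound.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* Error pointers live in the top MAX_ERRNO addresses. */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct mnt_ns { int count; };

static struct mnt_ns *alloc_ns(void)
{
	struct mnt_ns *ns = malloc(sizeof(*ns));
	if (!ns)
		return ERR_PTR(-ENOMEM);	/* error travels in the pointer */
	ns->count = 1;
	return ns;
}

int main(void)
{
	struct mnt_ns *ns = alloc_ns();
	if (IS_ERR(ns)) {
		printf("alloc failed: %ld\n", PTR_ERR(ns));
		return 1;
	}
	printf("count=%d\n", ns->count);
	free(ns);
	return 0;
}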
@@ -1995,14 +1983,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
1995 | struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; | 1983 | struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; |
1996 | struct vfsmount *p, *q; | 1984 | struct vfsmount *p, *q; |
1997 | 1985 | ||
1998 | new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); | 1986 | new_ns = alloc_mnt_ns(); |
1999 | if (!new_ns) | 1987 | if (IS_ERR(new_ns)) |
2000 | return ERR_PTR(-ENOMEM); | 1988 | return new_ns; |
2001 | |||
2002 | atomic_set(&new_ns->count, 1); | ||
2003 | INIT_LIST_HEAD(&new_ns->list); | ||
2004 | init_waitqueue_head(&new_ns->poll); | ||
2005 | new_ns->event = 0; | ||
2006 | 1989 | ||
2007 | down_write(&namespace_sem); | 1990 | down_write(&namespace_sem); |
2008 | /* First pass: copy the tree topology */ | 1991 | /* First pass: copy the tree topology */ |
@@ -2066,6 +2049,24 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, | |||
2066 | return new_ns; | 2049 | return new_ns; |
2067 | } | 2050 | } |
2068 | 2051 | ||
2052 | /** | ||
2053 | * create_mnt_ns - creates a private namespace and adds a root filesystem | ||
2054 | * @mnt: pointer to the new root filesystem mountpoint | ||
2055 | */ | ||
2056 | struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) | ||
2057 | { | ||
2058 | struct mnt_namespace *new_ns; | ||
2059 | |||
2060 | new_ns = alloc_mnt_ns(); | ||
2061 | if (!IS_ERR(new_ns)) { | ||
2062 | mnt->mnt_ns = new_ns; | ||
2063 | new_ns->root = mnt; | ||
2064 | list_add(&new_ns->list, &new_ns->root->mnt_list); | ||
2065 | } | ||
2066 | return new_ns; | ||
2067 | } | ||
2068 | EXPORT_SYMBOL(create_mnt_ns); | ||
2069 | |||
2069 | SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, | 2070 | SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, |
2070 | char __user *, type, unsigned long, flags, void __user *, data) | 2071 | char __user *, type, unsigned long, flags, void __user *, data) |
2071 | { | 2072 | { |
@@ -2092,10 +2093,8 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, | |||
2092 | if (retval < 0) | 2093 | if (retval < 0) |
2093 | goto out3; | 2094 | goto out3; |
2094 | 2095 | ||
2095 | lock_kernel(); | ||
2096 | retval = do_mount((char *)dev_page, dir_page, (char *)type_page, | 2096 | retval = do_mount((char *)dev_page, dir_page, (char *)type_page, |
2097 | flags, (void *)data_page); | 2097 | flags, (void *)data_page); |
2098 | unlock_kernel(); | ||
2099 | free_page(data_page); | 2098 | free_page(data_page); |
2100 | 2099 | ||
2101 | out3: | 2100 | out3: |
@@ -2175,9 +2174,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2175 | error = -ENOENT; | 2174 | error = -ENOENT; |
2176 | if (IS_DEADDIR(new.dentry->d_inode)) | 2175 | if (IS_DEADDIR(new.dentry->d_inode)) |
2177 | goto out2; | 2176 | goto out2; |
2178 | if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry)) | 2177 | if (d_unlinked(new.dentry)) |
2179 | goto out2; | 2178 | goto out2; |
2180 | if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry)) | 2179 | if (d_unlinked(old.dentry)) |
2181 | goto out2; | 2180 | goto out2; |
2182 | error = -EBUSY; | 2181 | error = -EBUSY; |
2183 | if (new.mnt == root.mnt || | 2182 | if (new.mnt == root.mnt || |
@@ -2243,16 +2242,9 @@ static void __init init_mount_tree(void) | |||
2243 | mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); | 2242 | mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); |
2244 | if (IS_ERR(mnt)) | 2243 | if (IS_ERR(mnt)) |
2245 | panic("Can't create rootfs"); | 2244 | panic("Can't create rootfs"); |
2246 | ns = kmalloc(sizeof(*ns), GFP_KERNEL); | 2245 | ns = create_mnt_ns(mnt); |
2247 | if (!ns) | 2246 | if (IS_ERR(ns)) |
2248 | panic("Can't allocate initial namespace"); | 2247 | panic("Can't allocate initial namespace"); |
2249 | atomic_set(&ns->count, 1); | ||
2250 | INIT_LIST_HEAD(&ns->list); | ||
2251 | init_waitqueue_head(&ns->poll); | ||
2252 | ns->event = 0; | ||
2253 | list_add(&mnt->mnt_list, &ns->list); | ||
2254 | ns->root = mnt; | ||
2255 | mnt->mnt_ns = ns; | ||
2256 | 2248 | ||
2257 | init_task.nsproxy->mnt_ns = ns; | 2249 | init_task.nsproxy->mnt_ns = ns; |
2258 | get_mnt_ns(ns); | 2250 | get_mnt_ns(ns); |
@@ -2295,10 +2287,14 @@ void __init mnt_init(void) | |||
2295 | init_mount_tree(); | 2287 | init_mount_tree(); |
2296 | } | 2288 | } |
2297 | 2289 | ||
2298 | void __put_mnt_ns(struct mnt_namespace *ns) | 2290 | void put_mnt_ns(struct mnt_namespace *ns) |
2299 | { | 2291 | { |
2300 | struct vfsmount *root = ns->root; | 2292 | struct vfsmount *root; |
2301 | LIST_HEAD(umount_list); | 2293 | LIST_HEAD(umount_list); |
2294 | |||
2295 | if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock)) | ||
2296 | return; | ||
2297 | root = ns->root; | ||
2302 | ns->root = NULL; | 2298 | ns->root = NULL; |
2303 | spin_unlock(&vfsmount_lock); | 2299 | spin_unlock(&vfsmount_lock); |
2304 | down_write(&namespace_sem); | 2300 | down_write(&namespace_sem); |
@@ -2309,3 +2305,4 @@ void __put_mnt_ns(struct mnt_namespace *ns) | |||
2309 | release_mounts(&umount_list); | 2305 | release_mounts(&umount_list); |
2310 | kfree(ns); | 2306 | kfree(ns); |
2311 | } | 2307 | } |
2308 | EXPORT_SYMBOL(put_mnt_ns); | ||
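
put_mnt_ns() now folds the reference drop into the teardown with atomic_dec_and_lock(): the count is decremented without the lock on the fast path, and vfsmount_lock is only taken when the final reference goes away. Below is a userspace approximation of that helper using C11 atomics and a pthread mutex; dec_and_lock here is a stand-in, not the kernel function.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t ns_lock = PTHREAD_MUTEX_INITIALIZER;

/* Return true with the lock held iff this call dropped the last reference. */
static bool dec_and_lock(atomic_int *count, pthread_mutex_t *lock)
{
	int old = atomic_load(count);

	/* Fast path: if we are not the last user, never touch the lock. */
	while (old > 1) {
		if (atomic_compare_exchange_weak(count, &old, old - 1))
			return false;
	}
	pthread_mutex_lock(lock);
	if (atomic_fetch_sub(count, 1) == 1)
		return true;		/* caller tears down, then unlocks */
	pthread_mutex_unlock(lock);
	return false;
}

int main(void)
{
	atomic_int refs = 2;

	printf("last ref? %d\n", dec_and_lock(&refs, &ns_lock));	/* 0 */
	if (dec_and_lock(&refs, &ns_lock)) {				/* 1 */
		printf("tearing down\n");
		pthread_mutex_unlock(&ns_lock);
	}
	return 0;
}

Only the final drop pays for serialised teardown; every other caller returns without taking the lock at all.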