diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2005-11-07 17:13:39 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-11-07 21:18:09 -0500 |
commit | 7b7b1ace2d9d06d76bce7481a045c22ed75e35dd (patch) | |
tree | 458f9f16b855ed0347013048c13d3a29031f00ee /fs | |
parent | 254ce8dc882f8d69e5d49ed4807c94a61976fb15 (diff) |
[PATCH] saner handling of auto_acct_off() and DQUOT_OFF() in umount
The way we currently deal with quota and process accounting that might
keep vfsmount busy at umount time is inherently broken; we try to turn
them off just in case (not quite correctly, at that) and
a) pray umount doesn't fail (otherwise they'll stay turned off)
b) pray nobody does anything funny just as we turn quota off
Moreover, LSM provides hooks for doing the same sort of broken logic.
The proper way to deal with that is to introduce the second kind of
reference to vfsmount. Semantics:
- when the last normal reference is dropped, all special ones are
converted to normal ones and if there had been any, cleanup is done.
- normal reference can be cloned into a special one
- special reference can be converted to normal one; that's a no-op if
we'd already passed the point of no return (i.e. mntput() had
converted special references to normal and started cleanup).
The way it works: e.g. starting process accounting converts the vfsmount
reference pinned by the opened file into special one and turns it back
to normal when it gets shut down; acct_auto_close() is done when no
normal references are left. That way it does *not* obstruct umount(2)
and it silently gets turned off when the last normal reference to
vfsmount is gone. Which is exactly what we want...
The same should be done by LSM module that holds some internal
references to vfsmount and wants to shut them down on umount - it should
make them special and security_sb_umount_close() will be called exactly
when the last normal reference to vfsmount is gone.
quota handling is even simpler - we don't use normal file IO anymore, so
there's no need to hold vfsmounts at all. DQUOT_OFF() is done from
deactivate_super(), where it really belongs.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/dquot.c | 18 | ||||
-rw-r--r-- | fs/namespace.c | 64 | ||||
-rw-r--r-- | fs/super.c | 1 |
3 files changed, 44 insertions, 39 deletions
diff --git a/fs/dquot.c b/fs/dquot.c index afa06a893468..05b60283c9c2 100644 --- a/fs/dquot.c +++ b/fs/dquot.c | |||
@@ -1321,13 +1321,11 @@ int vfs_quota_off(struct super_block *sb, int type) | |||
1321 | int cnt; | 1321 | int cnt; |
1322 | struct quota_info *dqopt = sb_dqopt(sb); | 1322 | struct quota_info *dqopt = sb_dqopt(sb); |
1323 | struct inode *toputinode[MAXQUOTAS]; | 1323 | struct inode *toputinode[MAXQUOTAS]; |
1324 | struct vfsmount *toputmnt[MAXQUOTAS]; | ||
1325 | 1324 | ||
1326 | /* We need to serialize quota_off() for device */ | 1325 | /* We need to serialize quota_off() for device */ |
1327 | down(&dqopt->dqonoff_sem); | 1326 | down(&dqopt->dqonoff_sem); |
1328 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | 1327 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { |
1329 | toputinode[cnt] = NULL; | 1328 | toputinode[cnt] = NULL; |
1330 | toputmnt[cnt] = NULL; | ||
1331 | if (type != -1 && cnt != type) | 1329 | if (type != -1 && cnt != type) |
1332 | continue; | 1330 | continue; |
1333 | if (!sb_has_quota_enabled(sb, cnt)) | 1331 | if (!sb_has_quota_enabled(sb, cnt)) |
@@ -1348,9 +1346,7 @@ int vfs_quota_off(struct super_block *sb, int type) | |||
1348 | put_quota_format(dqopt->info[cnt].dqi_format); | 1346 | put_quota_format(dqopt->info[cnt].dqi_format); |
1349 | 1347 | ||
1350 | toputinode[cnt] = dqopt->files[cnt]; | 1348 | toputinode[cnt] = dqopt->files[cnt]; |
1351 | toputmnt[cnt] = dqopt->mnt[cnt]; | ||
1352 | dqopt->files[cnt] = NULL; | 1349 | dqopt->files[cnt] = NULL; |
1353 | dqopt->mnt[cnt] = NULL; | ||
1354 | dqopt->info[cnt].dqi_flags = 0; | 1350 | dqopt->info[cnt].dqi_flags = 0; |
1355 | dqopt->info[cnt].dqi_igrace = 0; | 1351 | dqopt->info[cnt].dqi_igrace = 0; |
1356 | dqopt->info[cnt].dqi_bgrace = 0; | 1352 | dqopt->info[cnt].dqi_bgrace = 0; |
@@ -1358,10 +1354,7 @@ int vfs_quota_off(struct super_block *sb, int type) | |||
1358 | } | 1354 | } |
1359 | up(&dqopt->dqonoff_sem); | 1355 | up(&dqopt->dqonoff_sem); |
1360 | /* Sync the superblock so that buffers with quota data are written to | 1356 | /* Sync the superblock so that buffers with quota data are written to |
1361 | * disk (and so userspace sees correct data afterwards). | 1357 | * disk (and so userspace sees correct data afterwards). */ |
1362 | * The reference to vfsmnt we are still holding protects us from | ||
1363 | * umount (we don't have it only when quotas are turned on/off for | ||
1364 | * journal replay but in that case we are guarded by the fs anyway). */ | ||
1365 | if (sb->s_op->sync_fs) | 1358 | if (sb->s_op->sync_fs) |
1366 | sb->s_op->sync_fs(sb, 1); | 1359 | sb->s_op->sync_fs(sb, 1); |
1367 | sync_blockdev(sb->s_bdev); | 1360 | sync_blockdev(sb->s_bdev); |
@@ -1385,10 +1378,6 @@ int vfs_quota_off(struct super_block *sb, int type) | |||
1385 | iput(toputinode[cnt]); | 1378 | iput(toputinode[cnt]); |
1386 | } | 1379 | } |
1387 | up(&dqopt->dqonoff_sem); | 1380 | up(&dqopt->dqonoff_sem); |
1388 | /* We don't hold the reference when we turned on quotas | ||
1389 | * just for the journal replay... */ | ||
1390 | if (toputmnt[cnt]) | ||
1391 | mntput(toputmnt[cnt]); | ||
1392 | } | 1381 | } |
1393 | if (sb->s_bdev) | 1382 | if (sb->s_bdev) |
1394 | invalidate_bdev(sb->s_bdev, 0); | 1383 | invalidate_bdev(sb->s_bdev, 0); |
@@ -1503,11 +1492,8 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) | |||
1503 | /* Quota file not on the same filesystem? */ | 1492 | /* Quota file not on the same filesystem? */ |
1504 | if (nd.mnt->mnt_sb != sb) | 1493 | if (nd.mnt->mnt_sb != sb) |
1505 | error = -EXDEV; | 1494 | error = -EXDEV; |
1506 | else { | 1495 | else |
1507 | error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id); | 1496 | error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id); |
1508 | if (!error) | ||
1509 | sb_dqopt(sb)->mnt[type] = mntget(nd.mnt); | ||
1510 | } | ||
1511 | out_path: | 1497 | out_path: |
1512 | path_release(&nd); | 1498 | path_release(&nd); |
1513 | return error; | 1499 | return error; |
diff --git a/fs/namespace.c b/fs/namespace.c index 2fa9fdf7d6f5..1d83302f30c3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -172,7 +172,7 @@ clone_mnt(struct vfsmount *old, struct dentry *root) | |||
172 | return mnt; | 172 | return mnt; |
173 | } | 173 | } |
174 | 174 | ||
175 | void __mntput(struct vfsmount *mnt) | 175 | static inline void __mntput(struct vfsmount *mnt) |
176 | { | 176 | { |
177 | struct super_block *sb = mnt->mnt_sb; | 177 | struct super_block *sb = mnt->mnt_sb; |
178 | dput(mnt->mnt_root); | 178 | dput(mnt->mnt_root); |
@@ -180,7 +180,46 @@ void __mntput(struct vfsmount *mnt) | |||
180 | deactivate_super(sb); | 180 | deactivate_super(sb); |
181 | } | 181 | } |
182 | 182 | ||
183 | EXPORT_SYMBOL(__mntput); | 183 | void mntput_no_expire(struct vfsmount *mnt) |
184 | { | ||
185 | repeat: | ||
186 | if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { | ||
187 | if (likely(!mnt->mnt_pinned)) { | ||
188 | spin_unlock(&vfsmount_lock); | ||
189 | __mntput(mnt); | ||
190 | return; | ||
191 | } | ||
192 | atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); | ||
193 | mnt->mnt_pinned = 0; | ||
194 | spin_unlock(&vfsmount_lock); | ||
195 | acct_auto_close_mnt(mnt); | ||
196 | security_sb_umount_close(mnt); | ||
197 | goto repeat; | ||
198 | } | ||
199 | } | ||
200 | |||
201 | EXPORT_SYMBOL(mntput_no_expire); | ||
202 | |||
203 | void mnt_pin(struct vfsmount *mnt) | ||
204 | { | ||
205 | spin_lock(&vfsmount_lock); | ||
206 | mnt->mnt_pinned++; | ||
207 | spin_unlock(&vfsmount_lock); | ||
208 | } | ||
209 | |||
210 | EXPORT_SYMBOL(mnt_pin); | ||
211 | |||
212 | void mnt_unpin(struct vfsmount *mnt) | ||
213 | { | ||
214 | spin_lock(&vfsmount_lock); | ||
215 | if (mnt->mnt_pinned) { | ||
216 | atomic_inc(&mnt->mnt_count); | ||
217 | mnt->mnt_pinned--; | ||
218 | } | ||
219 | spin_unlock(&vfsmount_lock); | ||
220 | } | ||
221 | |||
222 | EXPORT_SYMBOL(mnt_unpin); | ||
184 | 223 | ||
185 | /* iterator */ | 224 | /* iterator */ |
186 | static void *m_start(struct seq_file *m, loff_t *pos) | 225 | static void *m_start(struct seq_file *m, loff_t *pos) |
@@ -435,16 +474,6 @@ static int do_umount(struct vfsmount *mnt, int flags) | |||
435 | down_write(&current->namespace->sem); | 474 | down_write(&current->namespace->sem); |
436 | spin_lock(&vfsmount_lock); | 475 | spin_lock(&vfsmount_lock); |
437 | 476 | ||
438 | if (atomic_read(&sb->s_active) == 1) { | ||
439 | /* last instance - try to be smart */ | ||
440 | spin_unlock(&vfsmount_lock); | ||
441 | lock_kernel(); | ||
442 | DQUOT_OFF(sb); | ||
443 | acct_auto_close(sb); | ||
444 | unlock_kernel(); | ||
445 | security_sb_umount_close(mnt); | ||
446 | spin_lock(&vfsmount_lock); | ||
447 | } | ||
448 | retval = -EBUSY; | 477 | retval = -EBUSY; |
449 | if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) { | 478 | if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) { |
450 | if (!list_empty(&mnt->mnt_list)) | 479 | if (!list_empty(&mnt->mnt_list)) |
@@ -850,17 +879,6 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts) | |||
850 | detach_mnt(mnt, &old_nd); | 879 | detach_mnt(mnt, &old_nd); |
851 | spin_unlock(&vfsmount_lock); | 880 | spin_unlock(&vfsmount_lock); |
852 | path_release(&old_nd); | 881 | path_release(&old_nd); |
853 | |||
854 | /* | ||
855 | * Now lay it to rest if this was the last ref on the superblock | ||
856 | */ | ||
857 | if (atomic_read(&mnt->mnt_sb->s_active) == 1) { | ||
858 | /* last instance - try to be smart */ | ||
859 | lock_kernel(); | ||
860 | DQUOT_OFF(mnt->mnt_sb); | ||
861 | acct_auto_close(mnt->mnt_sb); | ||
862 | unlock_kernel(); | ||
863 | } | ||
864 | mntput(mnt); | 882 | mntput(mnt); |
865 | } else { | 883 | } else { |
866 | /* | 884 | /* |
diff --git a/fs/super.c b/fs/super.c index eed6c3132905..6689dded3c84 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -171,6 +171,7 @@ void deactivate_super(struct super_block *s) | |||
171 | if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { | 171 | if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { |
172 | s->s_count -= S_BIAS-1; | 172 | s->s_count -= S_BIAS-1; |
173 | spin_unlock(&sb_lock); | 173 | spin_unlock(&sb_lock); |
174 | DQUOT_OFF(s); | ||
174 | down_write(&s->s_umount); | 175 | down_write(&s->s_umount); |
175 | fs->kill_sb(s); | 176 | fs->kill_sb(s); |
176 | put_filesystem(fs); | 177 | put_filesystem(fs); |