aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2005-11-07 17:13:39 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2005-11-07 21:18:09 -0500
commit7b7b1ace2d9d06d76bce7481a045c22ed75e35dd (patch)
tree458f9f16b855ed0347013048c13d3a29031f00ee
parent254ce8dc882f8d69e5d49ed4807c94a61976fb15 (diff)
[PATCH] saner handling of auto_acct_off() and DQUOT_OFF() in umount
The way we currently deal with quota and process accounting that might keep vfsmount busy at umount time is inherently broken; we try to turn them off just in case (not quite correctly, at that) and a) pray umount doesn't fail (otherwise they'll stay turned off) b) pray nobody does anything funny just as we turn quota off. Moreover, LSM provides hooks for doing the same sort of broken logic. The proper way to deal with that is to introduce the second kind of reference to vfsmount. Semantics: - when the last normal reference is dropped, all special ones are converted to normal ones and if there had been any, cleanup is done. - normal reference can be cloned into a special one - special reference can be converted to normal one; that's a no-op if we'd already passed the point of no return (i.e. mntput() had converted special references to normal and started cleanup). The way it works: e.g. starting process accounting converts the vfsmount reference pinned by the opened file into special one and turns it back to normal when it gets shut down; acct_auto_close() is done when no normal references are left. That way it does *not* obstruct umount(2) and it silently gets turned off when the last normal reference to vfsmount is gone. Which is exactly what we want... The same should be done by an LSM module that holds some internal references to vfsmount and wants to shut them down on umount - it should make them special and security_sb_umount_close() will be called exactly when the last normal reference to vfsmount is gone. Quota handling is even simpler - we don't use normal file IO anymore, so there's no need to hold vfsmounts at all. DQUOT_OFF() is done from deactivate_super(), where it really belongs. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/dquot.c18
-rw-r--r--fs/namespace.c64
-rw-r--r--fs/super.c1
-rw-r--r--include/linux/acct.h3
-rw-r--r--include/linux/mount.h13
-rw-r--r--include/linux/quota.h1
-rw-r--r--kernel/acct.c92
7 files changed, 113 insertions, 79 deletions
diff --git a/fs/dquot.c b/fs/dquot.c
index afa06a893468..05b60283c9c2 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1321,13 +1321,11 @@ int vfs_quota_off(struct super_block *sb, int type)
1321 int cnt; 1321 int cnt;
1322 struct quota_info *dqopt = sb_dqopt(sb); 1322 struct quota_info *dqopt = sb_dqopt(sb);
1323 struct inode *toputinode[MAXQUOTAS]; 1323 struct inode *toputinode[MAXQUOTAS];
1324 struct vfsmount *toputmnt[MAXQUOTAS];
1325 1324
1326 /* We need to serialize quota_off() for device */ 1325 /* We need to serialize quota_off() for device */
1327 down(&dqopt->dqonoff_sem); 1326 down(&dqopt->dqonoff_sem);
1328 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1327 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1329 toputinode[cnt] = NULL; 1328 toputinode[cnt] = NULL;
1330 toputmnt[cnt] = NULL;
1331 if (type != -1 && cnt != type) 1329 if (type != -1 && cnt != type)
1332 continue; 1330 continue;
1333 if (!sb_has_quota_enabled(sb, cnt)) 1331 if (!sb_has_quota_enabled(sb, cnt))
@@ -1348,9 +1346,7 @@ int vfs_quota_off(struct super_block *sb, int type)
1348 put_quota_format(dqopt->info[cnt].dqi_format); 1346 put_quota_format(dqopt->info[cnt].dqi_format);
1349 1347
1350 toputinode[cnt] = dqopt->files[cnt]; 1348 toputinode[cnt] = dqopt->files[cnt];
1351 toputmnt[cnt] = dqopt->mnt[cnt];
1352 dqopt->files[cnt] = NULL; 1349 dqopt->files[cnt] = NULL;
1353 dqopt->mnt[cnt] = NULL;
1354 dqopt->info[cnt].dqi_flags = 0; 1350 dqopt->info[cnt].dqi_flags = 0;
1355 dqopt->info[cnt].dqi_igrace = 0; 1351 dqopt->info[cnt].dqi_igrace = 0;
1356 dqopt->info[cnt].dqi_bgrace = 0; 1352 dqopt->info[cnt].dqi_bgrace = 0;
@@ -1358,10 +1354,7 @@ int vfs_quota_off(struct super_block *sb, int type)
1358 } 1354 }
1359 up(&dqopt->dqonoff_sem); 1355 up(&dqopt->dqonoff_sem);
1360 /* Sync the superblock so that buffers with quota data are written to 1356 /* Sync the superblock so that buffers with quota data are written to
1361 * disk (and so userspace sees correct data afterwards). 1357 * disk (and so userspace sees correct data afterwards). */
1362 * The reference to vfsmnt we are still holding protects us from
1363 * umount (we don't have it only when quotas are turned on/off for
1364 * journal replay but in that case we are guarded by the fs anyway). */
1365 if (sb->s_op->sync_fs) 1358 if (sb->s_op->sync_fs)
1366 sb->s_op->sync_fs(sb, 1); 1359 sb->s_op->sync_fs(sb, 1);
1367 sync_blockdev(sb->s_bdev); 1360 sync_blockdev(sb->s_bdev);
@@ -1385,10 +1378,6 @@ int vfs_quota_off(struct super_block *sb, int type)
1385 iput(toputinode[cnt]); 1378 iput(toputinode[cnt]);
1386 } 1379 }
1387 up(&dqopt->dqonoff_sem); 1380 up(&dqopt->dqonoff_sem);
1388 /* We don't hold the reference when we turned on quotas
1389 * just for the journal replay... */
1390 if (toputmnt[cnt])
1391 mntput(toputmnt[cnt]);
1392 } 1381 }
1393 if (sb->s_bdev) 1382 if (sb->s_bdev)
1394 invalidate_bdev(sb->s_bdev, 0); 1383 invalidate_bdev(sb->s_bdev, 0);
@@ -1503,11 +1492,8 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path)
1503 /* Quota file not on the same filesystem? */ 1492 /* Quota file not on the same filesystem? */
1504 if (nd.mnt->mnt_sb != sb) 1493 if (nd.mnt->mnt_sb != sb)
1505 error = -EXDEV; 1494 error = -EXDEV;
1506 else { 1495 else
1507 error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id); 1496 error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id);
1508 if (!error)
1509 sb_dqopt(sb)->mnt[type] = mntget(nd.mnt);
1510 }
1511out_path: 1497out_path:
1512 path_release(&nd); 1498 path_release(&nd);
1513 return error; 1499 return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index 2fa9fdf7d6f5..1d83302f30c3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -172,7 +172,7 @@ clone_mnt(struct vfsmount *old, struct dentry *root)
172 return mnt; 172 return mnt;
173} 173}
174 174
175void __mntput(struct vfsmount *mnt) 175static inline void __mntput(struct vfsmount *mnt)
176{ 176{
177 struct super_block *sb = mnt->mnt_sb; 177 struct super_block *sb = mnt->mnt_sb;
178 dput(mnt->mnt_root); 178 dput(mnt->mnt_root);
@@ -180,7 +180,46 @@ void __mntput(struct vfsmount *mnt)
180 deactivate_super(sb); 180 deactivate_super(sb);
181} 181}
182 182
183EXPORT_SYMBOL(__mntput); 183void mntput_no_expire(struct vfsmount *mnt)
184{
185repeat:
186 if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
187 if (likely(!mnt->mnt_pinned)) {
188 spin_unlock(&vfsmount_lock);
189 __mntput(mnt);
190 return;
191 }
192 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
193 mnt->mnt_pinned = 0;
194 spin_unlock(&vfsmount_lock);
195 acct_auto_close_mnt(mnt);
196 security_sb_umount_close(mnt);
197 goto repeat;
198 }
199}
200
201EXPORT_SYMBOL(mntput_no_expire);
202
203void mnt_pin(struct vfsmount *mnt)
204{
205 spin_lock(&vfsmount_lock);
206 mnt->mnt_pinned++;
207 spin_unlock(&vfsmount_lock);
208}
209
210EXPORT_SYMBOL(mnt_pin);
211
212void mnt_unpin(struct vfsmount *mnt)
213{
214 spin_lock(&vfsmount_lock);
215 if (mnt->mnt_pinned) {
216 atomic_inc(&mnt->mnt_count);
217 mnt->mnt_pinned--;
218 }
219 spin_unlock(&vfsmount_lock);
220}
221
222EXPORT_SYMBOL(mnt_unpin);
184 223
185/* iterator */ 224/* iterator */
186static void *m_start(struct seq_file *m, loff_t *pos) 225static void *m_start(struct seq_file *m, loff_t *pos)
@@ -435,16 +474,6 @@ static int do_umount(struct vfsmount *mnt, int flags)
435 down_write(&current->namespace->sem); 474 down_write(&current->namespace->sem);
436 spin_lock(&vfsmount_lock); 475 spin_lock(&vfsmount_lock);
437 476
438 if (atomic_read(&sb->s_active) == 1) {
439 /* last instance - try to be smart */
440 spin_unlock(&vfsmount_lock);
441 lock_kernel();
442 DQUOT_OFF(sb);
443 acct_auto_close(sb);
444 unlock_kernel();
445 security_sb_umount_close(mnt);
446 spin_lock(&vfsmount_lock);
447 }
448 retval = -EBUSY; 477 retval = -EBUSY;
449 if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) { 478 if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) {
450 if (!list_empty(&mnt->mnt_list)) 479 if (!list_empty(&mnt->mnt_list))
@@ -850,17 +879,6 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
850 detach_mnt(mnt, &old_nd); 879 detach_mnt(mnt, &old_nd);
851 spin_unlock(&vfsmount_lock); 880 spin_unlock(&vfsmount_lock);
852 path_release(&old_nd); 881 path_release(&old_nd);
853
854 /*
855 * Now lay it to rest if this was the last ref on the superblock
856 */
857 if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
858 /* last instance - try to be smart */
859 lock_kernel();
860 DQUOT_OFF(mnt->mnt_sb);
861 acct_auto_close(mnt->mnt_sb);
862 unlock_kernel();
863 }
864 mntput(mnt); 882 mntput(mnt);
865 } else { 883 } else {
866 /* 884 /*
diff --git a/fs/super.c b/fs/super.c
index eed6c3132905..6689dded3c84 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -171,6 +171,7 @@ void deactivate_super(struct super_block *s)
171 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { 171 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) {
172 s->s_count -= S_BIAS-1; 172 s->s_count -= S_BIAS-1;
173 spin_unlock(&sb_lock); 173 spin_unlock(&sb_lock);
174 DQUOT_OFF(s);
174 down_write(&s->s_umount); 175 down_write(&s->s_umount);
175 fs->kill_sb(s); 176 fs->kill_sb(s);
176 put_filesystem(fs); 177 put_filesystem(fs);
diff --git a/include/linux/acct.h b/include/linux/acct.h
index 19f70462b3be..93c5b3cdf951 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -117,12 +117,15 @@ struct acct_v3
117#include <linux/config.h> 117#include <linux/config.h>
118 118
119#ifdef CONFIG_BSD_PROCESS_ACCT 119#ifdef CONFIG_BSD_PROCESS_ACCT
120struct vfsmount;
120struct super_block; 121struct super_block;
122extern void acct_auto_close_mnt(struct vfsmount *m);
121extern void acct_auto_close(struct super_block *sb); 123extern void acct_auto_close(struct super_block *sb);
122extern void acct_process(long exitcode); 124extern void acct_process(long exitcode);
123extern void acct_update_integrals(struct task_struct *tsk); 125extern void acct_update_integrals(struct task_struct *tsk);
124extern void acct_clear_integrals(struct task_struct *tsk); 126extern void acct_clear_integrals(struct task_struct *tsk);
125#else 127#else
128#define acct_auto_close_mnt(x) do { } while (0)
126#define acct_auto_close(x) do { } while (0) 129#define acct_auto_close(x) do { } while (0)
127#define acct_process(x) do { } while (0) 130#define acct_process(x) do { } while (0)
128#define acct_update_integrals(x) do { } while (0) 131#define acct_update_integrals(x) do { } while (0)
diff --git a/include/linux/mount.h b/include/linux/mount.h
index f8f39937e301..ffb0b5089880 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -37,6 +37,7 @@ struct vfsmount
37 struct list_head mnt_list; 37 struct list_head mnt_list;
38 struct list_head mnt_expire; /* link in fs-specific expiry list */ 38 struct list_head mnt_expire; /* link in fs-specific expiry list */
39 struct namespace *mnt_namespace; /* containing namespace */ 39 struct namespace *mnt_namespace; /* containing namespace */
40 int mnt_pinned;
40}; 41};
41 42
42static inline struct vfsmount *mntget(struct vfsmount *mnt) 43static inline struct vfsmount *mntget(struct vfsmount *mnt)
@@ -46,15 +47,9 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt)
46 return mnt; 47 return mnt;
47} 48}
48 49
49extern void __mntput(struct vfsmount *mnt); 50extern void mntput_no_expire(struct vfsmount *mnt);
50 51extern void mnt_pin(struct vfsmount *mnt);
51static inline void mntput_no_expire(struct vfsmount *mnt) 52extern void mnt_unpin(struct vfsmount *mnt);
52{
53 if (mnt) {
54 if (atomic_dec_and_test(&mnt->mnt_count))
55 __mntput(mnt);
56 }
57}
58 53
59static inline void mntput(struct vfsmount *mnt) 54static inline void mntput(struct vfsmount *mnt)
60{ 55{
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 700ead45084f..f33aeb22c26a 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -289,7 +289,6 @@ struct quota_info {
289 struct semaphore dqonoff_sem; /* Serialize quotaon & quotaoff */ 289 struct semaphore dqonoff_sem; /* Serialize quotaon & quotaoff */
290 struct rw_semaphore dqptr_sem; /* serialize ops using quota_info struct, pointers from inode to dquots */ 290 struct rw_semaphore dqptr_sem; /* serialize ops using quota_info struct, pointers from inode to dquots */
291 struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */ 291 struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */
292 struct vfsmount *mnt[MAXQUOTAS]; /* mountpoint entries of filesystems with quota files */
293 struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */ 292 struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */
294 struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ 293 struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */
295}; 294};
diff --git a/kernel/acct.c b/kernel/acct.c
index 2e3f4a47e7d0..6312d6bd43e3 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -54,6 +54,7 @@
54#include <linux/jiffies.h> 54#include <linux/jiffies.h>
55#include <linux/times.h> 55#include <linux/times.h>
56#include <linux/syscalls.h> 56#include <linux/syscalls.h>
57#include <linux/mount.h>
57#include <asm/uaccess.h> 58#include <asm/uaccess.h>
58#include <asm/div64.h> 59#include <asm/div64.h>
59#include <linux/blkdev.h> /* sector_div */ 60#include <linux/blkdev.h> /* sector_div */
@@ -192,6 +193,7 @@ static void acct_file_reopen(struct file *file)
192 add_timer(&acct_globals.timer); 193 add_timer(&acct_globals.timer);
193 } 194 }
194 if (old_acct) { 195 if (old_acct) {
196 mnt_unpin(old_acct->f_vfsmnt);
195 spin_unlock(&acct_globals.lock); 197 spin_unlock(&acct_globals.lock);
196 do_acct_process(0, old_acct); 198 do_acct_process(0, old_acct);
197 filp_close(old_acct, NULL); 199 filp_close(old_acct, NULL);
@@ -199,6 +201,42 @@ static void acct_file_reopen(struct file *file)
199 } 201 }
200} 202}
201 203
204static int acct_on(char *name)
205{
206 struct file *file;
207 int error;
208
209 /* Difference from BSD - they don't do O_APPEND */
210 file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
211 if (IS_ERR(file))
212 return PTR_ERR(file);
213
214 if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
215 filp_close(file, NULL);
216 return -EACCES;
217 }
218
219 if (!file->f_op->write) {
220 filp_close(file, NULL);
221 return -EIO;
222 }
223
224 error = security_acct(file);
225 if (error) {
226 filp_close(file, NULL);
227 return error;
228 }
229
230 spin_lock(&acct_globals.lock);
231 mnt_pin(file->f_vfsmnt);
232 acct_file_reopen(file);
233 spin_unlock(&acct_globals.lock);
234
235 mntput(file->f_vfsmnt); /* it's pinned, now give up active reference */
236
237 return 0;
238}
239
202/** 240/**
203 * sys_acct - enable/disable process accounting 241 * sys_acct - enable/disable process accounting
204 * @name: file name for accounting records or NULL to shutdown accounting 242 * @name: file name for accounting records or NULL to shutdown accounting
@@ -212,47 +250,41 @@ static void acct_file_reopen(struct file *file)
212 */ 250 */
213asmlinkage long sys_acct(const char __user *name) 251asmlinkage long sys_acct(const char __user *name)
214{ 252{
215 struct file *file = NULL;
216 char *tmp;
217 int error; 253 int error;
218 254
219 if (!capable(CAP_SYS_PACCT)) 255 if (!capable(CAP_SYS_PACCT))
220 return -EPERM; 256 return -EPERM;
221 257
222 if (name) { 258 if (name) {
223 tmp = getname(name); 259 char *tmp = getname(name);
224 if (IS_ERR(tmp)) { 260 if (IS_ERR(tmp))
225 return (PTR_ERR(tmp)); 261 return (PTR_ERR(tmp));
226 } 262 error = acct_on(tmp);
227 /* Difference from BSD - they don't do O_APPEND */
228 file = filp_open(tmp, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
229 putname(tmp); 263 putname(tmp);
230 if (IS_ERR(file)) { 264 } else {
231 return (PTR_ERR(file)); 265 error = security_acct(NULL);
232 } 266 if (!error) {
233 if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { 267 spin_lock(&acct_globals.lock);
234 filp_close(file, NULL); 268 acct_file_reopen(NULL);
235 return (-EACCES); 269 spin_unlock(&acct_globals.lock);
236 }
237
238 if (!file->f_op->write) {
239 filp_close(file, NULL);
240 return (-EIO);
241 } 270 }
242 } 271 }
272 return error;
273}
243 274
244 error = security_acct(file); 275/**
245 if (error) { 276 * acct_auto_close - turn off a filesystem's accounting if it is on
246 if (file) 277 * @m: vfsmount being shut down
247 filp_close(file, NULL); 278 *
248 return error; 279 * If the accounting is turned on for a file in the subtree pointed to
249 } 280 * to by m, turn accounting off. Done when m is about to die.
250 281 */
282void acct_auto_close_mnt(struct vfsmount *m)
283{
251 spin_lock(&acct_globals.lock); 284 spin_lock(&acct_globals.lock);
252 acct_file_reopen(file); 285 if (acct_globals.file && acct_globals.file->f_vfsmnt == m)
286 acct_file_reopen(NULL);
253 spin_unlock(&acct_globals.lock); 287 spin_unlock(&acct_globals.lock);
254
255 return (0);
256} 288}
257 289
258/** 290/**
@@ -266,8 +298,8 @@ void acct_auto_close(struct super_block *sb)
266{ 298{
267 spin_lock(&acct_globals.lock); 299 spin_lock(&acct_globals.lock);
268 if (acct_globals.file && 300 if (acct_globals.file &&
269 acct_globals.file->f_dentry->d_inode->i_sb == sb) { 301 acct_globals.file->f_vfsmnt->mnt_sb == sb) {
270 acct_file_reopen((struct file *)NULL); 302 acct_file_reopen(NULL);
271 } 303 }
272 spin_unlock(&acct_globals.lock); 304 spin_unlock(&acct_globals.lock);
273} 305}