about | summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-08-11 14:44:11 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-08-11 14:44:11 -0400
commit f6f993328b2abcab86a3c99d7bd9f2066ab03d36 (patch)
tree ea6f3902a0fa546493731b3b52a31d98cc747a90 /kernel
parent c7a19c795b4b0a3232c157ed29eea85077e95da6 (diff)
parent 12a5b5294cb1896e9a3c9fca8ff5a7e3def4e8c6 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs updates from Al Viro:
 "Stuff in here:

   - acct.c fixes and general rework of mnt_pin mechanism. That allows to go for delayed-mntput stuff, which will permit mntput() on deep stack without worrying about stack overflows - fs shutdown will happen on shallow stack. IOW, we can do Eric's umount-on-rmdir series without introducing tons of stack overflows on new mntput() call chains it introduces.

   - Bruce's d_splice_alias() patches

   - more Miklos' rename() stuff.

   - a couple of regression fixes (stable fodder, in the end of branch) and a fix for API idiocy in iov_iter.c.

  There definitely will be another pile, maybe even two. I'd like to get Eric's series in this time, but even if we miss it, it'll go right in the beginning of for-next in the next cycle - the tricky part of prereqs is in this pile"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (40 commits)
  fix copy_tree() regression
  __generic_file_write_iter(): fix handling of sync error after DIO
  switch iov_iter_get_pages() to passing maximal number of pages
  fs: mark __d_obtain_alias static
  dcache: d_splice_alias should detect loops
  exportfs: update Exporting documentation
  dcache: d_find_alias needn't recheck IS_ROOT && DCACHE_DISCONNECTED
  dcache: remove unused d_find_alias parameter
  dcache: d_obtain_alias callers don't all want DISCONNECTED
  dcache: d_splice_alias should ignore DCACHE_DISCONNECTED
  dcache: d_splice_alias mustn't create directory aliases
  dcache: close d_move race in d_splice_alias
  dcache: move d_splice_alias
  namei: trivial fix to vfs_rename_dir comment
  VFS: allow ->d_manage() to declare -EISDIR in rcu_walk mode.
  cifs: support RENAME_NOREPLACE
  hostfs: support rename flags
  shmem: support RENAME_EXCHANGE
  shmem: support RENAME_NOREPLACE
  btrfs: add RENAME_NOREPLACE
  ...
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/acct.c 456
1 files changed, 201 insertions, 255 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 51793520566f..b4c667d22e79 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -59,6 +59,7 @@
59#include <asm/div64.h> 59#include <asm/div64.h>
60#include <linux/blkdev.h> /* sector_div */ 60#include <linux/blkdev.h> /* sector_div */
61#include <linux/pid_namespace.h> 61#include <linux/pid_namespace.h>
62#include <linux/fs_pin.h>
62 63
63/* 64/*
64 * These constants control the amount of freespace that suspend and 65 * These constants control the amount of freespace that suspend and
@@ -75,172 +76,190 @@ int acct_parm[3] = {4, 2, 30};
75/* 76/*
76 * External references and all of the globals. 77 * External references and all of the globals.
77 */ 78 */
78static void do_acct_process(struct bsd_acct_struct *acct, 79static void do_acct_process(struct bsd_acct_struct *acct);
79 struct pid_namespace *ns, struct file *);
80 80
81/*
82 * This structure is used so that all the data protected by lock
83 * can be placed in the same cache line as the lock. This primes
84 * the cache line to have the data after getting the lock.
85 */
86struct bsd_acct_struct { 81struct bsd_acct_struct {
82 struct fs_pin pin;
83 struct mutex lock;
87 int active; 84 int active;
88 unsigned long needcheck; 85 unsigned long needcheck;
89 struct file *file; 86 struct file *file;
90 struct pid_namespace *ns; 87 struct pid_namespace *ns;
91 struct list_head list; 88 struct work_struct work;
89 struct completion done;
92}; 90};
93 91
94static DEFINE_SPINLOCK(acct_lock);
95static LIST_HEAD(acct_list);
96
97/* 92/*
98 * Check the amount of free space and suspend/resume accordingly. 93 * Check the amount of free space and suspend/resume accordingly.
99 */ 94 */
100static int check_free_space(struct bsd_acct_struct *acct, struct file *file) 95static int check_free_space(struct bsd_acct_struct *acct)
101{ 96{
102 struct kstatfs sbuf; 97 struct kstatfs sbuf;
103 int res; 98
104 int act; 99 if (time_is_before_jiffies(acct->needcheck))
105 u64 resume;
106 u64 suspend;
107
108 spin_lock(&acct_lock);
109 res = acct->active;
110 if (!file || time_is_before_jiffies(acct->needcheck))
111 goto out; 100 goto out;
112 spin_unlock(&acct_lock);
113 101
114 /* May block */ 102 /* May block */
115 if (vfs_statfs(&file->f_path, &sbuf)) 103 if (vfs_statfs(&acct->file->f_path, &sbuf))
116 return res;
117 suspend = sbuf.f_blocks * SUSPEND;
118 resume = sbuf.f_blocks * RESUME;
119
120 do_div(suspend, 100);
121 do_div(resume, 100);
122
123 if (sbuf.f_bavail <= suspend)
124 act = -1;
125 else if (sbuf.f_bavail >= resume)
126 act = 1;
127 else
128 act = 0;
129
130 /*
131 * If some joker switched acct->file under us we'ld better be
132 * silent and _not_ touch anything.
133 */
134 spin_lock(&acct_lock);
135 if (file != acct->file) {
136 if (act)
137 res = act > 0;
138 goto out; 104 goto out;
139 }
140 105
141 if (acct->active) { 106 if (acct->active) {
142 if (act < 0) { 107 u64 suspend = sbuf.f_blocks * SUSPEND;
108 do_div(suspend, 100);
109 if (sbuf.f_bavail <= suspend) {
143 acct->active = 0; 110 acct->active = 0;
144 pr_info("Process accounting paused\n"); 111 pr_info("Process accounting paused\n");
145 } 112 }
146 } else { 113 } else {
147 if (act > 0) { 114 u64 resume = sbuf.f_blocks * RESUME;
115 do_div(resume, 100);
116 if (sbuf.f_bavail >= resume) {
148 acct->active = 1; 117 acct->active = 1;
149 pr_info("Process accounting resumed\n"); 118 pr_info("Process accounting resumed\n");
150 } 119 }
151 } 120 }
152 121
153 acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; 122 acct->needcheck = jiffies + ACCT_TIMEOUT*HZ;
154 res = acct->active;
155out: 123out:
156 spin_unlock(&acct_lock); 124 return acct->active;
125}
126
127static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
128{
129 struct bsd_acct_struct *res;
130again:
131 smp_rmb();
132 rcu_read_lock();
133 res = ACCESS_ONCE(ns->bacct);
134 if (!res) {
135 rcu_read_unlock();
136 return NULL;
137 }
138 if (!atomic_long_inc_not_zero(&res->pin.count)) {
139 rcu_read_unlock();
140 cpu_relax();
141 goto again;
142 }
143 rcu_read_unlock();
144 mutex_lock(&res->lock);
145 if (!res->ns) {
146 mutex_unlock(&res->lock);
147 pin_put(&res->pin);
148 goto again;
149 }
157 return res; 150 return res;
158} 151}
159 152
160/* 153static void close_work(struct work_struct *work)
161 * Close the old accounting file (if currently open) and then replace
162 * it with file (if non-NULL).
163 *
164 * NOTE: acct_lock MUST be held on entry and exit.
165 */
166static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
167 struct pid_namespace *ns)
168{ 154{
169 struct file *old_acct = NULL; 155 struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
170 struct pid_namespace *old_ns = NULL; 156 struct file *file = acct->file;
171 157 if (file->f_op->flush)
172 if (acct->file) { 158 file->f_op->flush(file, NULL);
173 old_acct = acct->file; 159 __fput_sync(file);
174 old_ns = acct->ns; 160 complete(&acct->done);
175 acct->active = 0; 161}
176 acct->file = NULL; 162
163static void acct_kill(struct bsd_acct_struct *acct,
164 struct bsd_acct_struct *new)
165{
166 if (acct) {
167 struct pid_namespace *ns = acct->ns;
168 do_acct_process(acct);
169 INIT_WORK(&acct->work, close_work);
170 init_completion(&acct->done);
171 schedule_work(&acct->work);
172 wait_for_completion(&acct->done);
173 pin_remove(&acct->pin);
174 ns->bacct = new;
177 acct->ns = NULL; 175 acct->ns = NULL;
178 list_del(&acct->list); 176 atomic_long_dec(&acct->pin.count);
179 } 177 mutex_unlock(&acct->lock);
180 if (file) { 178 pin_put(&acct->pin);
181 acct->file = file;
182 acct->ns = ns;
183 acct->needcheck = jiffies + ACCT_TIMEOUT*HZ;
184 acct->active = 1;
185 list_add(&acct->list, &acct_list);
186 } 179 }
187 if (old_acct) { 180}
188 mnt_unpin(old_acct->f_path.mnt); 181
189 spin_unlock(&acct_lock); 182static void acct_pin_kill(struct fs_pin *pin)
190 do_acct_process(acct, old_ns, old_acct); 183{
191 filp_close(old_acct, NULL); 184 struct bsd_acct_struct *acct;
192 spin_lock(&acct_lock); 185 acct = container_of(pin, struct bsd_acct_struct, pin);
186 mutex_lock(&acct->lock);
187 if (!acct->ns) {
188 mutex_unlock(&acct->lock);
189 pin_put(pin);
190 acct = NULL;
193 } 191 }
192 acct_kill(acct, NULL);
194} 193}
195 194
196static int acct_on(struct filename *pathname) 195static int acct_on(struct filename *pathname)
197{ 196{
198 struct file *file; 197 struct file *file;
199 struct vfsmount *mnt; 198 struct vfsmount *mnt, *internal;
200 struct pid_namespace *ns; 199 struct pid_namespace *ns = task_active_pid_ns(current);
201 struct bsd_acct_struct *acct = NULL; 200 struct bsd_acct_struct *acct, *old;
201 int err;
202
203 acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
204 if (!acct)
205 return -ENOMEM;
202 206
203 /* Difference from BSD - they don't do O_APPEND */ 207 /* Difference from BSD - they don't do O_APPEND */
204 file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0); 208 file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
205 if (IS_ERR(file)) 209 if (IS_ERR(file)) {
210 kfree(acct);
206 return PTR_ERR(file); 211 return PTR_ERR(file);
212 }
207 213
208 if (!S_ISREG(file_inode(file)->i_mode)) { 214 if (!S_ISREG(file_inode(file)->i_mode)) {
215 kfree(acct);
209 filp_close(file, NULL); 216 filp_close(file, NULL);
210 return -EACCES; 217 return -EACCES;
211 } 218 }
212 219
213 if (!file->f_op->write) { 220 if (!file->f_op->write) {
221 kfree(acct);
214 filp_close(file, NULL); 222 filp_close(file, NULL);
215 return -EIO; 223 return -EIO;
216 } 224 }
217 225 internal = mnt_clone_internal(&file->f_path);
218 ns = task_active_pid_ns(current); 226 if (IS_ERR(internal)) {
219 if (ns->bacct == NULL) { 227 kfree(acct);
220 acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); 228 filp_close(file, NULL);
221 if (acct == NULL) { 229 return PTR_ERR(internal);
222 filp_close(file, NULL);
223 return -ENOMEM;
224 }
225 } 230 }
226 231 err = mnt_want_write(internal);
227 spin_lock(&acct_lock); 232 if (err) {
228 if (ns->bacct == NULL) { 233 mntput(internal);
229 ns->bacct = acct; 234 kfree(acct);
230 acct = NULL; 235 filp_close(file, NULL);
236 return err;
231 } 237 }
232
233 mnt = file->f_path.mnt; 238 mnt = file->f_path.mnt;
234 mnt_pin(mnt); 239 file->f_path.mnt = internal;
235 acct_file_reopen(ns->bacct, file, ns); 240
236 spin_unlock(&acct_lock); 241 atomic_long_set(&acct->pin.count, 1);
237 242 acct->pin.kill = acct_pin_kill;
238 mntput(mnt); /* it's pinned, now give up active reference */ 243 acct->file = file;
239 kfree(acct); 244 acct->needcheck = jiffies;
240 245 acct->ns = ns;
246 mutex_init(&acct->lock);
247 mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */
248 pin_insert(&acct->pin, mnt);
249
250 old = acct_get(ns);
251 if (old)
252 acct_kill(old, acct);
253 else
254 ns->bacct = acct;
255 mutex_unlock(&acct->lock);
256 mnt_drop_write(mnt);
257 mntput(mnt);
241 return 0; 258 return 0;
242} 259}
243 260
261static DEFINE_MUTEX(acct_on_mutex);
262
244/** 263/**
245 * sys_acct - enable/disable process accounting 264 * sys_acct - enable/disable process accounting
246 * @name: file name for accounting records or NULL to shutdown accounting 265 * @name: file name for accounting records or NULL to shutdown accounting
@@ -264,78 +283,20 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
264 283
265 if (IS_ERR(tmp)) 284 if (IS_ERR(tmp))
266 return PTR_ERR(tmp); 285 return PTR_ERR(tmp);
286 mutex_lock(&acct_on_mutex);
267 error = acct_on(tmp); 287 error = acct_on(tmp);
288 mutex_unlock(&acct_on_mutex);
268 putname(tmp); 289 putname(tmp);
269 } else { 290 } else {
270 struct bsd_acct_struct *acct; 291 acct_kill(acct_get(task_active_pid_ns(current)), NULL);
271
272 acct = task_active_pid_ns(current)->bacct;
273 if (acct == NULL)
274 return 0;
275
276 spin_lock(&acct_lock);
277 acct_file_reopen(acct, NULL, NULL);
278 spin_unlock(&acct_lock);
279 } 292 }
280 293
281 return error; 294 return error;
282} 295}
283 296
284/**
285 * acct_auto_close - turn off a filesystem's accounting if it is on
286 * @m: vfsmount being shut down
287 *
288 * If the accounting is turned on for a file in the subtree pointed to
289 * to by m, turn accounting off. Done when m is about to die.
290 */
291void acct_auto_close_mnt(struct vfsmount *m)
292{
293 struct bsd_acct_struct *acct;
294
295 spin_lock(&acct_lock);
296restart:
297 list_for_each_entry(acct, &acct_list, list)
298 if (acct->file && acct->file->f_path.mnt == m) {
299 acct_file_reopen(acct, NULL, NULL);
300 goto restart;
301 }
302 spin_unlock(&acct_lock);
303}
304
305/**
306 * acct_auto_close - turn off a filesystem's accounting if it is on
307 * @sb: super block for the filesystem
308 *
309 * If the accounting is turned on for a file in the filesystem pointed
310 * to by sb, turn accounting off.
311 */
312void acct_auto_close(struct super_block *sb)
313{
314 struct bsd_acct_struct *acct;
315
316 spin_lock(&acct_lock);
317restart:
318 list_for_each_entry(acct, &acct_list, list)
319 if (acct->file && acct->file->f_path.dentry->d_sb == sb) {
320 acct_file_reopen(acct, NULL, NULL);
321 goto restart;
322 }
323 spin_unlock(&acct_lock);
324}
325
326void acct_exit_ns(struct pid_namespace *ns) 297void acct_exit_ns(struct pid_namespace *ns)
327{ 298{
328 struct bsd_acct_struct *acct = ns->bacct; 299 acct_kill(acct_get(ns), NULL);
329
330 if (acct == NULL)
331 return;
332
333 spin_lock(&acct_lock);
334 if (acct->file != NULL)
335 acct_file_reopen(acct, NULL, NULL);
336 spin_unlock(&acct_lock);
337
338 kfree(acct);
339} 300}
340 301
341/* 302/*
@@ -450,38 +411,20 @@ static u32 encode_float(u64 value)
450 * do_exit() or when switching to a different output file. 411 * do_exit() or when switching to a different output file.
451 */ 412 */
452 413
453/* 414static void fill_ac(acct_t *ac)
454 * do_acct_process does all actual work. Caller holds the reference to file.
455 */
456static void do_acct_process(struct bsd_acct_struct *acct,
457 struct pid_namespace *ns, struct file *file)
458{ 415{
459 struct pacct_struct *pacct = &current->signal->pacct; 416 struct pacct_struct *pacct = &current->signal->pacct;
460 acct_t ac;
461 mm_segment_t fs;
462 unsigned long flim;
463 u64 elapsed, run_time; 417 u64 elapsed, run_time;
464 struct tty_struct *tty; 418 struct tty_struct *tty;
465 const struct cred *orig_cred;
466
467 /* Perform file operations on behalf of whoever enabled accounting */
468 orig_cred = override_creds(file->f_cred);
469
470 /*
471 * First check to see if there is enough free_space to continue
472 * the process accounting system.
473 */
474 if (!check_free_space(acct, file))
475 goto out;
476 419
477 /* 420 /*
478 * Fill the accounting struct with the needed info as recorded 421 * Fill the accounting struct with the needed info as recorded
479 * by the different kernel functions. 422 * by the different kernel functions.
480 */ 423 */
481 memset(&ac, 0, sizeof(acct_t)); 424 memset(ac, 0, sizeof(acct_t));
482 425
483 ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER; 426 ac->ac_version = ACCT_VERSION | ACCT_BYTEORDER;
484 strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm)); 427 strlcpy(ac->ac_comm, current->comm, sizeof(ac->ac_comm));
485 428
486 /* calculate run_time in nsec*/ 429 /* calculate run_time in nsec*/
487 run_time = ktime_get_ns(); 430 run_time = ktime_get_ns();
@@ -489,9 +432,9 @@ static void do_acct_process(struct bsd_acct_struct *acct,
489 /* convert nsec -> AHZ */ 432 /* convert nsec -> AHZ */
490 elapsed = nsec_to_AHZ(run_time); 433 elapsed = nsec_to_AHZ(run_time);
491#if ACCT_VERSION == 3 434#if ACCT_VERSION == 3
492 ac.ac_etime = encode_float(elapsed); 435 ac->ac_etime = encode_float(elapsed);
493#else 436#else
494 ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ? 437 ac->ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
495 (unsigned long) elapsed : (unsigned long) -1l); 438 (unsigned long) elapsed : (unsigned long) -1l);
496#endif 439#endif
497#if ACCT_VERSION == 1 || ACCT_VERSION == 2 440#if ACCT_VERSION == 1 || ACCT_VERSION == 2
@@ -499,18 +442,58 @@ static void do_acct_process(struct bsd_acct_struct *acct,
499 /* new enlarged etime field */ 442 /* new enlarged etime field */
500 comp2_t etime = encode_comp2_t(elapsed); 443 comp2_t etime = encode_comp2_t(elapsed);
501 444
502 ac.ac_etime_hi = etime >> 16; 445 ac->ac_etime_hi = etime >> 16;
503 ac.ac_etime_lo = (u16) etime; 446 ac->ac_etime_lo = (u16) etime;
504 } 447 }
505#endif 448#endif
506 do_div(elapsed, AHZ); 449 do_div(elapsed, AHZ);
507 ac.ac_btime = get_seconds() - elapsed; 450 ac->ac_btime = get_seconds() - elapsed;
451#if ACCT_VERSION==2
452 ac->ac_ahz = AHZ;
453#endif
454
455 spin_lock_irq(&current->sighand->siglock);
456 tty = current->signal->tty; /* Safe as we hold the siglock */
457 ac->ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
458 ac->ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
459 ac->ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
460 ac->ac_flag = pacct->ac_flag;
461 ac->ac_mem = encode_comp_t(pacct->ac_mem);
462 ac->ac_minflt = encode_comp_t(pacct->ac_minflt);
463 ac->ac_majflt = encode_comp_t(pacct->ac_majflt);
464 ac->ac_exitcode = pacct->ac_exitcode;
465 spin_unlock_irq(&current->sighand->siglock);
466}
467/*
468 * do_acct_process does all actual work. Caller holds the reference to file.
469 */
470static void do_acct_process(struct bsd_acct_struct *acct)
471{
472 acct_t ac;
473 unsigned long flim;
474 const struct cred *orig_cred;
475 struct pid_namespace *ns = acct->ns;
476 struct file *file = acct->file;
477
478 /*
479 * Accounting records are not subject to resource limits.
480 */
481 flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
482 current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
483 /* Perform file operations on behalf of whoever enabled accounting */
484 orig_cred = override_creds(file->f_cred);
485
486 /*
487 * First check to see if there is enough free_space to continue
488 * the process accounting system.
489 */
490 if (!check_free_space(acct))
491 goto out;
492
493 fill_ac(&ac);
508 /* we really need to bite the bullet and change layout */ 494 /* we really need to bite the bullet and change layout */
509 ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid); 495 ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
510 ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid); 496 ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
511#if ACCT_VERSION == 2
512 ac.ac_ahz = AHZ;
513#endif
514#if ACCT_VERSION == 1 || ACCT_VERSION == 2 497#if ACCT_VERSION == 1 || ACCT_VERSION == 2
515 /* backward-compatible 16 bit fields */ 498 /* backward-compatible 16 bit fields */
516 ac.ac_uid16 = ac.ac_uid; 499 ac.ac_uid16 = ac.ac_uid;
@@ -522,45 +505,18 @@ static void do_acct_process(struct bsd_acct_struct *acct,
522 ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns); 505 ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
523 rcu_read_unlock(); 506 rcu_read_unlock();
524#endif 507#endif
525
526 spin_lock_irq(&current->sighand->siglock);
527 tty = current->signal->tty; /* Safe as we hold the siglock */
528 ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
529 ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
530 ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
531 ac.ac_flag = pacct->ac_flag;
532 ac.ac_mem = encode_comp_t(pacct->ac_mem);
533 ac.ac_minflt = encode_comp_t(pacct->ac_minflt);
534 ac.ac_majflt = encode_comp_t(pacct->ac_majflt);
535 ac.ac_exitcode = pacct->ac_exitcode;
536 spin_unlock_irq(&current->sighand->siglock);
537 ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */
538 ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
539 ac.ac_swaps = encode_comp_t(0);
540
541 /* 508 /*
542 * Get freeze protection. If the fs is frozen, just skip the write 509 * Get freeze protection. If the fs is frozen, just skip the write
543 * as we could deadlock the system otherwise. 510 * as we could deadlock the system otherwise.
544 */ 511 */
545 if (!file_start_write_trylock(file)) 512 if (file_start_write_trylock(file)) {
546 goto out; 513 /* it's been opened O_APPEND, so position is irrelevant */
547 /* 514 loff_t pos = 0;
548 * Kernel segment override to datasegment and write it 515 __kernel_write(file, (char *)&ac, sizeof(acct_t), &pos);
549 * to the accounting file. 516 file_end_write(file);
550 */ 517 }
551 fs = get_fs();
552 set_fs(KERNEL_DS);
553 /*
554 * Accounting records are not subject to resource limits.
555 */
556 flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
557 current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
558 file->f_op->write(file, (char *)&ac,
559 sizeof(acct_t), &file->f_pos);
560 current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
561 set_fs(fs);
562 file_end_write(file);
563out: 518out:
519 current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
564 revert_creds(orig_cred); 520 revert_creds(orig_cred);
565} 521}
566 522
@@ -609,34 +565,20 @@ void acct_collect(long exitcode, int group_dead)
609 spin_unlock_irq(&current->sighand->siglock); 565 spin_unlock_irq(&current->sighand->siglock);
610} 566}
611 567
612static void acct_process_in_ns(struct pid_namespace *ns) 568static void slow_acct_process(struct pid_namespace *ns)
613{ 569{
614 struct file *file = NULL; 570 for ( ; ns; ns = ns->parent) {
615 struct bsd_acct_struct *acct; 571 struct bsd_acct_struct *acct = acct_get(ns);
616 572 if (acct) {
617 acct = ns->bacct; 573 do_acct_process(acct);
618 /* 574 mutex_unlock(&acct->lock);
619 * accelerate the common fastpath: 575 pin_put(&acct->pin);
620 */ 576 }
621 if (!acct || !acct->file)
622 return;
623
624 spin_lock(&acct_lock);
625 file = acct->file;
626 if (unlikely(!file)) {
627 spin_unlock(&acct_lock);
628 return;
629 } 577 }
630 get_file(file);
631 spin_unlock(&acct_lock);
632
633 do_acct_process(acct, ns, file);
634 fput(file);
635} 578}
636 579
637/** 580/**
638 * acct_process - now just a wrapper around acct_process_in_ns, 581 * acct_process
639 * which in turn is a wrapper around do_acct_process.
640 * 582 *
641 * handles process accounting for an exiting task 583 * handles process accounting for an exiting task
642 */ 584 */
@@ -649,6 +591,10 @@ void acct_process(void)
649 * alive and holds its namespace, which in turn holds 591 * alive and holds its namespace, which in turn holds
650 * its parent. 592 * its parent.
651 */ 593 */
652 for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) 594 for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) {
653 acct_process_in_ns(ns); 595 if (ns->bacct)
596 break;
597 }
598 if (unlikely(ns))
599 slow_acct_process(ns);
654} 600}