diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2014-08-07 07:51:03 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2014-08-07 14:40:08 -0400 |
commit | b8f00e6be46f4c9a112e05fd692712873c4c4048 (patch) | |
tree | f37aaef1d0fdfd9649f3ab14ee09de5f7100030d /kernel | |
parent | 9df7fa16ee956bf0cdf4a711eac827be92d584bc (diff) |
acct: new lifetime rules
Do not reuse bsd_acct_struct after closing the damn thing.
Structure lifetime is controlled by refcount now. We also
have a mutex in there, held over closing and writing (the
file is O_APPEND, so we are not losing any concurrency).
As a result, we do not need to bother with get_file()/fput()
on log write anymore. Moreover, do_acct_process() only needs
acct itself; file and pidns are picked from it.
Killed instances are distinguished by having NULL ->ns.
Refcount is protected by acct_lock; anybody taking the
mutex needs to grab a reference first.
Things will get a lot simpler in the next commits - this
is just the minimal chunk switching to the new lifetime rules.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/acct.c | 220 |
1 files changed, 114 insertions, 106 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 08963a292878..f9ef9db55c0e 100644 --- a/kernel/acct.c +++ b/kernel/acct.c | |||
@@ -75,15 +75,11 @@ int acct_parm[3] = {4, 2, 30}; | |||
75 | /* | 75 | /* |
76 | * External references and all of the globals. | 76 | * External references and all of the globals. |
77 | */ | 77 | */ |
78 | static void do_acct_process(struct bsd_acct_struct *acct, | 78 | static void do_acct_process(struct bsd_acct_struct *acct); |
79 | struct pid_namespace *ns, struct file *); | ||
80 | 79 | ||
81 | /* | ||
82 | * This structure is used so that all the data protected by lock | ||
83 | * can be placed in the same cache line as the lock. This primes | ||
84 | * the cache line to have the data after getting the lock. | ||
85 | */ | ||
86 | struct bsd_acct_struct { | 80 | struct bsd_acct_struct { |
81 | long count; | ||
82 | struct mutex lock; | ||
87 | int active; | 83 | int active; |
88 | unsigned long needcheck; | 84 | unsigned long needcheck; |
89 | struct file *file; | 85 | struct file *file; |
@@ -157,39 +153,59 @@ out: | |||
157 | return res; | 153 | return res; |
158 | } | 154 | } |
159 | 155 | ||
160 | /* | 156 | static void acct_put(struct bsd_acct_struct *p) |
161 | * Close the old accounting file (if currently open) and then replace | ||
162 | * it with file (if non-NULL). | ||
163 | * | ||
164 | * NOTE: acct_lock MUST be held on entry and exit. | ||
165 | */ | ||
166 | static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, | ||
167 | struct pid_namespace *ns) | ||
168 | { | 157 | { |
169 | struct file *old_acct = NULL; | 158 | spin_lock(&acct_lock); |
170 | struct pid_namespace *old_ns = NULL; | 159 | if (!--p->count) |
171 | 160 | kfree(p); | |
172 | if (acct->file) { | 161 | spin_unlock(&acct_lock); |
173 | old_acct = acct->file; | 162 | } |
174 | old_ns = acct->ns; | 163 | |
175 | acct->active = 0; | 164 | static struct bsd_acct_struct *acct_get(struct bsd_acct_struct **p) |
176 | acct->file = NULL; | 165 | { |
177 | acct->ns = NULL; | 166 | struct bsd_acct_struct *res; |
178 | list_del(&acct->list); | 167 | spin_lock(&acct_lock); |
179 | } | 168 | again: |
180 | if (file) { | 169 | res = *p; |
181 | acct->file = file; | 170 | if (res) |
182 | acct->ns = ns; | 171 | res->count++; |
183 | acct->needcheck = jiffies; | 172 | spin_unlock(&acct_lock); |
184 | acct->active = 0; | 173 | if (res) { |
185 | list_add(&acct->list, &acct_list); | 174 | mutex_lock(&res->lock); |
175 | if (!res->ns) { | ||
176 | mutex_unlock(&res->lock); | ||
177 | spin_lock(&acct_lock); | ||
178 | if (!--res->count) | ||
179 | kfree(res); | ||
180 | goto again; | ||
181 | } | ||
186 | } | 182 | } |
187 | if (old_acct) { | 183 | return res; |
188 | mnt_unpin(old_acct->f_path.mnt); | 184 | } |
185 | |||
186 | static void acct_kill(struct bsd_acct_struct *acct, | ||
187 | struct bsd_acct_struct *new) | ||
188 | { | ||
189 | if (acct) { | ||
190 | struct file *file = acct->file; | ||
191 | struct pid_namespace *ns = acct->ns; | ||
192 | spin_lock(&acct_lock); | ||
193 | list_del(&acct->list); | ||
194 | mnt_unpin(file->f_path.mnt); | ||
189 | spin_unlock(&acct_lock); | 195 | spin_unlock(&acct_lock); |
190 | do_acct_process(acct, old_ns, old_acct); | 196 | do_acct_process(acct); |
191 | filp_close(old_acct, NULL); | 197 | filp_close(file, NULL); |
192 | spin_lock(&acct_lock); | 198 | spin_lock(&acct_lock); |
199 | ns->bacct = new; | ||
200 | if (new) { | ||
201 | mnt_pin(new->file->f_path.mnt); | ||
202 | list_add(&new->list, &acct_list); | ||
203 | } | ||
204 | acct->ns = NULL; | ||
205 | mutex_unlock(&acct->lock); | ||
206 | if (!(acct->count -= 2)) | ||
207 | kfree(acct); | ||
208 | spin_unlock(&acct_lock); | ||
193 | } | 209 | } |
194 | } | 210 | } |
195 | 211 | ||
@@ -197,47 +213,50 @@ static int acct_on(struct filename *pathname) | |||
197 | { | 213 | { |
198 | struct file *file; | 214 | struct file *file; |
199 | struct vfsmount *mnt; | 215 | struct vfsmount *mnt; |
200 | struct pid_namespace *ns; | 216 | struct pid_namespace *ns = task_active_pid_ns(current); |
201 | struct bsd_acct_struct *acct = NULL; | 217 | struct bsd_acct_struct *acct, *old; |
218 | |||
219 | acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); | ||
220 | if (!acct) | ||
221 | return -ENOMEM; | ||
202 | 222 | ||
203 | /* Difference from BSD - they don't do O_APPEND */ | 223 | /* Difference from BSD - they don't do O_APPEND */ |
204 | file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0); | 224 | file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0); |
205 | if (IS_ERR(file)) | 225 | if (IS_ERR(file)) { |
226 | kfree(acct); | ||
206 | return PTR_ERR(file); | 227 | return PTR_ERR(file); |
228 | } | ||
207 | 229 | ||
208 | if (!S_ISREG(file_inode(file)->i_mode)) { | 230 | if (!S_ISREG(file_inode(file)->i_mode)) { |
231 | kfree(acct); | ||
209 | filp_close(file, NULL); | 232 | filp_close(file, NULL); |
210 | return -EACCES; | 233 | return -EACCES; |
211 | } | 234 | } |
212 | 235 | ||
213 | if (!file->f_op->write) { | 236 | if (!file->f_op->write) { |
237 | kfree(acct); | ||
214 | filp_close(file, NULL); | 238 | filp_close(file, NULL); |
215 | return -EIO; | 239 | return -EIO; |
216 | } | 240 | } |
217 | 241 | ||
218 | ns = task_active_pid_ns(current); | 242 | acct->count = 1; |
219 | if (ns->bacct == NULL) { | 243 | acct->file = file; |
220 | acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); | 244 | acct->needcheck = jiffies; |
221 | if (acct == NULL) { | 245 | acct->ns = ns; |
222 | filp_close(file, NULL); | 246 | mutex_init(&acct->lock); |
223 | return -ENOMEM; | 247 | mnt = file->f_path.mnt; |
224 | } | ||
225 | } | ||
226 | 248 | ||
227 | spin_lock(&acct_lock); | 249 | old = acct_get(&ns->bacct); |
228 | if (ns->bacct == NULL) { | 250 | if (old) { |
251 | acct_kill(old, acct); | ||
252 | } else { | ||
253 | spin_lock(&acct_lock); | ||
229 | ns->bacct = acct; | 254 | ns->bacct = acct; |
230 | acct = NULL; | 255 | mnt_pin(mnt); |
256 | list_add(&acct->list, &acct_list); | ||
257 | spin_unlock(&acct_lock); | ||
231 | } | 258 | } |
232 | |||
233 | mnt = file->f_path.mnt; | ||
234 | mnt_pin(mnt); | ||
235 | acct_file_reopen(ns->bacct, file, ns); | ||
236 | spin_unlock(&acct_lock); | ||
237 | |||
238 | mntput(mnt); /* it's pinned, now give up active reference */ | 259 | mntput(mnt); /* it's pinned, now give up active reference */ |
239 | kfree(acct); | ||
240 | |||
241 | return 0; | 260 | return 0; |
242 | } | 261 | } |
243 | 262 | ||
@@ -270,15 +289,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name) | |||
270 | mutex_unlock(&acct_on_mutex); | 289 | mutex_unlock(&acct_on_mutex); |
271 | putname(tmp); | 290 | putname(tmp); |
272 | } else { | 291 | } else { |
273 | struct bsd_acct_struct *acct; | 292 | acct_kill(acct_get(&task_active_pid_ns(current)->bacct), NULL); |
274 | |||
275 | acct = task_active_pid_ns(current)->bacct; | ||
276 | if (acct == NULL) | ||
277 | return 0; | ||
278 | |||
279 | spin_lock(&acct_lock); | ||
280 | acct_file_reopen(acct, NULL, NULL); | ||
281 | spin_unlock(&acct_lock); | ||
282 | } | 293 | } |
283 | 294 | ||
284 | return error; | 295 | return error; |
@@ -298,8 +309,19 @@ void acct_auto_close_mnt(struct vfsmount *m) | |||
298 | spin_lock(&acct_lock); | 309 | spin_lock(&acct_lock); |
299 | restart: | 310 | restart: |
300 | list_for_each_entry(acct, &acct_list, list) | 311 | list_for_each_entry(acct, &acct_list, list) |
301 | if (acct->file && acct->file->f_path.mnt == m) { | 312 | if (acct->file->f_path.mnt == m) { |
302 | acct_file_reopen(acct, NULL, NULL); | 313 | acct->count++; |
314 | spin_unlock(&acct_lock); | ||
315 | mutex_lock(&acct->lock); | ||
316 | if (!acct->ns) { | ||
317 | mutex_unlock(&acct->lock); | ||
318 | spin_lock(&acct_lock); | ||
319 | if (!--acct->count) | ||
320 | kfree(acct); | ||
321 | goto restart; | ||
322 | } | ||
323 | acct_kill(acct, NULL); | ||
324 | spin_lock(&acct_lock); | ||
303 | goto restart; | 325 | goto restart; |
304 | } | 326 | } |
305 | spin_unlock(&acct_lock); | 327 | spin_unlock(&acct_lock); |
@@ -319,8 +341,19 @@ void acct_auto_close(struct super_block *sb) | |||
319 | spin_lock(&acct_lock); | 341 | spin_lock(&acct_lock); |
320 | restart: | 342 | restart: |
321 | list_for_each_entry(acct, &acct_list, list) | 343 | list_for_each_entry(acct, &acct_list, list) |
322 | if (acct->file && acct->file->f_path.dentry->d_sb == sb) { | 344 | if (acct->file->f_path.dentry->d_sb == sb) { |
323 | acct_file_reopen(acct, NULL, NULL); | 345 | acct->count++; |
346 | spin_unlock(&acct_lock); | ||
347 | mutex_lock(&acct->lock); | ||
348 | if (!acct->ns) { | ||
349 | mutex_unlock(&acct->lock); | ||
350 | spin_lock(&acct_lock); | ||
351 | if (!--acct->count) | ||
352 | kfree(acct); | ||
353 | goto restart; | ||
354 | } | ||
355 | acct_kill(acct, NULL); | ||
356 | spin_lock(&acct_lock); | ||
324 | goto restart; | 357 | goto restart; |
325 | } | 358 | } |
326 | spin_unlock(&acct_lock); | 359 | spin_unlock(&acct_lock); |
@@ -328,17 +361,7 @@ restart: | |||
328 | 361 | ||
329 | void acct_exit_ns(struct pid_namespace *ns) | 362 | void acct_exit_ns(struct pid_namespace *ns) |
330 | { | 363 | { |
331 | struct bsd_acct_struct *acct = ns->bacct; | 364 | acct_kill(acct_get(&ns->bacct), NULL); |
332 | |||
333 | if (acct == NULL) | ||
334 | return; | ||
335 | |||
336 | spin_lock(&acct_lock); | ||
337 | if (acct->file != NULL) | ||
338 | acct_file_reopen(acct, NULL, NULL); | ||
339 | spin_unlock(&acct_lock); | ||
340 | |||
341 | kfree(acct); | ||
342 | } | 365 | } |
343 | 366 | ||
344 | /* | 367 | /* |
@@ -507,12 +530,13 @@ static void fill_ac(acct_t *ac) | |||
507 | /* | 530 | /* |
508 | * do_acct_process does all actual work. Caller holds the reference to file. | 531 | * do_acct_process does all actual work. Caller holds the reference to file. |
509 | */ | 532 | */ |
510 | static void do_acct_process(struct bsd_acct_struct *acct, | 533 | static void do_acct_process(struct bsd_acct_struct *acct) |
511 | struct pid_namespace *ns, struct file *file) | ||
512 | { | 534 | { |
513 | acct_t ac; | 535 | acct_t ac; |
514 | unsigned long flim; | 536 | unsigned long flim; |
515 | const struct cred *orig_cred; | 537 | const struct cred *orig_cred; |
538 | struct pid_namespace *ns = acct->ns; | ||
539 | struct file *file = acct->file; | ||
516 | 540 | ||
517 | /* | 541 | /* |
518 | * Accounting records are not subject to resource limits. | 542 | * Accounting records are not subject to resource limits. |
@@ -606,27 +630,12 @@ void acct_collect(long exitcode, int group_dead) | |||
606 | static void slow_acct_process(struct pid_namespace *ns) | 630 | static void slow_acct_process(struct pid_namespace *ns) |
607 | { | 631 | { |
608 | for ( ; ns; ns = ns->parent) { | 632 | for ( ; ns; ns = ns->parent) { |
609 | struct file *file = NULL; | 633 | struct bsd_acct_struct *acct = acct_get(&ns->bacct); |
610 | struct bsd_acct_struct *acct; | 634 | if (acct) { |
611 | 635 | do_acct_process(acct); | |
612 | acct = ns->bacct; | 636 | mutex_unlock(&acct->lock); |
613 | /* | 637 | acct_put(acct); |
614 | * accelerate the common fastpath: | ||
615 | */ | ||
616 | if (!acct || !acct->file) | ||
617 | continue; | ||
618 | |||
619 | spin_lock(&acct_lock); | ||
620 | file = acct->file; | ||
621 | if (unlikely(!file)) { | ||
622 | spin_unlock(&acct_lock); | ||
623 | continue; | ||
624 | } | 638 | } |
625 | get_file(file); | ||
626 | spin_unlock(&acct_lock); | ||
627 | |||
628 | do_acct_process(acct, ns, file); | ||
629 | fput(file); | ||
630 | } | 639 | } |
631 | } | 640 | } |
632 | 641 | ||
@@ -645,8 +654,7 @@ void acct_process(void) | |||
645 | * its parent. | 654 | * its parent. |
646 | */ | 655 | */ |
647 | for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) { | 656 | for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) { |
648 | struct bsd_acct_struct *acct = ns->bacct; | 657 | if (ns->bacct) |
649 | if (acct && acct->file) | ||
650 | break; | 658 | break; |
651 | } | 659 | } |
652 | if (unlikely(ns)) | 660 | if (unlikely(ns)) |