diff options
Diffstat (limited to 'kernel/taskstats.c')
-rw-r--r-- | kernel/taskstats.c | 227 |
1 files changed, 151 insertions, 76 deletions
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 11281d5792bd..fc0f22005417 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -89,8 +89,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, | |||
89 | return -ENOMEM; | 89 | return -ENOMEM; |
90 | 90 | ||
91 | if (!info) { | 91 | if (!info) { |
92 | int seq = get_cpu_var(taskstats_seqnum)++; | 92 | int seq = this_cpu_inc_return(taskstats_seqnum) - 1; |
93 | put_cpu_var(taskstats_seqnum); | ||
94 | 93 | ||
95 | reply = genlmsg_put(skb, 0, seq, &family, 0, cmd); | 94 | reply = genlmsg_put(skb, 0, seq, &family, 0, cmd); |
96 | } else | 95 | } else |
@@ -175,22 +174,8 @@ static void send_cpu_listeners(struct sk_buff *skb, | |||
175 | up_write(&listeners->sem); | 174 | up_write(&listeners->sem); |
176 | } | 175 | } |
177 | 176 | ||
178 | static int fill_pid(pid_t pid, struct task_struct *tsk, | 177 | static void fill_stats(struct task_struct *tsk, struct taskstats *stats) |
179 | struct taskstats *stats) | ||
180 | { | 178 | { |
181 | int rc = 0; | ||
182 | |||
183 | if (!tsk) { | ||
184 | rcu_read_lock(); | ||
185 | tsk = find_task_by_vpid(pid); | ||
186 | if (tsk) | ||
187 | get_task_struct(tsk); | ||
188 | rcu_read_unlock(); | ||
189 | if (!tsk) | ||
190 | return -ESRCH; | ||
191 | } else | ||
192 | get_task_struct(tsk); | ||
193 | |||
194 | memset(stats, 0, sizeof(*stats)); | 179 | memset(stats, 0, sizeof(*stats)); |
195 | /* | 180 | /* |
196 | * Each accounting subsystem adds calls to its functions to | 181 | * Each accounting subsystem adds calls to its functions to |
@@ -209,17 +194,27 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, | |||
209 | 194 | ||
210 | /* fill in extended acct fields */ | 195 | /* fill in extended acct fields */ |
211 | xacct_add_tsk(stats, tsk); | 196 | xacct_add_tsk(stats, tsk); |
197 | } | ||
212 | 198 | ||
213 | /* Define err: label here if needed */ | 199 | static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) |
214 | put_task_struct(tsk); | 200 | { |
215 | return rc; | 201 | struct task_struct *tsk; |
216 | 202 | ||
203 | rcu_read_lock(); | ||
204 | tsk = find_task_by_vpid(pid); | ||
205 | if (tsk) | ||
206 | get_task_struct(tsk); | ||
207 | rcu_read_unlock(); | ||
208 | if (!tsk) | ||
209 | return -ESRCH; | ||
210 | fill_stats(tsk, stats); | ||
211 | put_task_struct(tsk); | ||
212 | return 0; | ||
217 | } | 213 | } |
218 | 214 | ||
219 | static int fill_tgid(pid_t tgid, struct task_struct *first, | 215 | static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) |
220 | struct taskstats *stats) | ||
221 | { | 216 | { |
222 | struct task_struct *tsk; | 217 | struct task_struct *tsk, *first; |
223 | unsigned long flags; | 218 | unsigned long flags; |
224 | int rc = -ESRCH; | 219 | int rc = -ESRCH; |
225 | 220 | ||
@@ -228,8 +223,7 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, | |||
228 | * leaders who are already counted with the dead tasks | 223 | * leaders who are already counted with the dead tasks |
229 | */ | 224 | */ |
230 | rcu_read_lock(); | 225 | rcu_read_lock(); |
231 | if (!first) | 226 | first = find_task_by_vpid(tgid); |
232 | first = find_task_by_vpid(tgid); | ||
233 | 227 | ||
234 | if (!first || !lock_task_sighand(first, &flags)) | 228 | if (!first || !lock_task_sighand(first, &flags)) |
235 | goto out; | 229 | goto out; |
@@ -268,7 +262,6 @@ out: | |||
268 | return rc; | 262 | return rc; |
269 | } | 263 | } |
270 | 264 | ||
271 | |||
272 | static void fill_tgid_exit(struct task_struct *tsk) | 265 | static void fill_tgid_exit(struct task_struct *tsk) |
273 | { | 266 | { |
274 | unsigned long flags; | 267 | unsigned long flags; |
@@ -292,16 +285,18 @@ ret: | |||
292 | static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) | 285 | static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) |
293 | { | 286 | { |
294 | struct listener_list *listeners; | 287 | struct listener_list *listeners; |
295 | struct listener *s, *tmp; | 288 | struct listener *s, *tmp, *s2; |
296 | unsigned int cpu; | 289 | unsigned int cpu; |
297 | 290 | ||
298 | if (!cpumask_subset(mask, cpu_possible_mask)) | 291 | if (!cpumask_subset(mask, cpu_possible_mask)) |
299 | return -EINVAL; | 292 | return -EINVAL; |
300 | 293 | ||
294 | s = NULL; | ||
301 | if (isadd == REGISTER) { | 295 | if (isadd == REGISTER) { |
302 | for_each_cpu(cpu, mask) { | 296 | for_each_cpu(cpu, mask) { |
303 | s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, | 297 | if (!s) |
304 | cpu_to_node(cpu)); | 298 | s = kmalloc_node(sizeof(struct listener), |
299 | GFP_KERNEL, cpu_to_node(cpu)); | ||
305 | if (!s) | 300 | if (!s) |
306 | goto cleanup; | 301 | goto cleanup; |
307 | s->pid = pid; | 302 | s->pid = pid; |
@@ -310,9 +305,16 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) | |||
310 | 305 | ||
311 | listeners = &per_cpu(listener_array, cpu); | 306 | listeners = &per_cpu(listener_array, cpu); |
312 | down_write(&listeners->sem); | 307 | down_write(&listeners->sem); |
308 | list_for_each_entry_safe(s2, tmp, &listeners->list, list) { | ||
309 | if (s2->pid == pid) | ||
310 | goto next_cpu; | ||
311 | } | ||
313 | list_add(&s->list, &listeners->list); | 312 | list_add(&s->list, &listeners->list); |
313 | s = NULL; | ||
314 | next_cpu: | ||
314 | up_write(&listeners->sem); | 315 | up_write(&listeners->sem); |
315 | } | 316 | } |
317 | kfree(s); | ||
316 | return 0; | 318 | return 0; |
317 | } | 319 | } |
318 | 320 | ||
@@ -355,6 +357,10 @@ static int parse(struct nlattr *na, struct cpumask *mask) | |||
355 | return ret; | 357 | return ret; |
356 | } | 358 | } |
357 | 359 | ||
360 | #if defined(CONFIG_64BIT) && !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) | ||
361 | #define TASKSTATS_NEEDS_PADDING 1 | ||
362 | #endif | ||
363 | |||
358 | static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) | 364 | static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) |
359 | { | 365 | { |
360 | struct nlattr *na, *ret; | 366 | struct nlattr *na, *ret; |
@@ -364,9 +370,33 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) | |||
364 | ? TASKSTATS_TYPE_AGGR_PID | 370 | ? TASKSTATS_TYPE_AGGR_PID |
365 | : TASKSTATS_TYPE_AGGR_TGID; | 371 | : TASKSTATS_TYPE_AGGR_TGID; |
366 | 372 | ||
373 | /* | ||
374 | * The taskstats structure is internally aligned on 8 byte | ||
375 | * boundaries but the layout of the aggregate reply, with | ||
376 | * two NLA headers and the pid (each 4 bytes), actually | ||
377 | * forces the entire structure to be unaligned. This causes | ||
378 | * the kernel to issue unaligned access warnings on some | ||
379 | * architectures like ia64. Unfortunately, some software out there | ||
380 | * doesn't properly unroll the NLA packet and assumes that the start | ||
381 | * of the taskstats structure will always be 20 bytes from the start | ||
382 | * of the netlink payload. Aligning the start of the taskstats | ||
383 | * structure breaks this software, which we don't want. So, for now | ||
384 | * the alignment only happens on architectures that require it | ||
385 | * and those users will have to update to fixed versions of those | ||
386 | * packages. Space is reserved in the packet only when needed. | ||
387 | * This ifdef should be removed in several years e.g. 2012 once | ||
388 | * we can be confident that fixed versions are installed on most | ||
389 | * systems. We add the padding before the aggregate since the | ||
390 | * aggregate is already a defined type. | ||
391 | */ | ||
392 | #ifdef TASKSTATS_NEEDS_PADDING | ||
393 | if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0) | ||
394 | goto err; | ||
395 | #endif | ||
367 | na = nla_nest_start(skb, aggr); | 396 | na = nla_nest_start(skb, aggr); |
368 | if (!na) | 397 | if (!na) |
369 | goto err; | 398 | goto err; |
399 | |||
370 | if (nla_put(skb, type, sizeof(pid), &pid) < 0) | 400 | if (nla_put(skb, type, sizeof(pid), &pid) < 0) |
371 | goto err; | 401 | goto err; |
372 | ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); | 402 | ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); |
@@ -424,74 +454,122 @@ err: | |||
424 | return rc; | 454 | return rc; |
425 | } | 455 | } |
426 | 456 | ||
427 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | 457 | static int cmd_attr_register_cpumask(struct genl_info *info) |
428 | { | 458 | { |
429 | int rc; | ||
430 | struct sk_buff *rep_skb; | ||
431 | struct taskstats *stats; | ||
432 | size_t size; | ||
433 | cpumask_var_t mask; | 459 | cpumask_var_t mask; |
460 | int rc; | ||
434 | 461 | ||
435 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | 462 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) |
436 | return -ENOMEM; | 463 | return -ENOMEM; |
437 | |||
438 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); | 464 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); |
439 | if (rc < 0) | 465 | if (rc < 0) |
440 | goto free_return_rc; | 466 | goto out; |
441 | if (rc == 0) { | 467 | rc = add_del_listener(info->snd_pid, mask, REGISTER); |
442 | rc = add_del_listener(info->snd_pid, mask, REGISTER); | 468 | out: |
443 | goto free_return_rc; | 469 | free_cpumask_var(mask); |
444 | } | 470 | return rc; |
471 | } | ||
445 | 472 | ||
473 | static int cmd_attr_deregister_cpumask(struct genl_info *info) | ||
474 | { | ||
475 | cpumask_var_t mask; | ||
476 | int rc; | ||
477 | |||
478 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | ||
479 | return -ENOMEM; | ||
446 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); | 480 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); |
447 | if (rc < 0) | 481 | if (rc < 0) |
448 | goto free_return_rc; | 482 | goto out; |
449 | if (rc == 0) { | 483 | rc = add_del_listener(info->snd_pid, mask, DEREGISTER); |
450 | rc = add_del_listener(info->snd_pid, mask, DEREGISTER); | 484 | out: |
451 | free_return_rc: | ||
452 | free_cpumask_var(mask); | ||
453 | return rc; | ||
454 | } | ||
455 | free_cpumask_var(mask); | 485 | free_cpumask_var(mask); |
486 | return rc; | ||
487 | } | ||
488 | |||
489 | static size_t taskstats_packet_size(void) | ||
490 | { | ||
491 | size_t size; | ||
456 | 492 | ||
457 | /* | ||
458 | * Size includes space for nested attributes | ||
459 | */ | ||
460 | size = nla_total_size(sizeof(u32)) + | 493 | size = nla_total_size(sizeof(u32)) + |
461 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | 494 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); |
495 | #ifdef TASKSTATS_NEEDS_PADDING | ||
496 | size += nla_total_size(0); /* Padding for alignment */ | ||
497 | #endif | ||
498 | return size; | ||
499 | } | ||
500 | |||
501 | static int cmd_attr_pid(struct genl_info *info) | ||
502 | { | ||
503 | struct taskstats *stats; | ||
504 | struct sk_buff *rep_skb; | ||
505 | size_t size; | ||
506 | u32 pid; | ||
507 | int rc; | ||
508 | |||
509 | size = taskstats_packet_size(); | ||
462 | 510 | ||
463 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); | 511 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); |
464 | if (rc < 0) | 512 | if (rc < 0) |
465 | return rc; | 513 | return rc; |
466 | 514 | ||
467 | rc = -EINVAL; | 515 | rc = -EINVAL; |
468 | if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { | 516 | pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); |
469 | u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); | 517 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); |
470 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); | 518 | if (!stats) |
471 | if (!stats) | 519 | goto err; |
472 | goto err; | 520 | |
473 | 521 | rc = fill_stats_for_pid(pid, stats); | |
474 | rc = fill_pid(pid, NULL, stats); | 522 | if (rc < 0) |
475 | if (rc < 0) | 523 | goto err; |
476 | goto err; | 524 | return send_reply(rep_skb, info); |
477 | } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { | 525 | err: |
478 | u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | 526 | nlmsg_free(rep_skb); |
479 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); | 527 | return rc; |
480 | if (!stats) | 528 | } |
481 | goto err; | 529 | |
482 | 530 | static int cmd_attr_tgid(struct genl_info *info) | |
483 | rc = fill_tgid(tgid, NULL, stats); | 531 | { |
484 | if (rc < 0) | 532 | struct taskstats *stats; |
485 | goto err; | 533 | struct sk_buff *rep_skb; |
486 | } else | 534 | size_t size; |
535 | u32 tgid; | ||
536 | int rc; | ||
537 | |||
538 | size = taskstats_packet_size(); | ||
539 | |||
540 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); | ||
541 | if (rc < 0) | ||
542 | return rc; | ||
543 | |||
544 | rc = -EINVAL; | ||
545 | tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | ||
546 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); | ||
547 | if (!stats) | ||
487 | goto err; | 548 | goto err; |
488 | 549 | ||
550 | rc = fill_stats_for_tgid(tgid, stats); | ||
551 | if (rc < 0) | ||
552 | goto err; | ||
489 | return send_reply(rep_skb, info); | 553 | return send_reply(rep_skb, info); |
490 | err: | 554 | err: |
491 | nlmsg_free(rep_skb); | 555 | nlmsg_free(rep_skb); |
492 | return rc; | 556 | return rc; |
493 | } | 557 | } |
494 | 558 | ||
559 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | ||
560 | { | ||
561 | if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) | ||
562 | return cmd_attr_register_cpumask(info); | ||
563 | else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) | ||
564 | return cmd_attr_deregister_cpumask(info); | ||
565 | else if (info->attrs[TASKSTATS_CMD_ATTR_PID]) | ||
566 | return cmd_attr_pid(info); | ||
567 | else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) | ||
568 | return cmd_attr_tgid(info); | ||
569 | else | ||
570 | return -EINVAL; | ||
571 | } | ||
572 | |||
495 | static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) | 573 | static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) |
496 | { | 574 | { |
497 | struct signal_struct *sig = tsk->signal; | 575 | struct signal_struct *sig = tsk->signal; |
@@ -532,8 +610,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) | |||
532 | /* | 610 | /* |
533 | * Size includes space for nested attributes | 611 | * Size includes space for nested attributes |
534 | */ | 612 | */ |
535 | size = nla_total_size(sizeof(u32)) + | 613 | size = taskstats_packet_size(); |
536 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
537 | 614 | ||
538 | is_thread_group = !!taskstats_tgid_alloc(tsk); | 615 | is_thread_group = !!taskstats_tgid_alloc(tsk); |
539 | if (is_thread_group) { | 616 | if (is_thread_group) { |
@@ -543,7 +620,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) | |||
543 | fill_tgid_exit(tsk); | 620 | fill_tgid_exit(tsk); |
544 | } | 621 | } |
545 | 622 | ||
546 | listeners = &__raw_get_cpu_var(listener_array); | 623 | listeners = __this_cpu_ptr(&listener_array); |
547 | if (list_empty(&listeners->list)) | 624 | if (list_empty(&listeners->list)) |
548 | return; | 625 | return; |
549 | 626 | ||
@@ -555,9 +632,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) | |||
555 | if (!stats) | 632 | if (!stats) |
556 | goto err; | 633 | goto err; |
557 | 634 | ||
558 | rc = fill_pid(-1, tsk, stats); | 635 | fill_stats(tsk, stats); |
559 | if (rc < 0) | ||
560 | goto err; | ||
561 | 636 | ||
562 | /* | 637 | /* |
563 | * Doesn't matter if tsk is the leader or the last group member leaving | 638 | * Doesn't matter if tsk is the leader or the last group member leaving |
@@ -619,7 +694,7 @@ static int __init taskstats_init(void) | |||
619 | goto err_cgroup_ops; | 694 | goto err_cgroup_ops; |
620 | 695 | ||
621 | family_registered = 1; | 696 | family_registered = 1; |
622 | printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); | 697 | pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); |
623 | return 0; | 698 | return 0; |
624 | err_cgroup_ops: | 699 | err_cgroup_ops: |
625 | genl_unregister_ops(&family, &taskstats_ops); | 700 | genl_unregister_ops(&family, &taskstats_ops); |