aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/taskstats.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/taskstats.c')
-rw-r--r--kernel/taskstats.c227
1 files changed, 151 insertions, 76 deletions
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 11281d5792bd..fc0f22005417 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -89,8 +89,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
89 return -ENOMEM; 89 return -ENOMEM;
90 90
91 if (!info) { 91 if (!info) {
92 int seq = get_cpu_var(taskstats_seqnum)++; 92 int seq = this_cpu_inc_return(taskstats_seqnum) - 1;
93 put_cpu_var(taskstats_seqnum);
94 93
95 reply = genlmsg_put(skb, 0, seq, &family, 0, cmd); 94 reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);
96 } else 95 } else
@@ -175,22 +174,8 @@ static void send_cpu_listeners(struct sk_buff *skb,
175 up_write(&listeners->sem); 174 up_write(&listeners->sem);
176} 175}
177 176
178static int fill_pid(pid_t pid, struct task_struct *tsk, 177static void fill_stats(struct task_struct *tsk, struct taskstats *stats)
179 struct taskstats *stats)
180{ 178{
181 int rc = 0;
182
183 if (!tsk) {
184 rcu_read_lock();
185 tsk = find_task_by_vpid(pid);
186 if (tsk)
187 get_task_struct(tsk);
188 rcu_read_unlock();
189 if (!tsk)
190 return -ESRCH;
191 } else
192 get_task_struct(tsk);
193
194 memset(stats, 0, sizeof(*stats)); 179 memset(stats, 0, sizeof(*stats));
195 /* 180 /*
196 * Each accounting subsystem adds calls to its functions to 181 * Each accounting subsystem adds calls to its functions to
@@ -209,17 +194,27 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
209 194
210 /* fill in extended acct fields */ 195 /* fill in extended acct fields */
211 xacct_add_tsk(stats, tsk); 196 xacct_add_tsk(stats, tsk);
197}
212 198
213 /* Define err: label here if needed */ 199static int fill_stats_for_pid(pid_t pid, struct taskstats *stats)
214 put_task_struct(tsk); 200{
215 return rc; 201 struct task_struct *tsk;
216 202
203 rcu_read_lock();
204 tsk = find_task_by_vpid(pid);
205 if (tsk)
206 get_task_struct(tsk);
207 rcu_read_unlock();
208 if (!tsk)
209 return -ESRCH;
210 fill_stats(tsk, stats);
211 put_task_struct(tsk);
212 return 0;
217} 213}
218 214
219static int fill_tgid(pid_t tgid, struct task_struct *first, 215static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats)
220 struct taskstats *stats)
221{ 216{
222 struct task_struct *tsk; 217 struct task_struct *tsk, *first;
223 unsigned long flags; 218 unsigned long flags;
224 int rc = -ESRCH; 219 int rc = -ESRCH;
225 220
@@ -228,8 +223,7 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
228 * leaders who are already counted with the dead tasks 223 * leaders who are already counted with the dead tasks
229 */ 224 */
230 rcu_read_lock(); 225 rcu_read_lock();
231 if (!first) 226 first = find_task_by_vpid(tgid);
232 first = find_task_by_vpid(tgid);
233 227
234 if (!first || !lock_task_sighand(first, &flags)) 228 if (!first || !lock_task_sighand(first, &flags))
235 goto out; 229 goto out;
@@ -268,7 +262,6 @@ out:
268 return rc; 262 return rc;
269} 263}
270 264
271
272static void fill_tgid_exit(struct task_struct *tsk) 265static void fill_tgid_exit(struct task_struct *tsk)
273{ 266{
274 unsigned long flags; 267 unsigned long flags;
@@ -292,16 +285,18 @@ ret:
292static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) 285static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
293{ 286{
294 struct listener_list *listeners; 287 struct listener_list *listeners;
295 struct listener *s, *tmp; 288 struct listener *s, *tmp, *s2;
296 unsigned int cpu; 289 unsigned int cpu;
297 290
298 if (!cpumask_subset(mask, cpu_possible_mask)) 291 if (!cpumask_subset(mask, cpu_possible_mask))
299 return -EINVAL; 292 return -EINVAL;
300 293
294 s = NULL;
301 if (isadd == REGISTER) { 295 if (isadd == REGISTER) {
302 for_each_cpu(cpu, mask) { 296 for_each_cpu(cpu, mask) {
303 s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, 297 if (!s)
304 cpu_to_node(cpu)); 298 s = kmalloc_node(sizeof(struct listener),
299 GFP_KERNEL, cpu_to_node(cpu));
305 if (!s) 300 if (!s)
306 goto cleanup; 301 goto cleanup;
307 s->pid = pid; 302 s->pid = pid;
@@ -310,9 +305,16 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
310 305
311 listeners = &per_cpu(listener_array, cpu); 306 listeners = &per_cpu(listener_array, cpu);
312 down_write(&listeners->sem); 307 down_write(&listeners->sem);
308 list_for_each_entry_safe(s2, tmp, &listeners->list, list) {
309 if (s2->pid == pid)
310 goto next_cpu;
311 }
313 list_add(&s->list, &listeners->list); 312 list_add(&s->list, &listeners->list);
313 s = NULL;
314next_cpu:
314 up_write(&listeners->sem); 315 up_write(&listeners->sem);
315 } 316 }
317 kfree(s);
316 return 0; 318 return 0;
317 } 319 }
318 320
@@ -355,6 +357,10 @@ static int parse(struct nlattr *na, struct cpumask *mask)
355 return ret; 357 return ret;
356} 358}
357 359
360#if defined(CONFIG_64BIT) && !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
361#define TASKSTATS_NEEDS_PADDING 1
362#endif
363
358static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) 364static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
359{ 365{
360 struct nlattr *na, *ret; 366 struct nlattr *na, *ret;
@@ -364,9 +370,33 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
364 ? TASKSTATS_TYPE_AGGR_PID 370 ? TASKSTATS_TYPE_AGGR_PID
365 : TASKSTATS_TYPE_AGGR_TGID; 371 : TASKSTATS_TYPE_AGGR_TGID;
366 372
373 /*
374 * The taskstats structure is internally aligned on 8 byte
375 * boundaries but the layout of the aggregate reply, with
376 * two NLA headers and the pid (each 4 bytes), actually
377 * forces the entire structure to be unaligned. This causes
378 * the kernel to issue unaligned access warnings on some
379 * architectures like ia64. Unfortunately, some software out there
380 * doesn't properly unroll the NLA packet and assumes that the start
381 * of the taskstats structure will always be 20 bytes from the start
382 * of the netlink payload. Aligning the start of the taskstats
383 * structure breaks this software, which we don't want. So, for now
384 * the alignment only happens on architectures that require it
385 * and those users will have to update to fixed versions of those
386 * packages. Space is reserved in the packet only when needed.
387 * This ifdef should be removed in several years (e.g. 2012), once
388 * we can be confident that fixed versions are installed on most
389 * systems. We add the padding before the aggregate since the
390 * aggregate is already a defined type.
391 */
392#ifdef TASKSTATS_NEEDS_PADDING
393 if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0)
394 goto err;
395#endif
367 na = nla_nest_start(skb, aggr); 396 na = nla_nest_start(skb, aggr);
368 if (!na) 397 if (!na)
369 goto err; 398 goto err;
399
370 if (nla_put(skb, type, sizeof(pid), &pid) < 0) 400 if (nla_put(skb, type, sizeof(pid), &pid) < 0)
371 goto err; 401 goto err;
372 ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); 402 ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
@@ -424,74 +454,122 @@ err:
424 return rc; 454 return rc;
425} 455}
426 456
427static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) 457static int cmd_attr_register_cpumask(struct genl_info *info)
428{ 458{
429 int rc;
430 struct sk_buff *rep_skb;
431 struct taskstats *stats;
432 size_t size;
433 cpumask_var_t mask; 459 cpumask_var_t mask;
460 int rc;
434 461
435 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 462 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
436 return -ENOMEM; 463 return -ENOMEM;
437
438 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); 464 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
439 if (rc < 0) 465 if (rc < 0)
440 goto free_return_rc; 466 goto out;
441 if (rc == 0) { 467 rc = add_del_listener(info->snd_pid, mask, REGISTER);
442 rc = add_del_listener(info->snd_pid, mask, REGISTER); 468out:
443 goto free_return_rc; 469 free_cpumask_var(mask);
444 } 470 return rc;
471}
445 472
473static int cmd_attr_deregister_cpumask(struct genl_info *info)
474{
475 cpumask_var_t mask;
476 int rc;
477
478 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
479 return -ENOMEM;
446 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); 480 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
447 if (rc < 0) 481 if (rc < 0)
448 goto free_return_rc; 482 goto out;
449 if (rc == 0) { 483 rc = add_del_listener(info->snd_pid, mask, DEREGISTER);
450 rc = add_del_listener(info->snd_pid, mask, DEREGISTER); 484out:
451free_return_rc:
452 free_cpumask_var(mask);
453 return rc;
454 }
455 free_cpumask_var(mask); 485 free_cpumask_var(mask);
486 return rc;
487}
488
489static size_t taskstats_packet_size(void)
490{
491 size_t size;
456 492
457 /*
458 * Size includes space for nested attributes
459 */
460 size = nla_total_size(sizeof(u32)) + 493 size = nla_total_size(sizeof(u32)) +
461 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); 494 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
495#ifdef TASKSTATS_NEEDS_PADDING
496 size += nla_total_size(0); /* Padding for alignment */
497#endif
498 return size;
499}
500
501static int cmd_attr_pid(struct genl_info *info)
502{
503 struct taskstats *stats;
504 struct sk_buff *rep_skb;
505 size_t size;
506 u32 pid;
507 int rc;
508
509 size = taskstats_packet_size();
462 510
463 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); 511 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
464 if (rc < 0) 512 if (rc < 0)
465 return rc; 513 return rc;
466 514
467 rc = -EINVAL; 515 rc = -EINVAL;
468 if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { 516 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
469 u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); 517 stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid);
470 stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); 518 if (!stats)
471 if (!stats) 519 goto err;
472 goto err; 520
473 521 rc = fill_stats_for_pid(pid, stats);
474 rc = fill_pid(pid, NULL, stats); 522 if (rc < 0)
475 if (rc < 0) 523 goto err;
476 goto err; 524 return send_reply(rep_skb, info);
477 } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { 525err:
478 u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); 526 nlmsg_free(rep_skb);
479 stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); 527 return rc;
480 if (!stats) 528}
481 goto err; 529
482 530static int cmd_attr_tgid(struct genl_info *info)
483 rc = fill_tgid(tgid, NULL, stats); 531{
484 if (rc < 0) 532 struct taskstats *stats;
485 goto err; 533 struct sk_buff *rep_skb;
486 } else 534 size_t size;
535 u32 tgid;
536 int rc;
537
538 size = taskstats_packet_size();
539
540 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
541 if (rc < 0)
542 return rc;
543
544 rc = -EINVAL;
545 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
546 stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid);
547 if (!stats)
487 goto err; 548 goto err;
488 549
550 rc = fill_stats_for_tgid(tgid, stats);
551 if (rc < 0)
552 goto err;
489 return send_reply(rep_skb, info); 553 return send_reply(rep_skb, info);
490err: 554err:
491 nlmsg_free(rep_skb); 555 nlmsg_free(rep_skb);
492 return rc; 556 return rc;
493} 557}
494 558
559static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
560{
561 if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK])
562 return cmd_attr_register_cpumask(info);
563 else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK])
564 return cmd_attr_deregister_cpumask(info);
565 else if (info->attrs[TASKSTATS_CMD_ATTR_PID])
566 return cmd_attr_pid(info);
567 else if (info->attrs[TASKSTATS_CMD_ATTR_TGID])
568 return cmd_attr_tgid(info);
569 else
570 return -EINVAL;
571}
572
495static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) 573static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk)
496{ 574{
497 struct signal_struct *sig = tsk->signal; 575 struct signal_struct *sig = tsk->signal;
@@ -532,8 +610,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
532 /* 610 /*
533 * Size includes space for nested attributes 611 * Size includes space for nested attributes
534 */ 612 */
535 size = nla_total_size(sizeof(u32)) + 613 size = taskstats_packet_size();
536 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
537 614
538 is_thread_group = !!taskstats_tgid_alloc(tsk); 615 is_thread_group = !!taskstats_tgid_alloc(tsk);
539 if (is_thread_group) { 616 if (is_thread_group) {
@@ -543,7 +620,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
543 fill_tgid_exit(tsk); 620 fill_tgid_exit(tsk);
544 } 621 }
545 622
546 listeners = &__raw_get_cpu_var(listener_array); 623 listeners = __this_cpu_ptr(&listener_array);
547 if (list_empty(&listeners->list)) 624 if (list_empty(&listeners->list))
548 return; 625 return;
549 626
@@ -555,9 +632,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
555 if (!stats) 632 if (!stats)
556 goto err; 633 goto err;
557 634
558 rc = fill_pid(-1, tsk, stats); 635 fill_stats(tsk, stats);
559 if (rc < 0)
560 goto err;
561 636
562 /* 637 /*
563 * Doesn't matter if tsk is the leader or the last group member leaving 638 * Doesn't matter if tsk is the leader or the last group member leaving
@@ -619,7 +694,7 @@ static int __init taskstats_init(void)
619 goto err_cgroup_ops; 694 goto err_cgroup_ops;
620 695
621 family_registered = 1; 696 family_registered = 1;
622 printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); 697 pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
623 return 0; 698 return 0;
624err_cgroup_ops: 699err_cgroup_ops:
625 genl_unregister_ops(&family, &taskstats_ops); 700 genl_unregister_ops(&family, &taskstats_ops);