Diffstat (limited to 'kernel')
-rw-r--r--  kernel/exit.c        5
-rw-r--r--  kernel/taskstats.c   200
2 files changed, 192 insertions, 13 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 67c1e9a4f812..dba194a8d416 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -847,6 +847,7 @@ fastcall NORET_TYPE void do_exit(long code)
 	struct task_struct *tsk = current;
 	struct taskstats *tidstats;
 	int group_dead;
+	unsigned int mycpu;
 
 	profile_task_exit(tsk);
 
@@ -884,7 +885,7 @@ fastcall NORET_TYPE void do_exit(long code)
 			current->comm, current->pid,
 			preempt_count());
 
-	taskstats_exit_alloc(&tidstats);
+	taskstats_exit_alloc(&tidstats, &mycpu);
 
 	acct_update_integrals(tsk);
 	if (tsk->mm) {
@@ -905,7 +906,7 @@ fastcall NORET_TYPE void do_exit(long code)
 #endif
 	if (unlikely(tsk->audit_context))
 		audit_free(tsk);
-	taskstats_exit_send(tsk, tidstats, group_dead);
+	taskstats_exit_send(tsk, tidstats, group_dead, mycpu);
 	taskstats_exit_free(tidstats);
 	delayacct_tsk_exit(tsk);
 
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4a0a5022b299..abb59e323544 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -19,9 +19,17 @@
 #include <linux/kernel.h>
 #include <linux/taskstats_kern.h>
 #include <linux/delayacct.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
 #include <net/genetlink.h>
 #include <asm/atomic.h>
 
+/*
+ * Maximum length of a cpumask that can be specified in
+ * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute
+ */
+#define TASKSTATS_CPUMASK_MAXLEN	(100+6*NR_CPUS)
+
 static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
 static int family_registered;
 kmem_cache_t *taskstats_cache;
@@ -37,8 +45,25 @@ static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1]
 __read_mostly = {
 	[TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 },
 	[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
+	[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
+	[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
+
+struct listener {
+	struct list_head list;
+	pid_t pid;
 };
 
+struct listener_list {
+	struct rw_semaphore sem;
+	struct list_head list;
+};
+static DEFINE_PER_CPU(struct listener_list, listener_array);
+
+enum actions {
+	REGISTER,
+	DEREGISTER,
+	CPU_DONT_CARE
+};
 
 static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
 		void **replyp, size_t size)
@@ -74,25 +99,68 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
 	return 0;
 }
 
-static int send_reply(struct sk_buff *skb, pid_t pid, int event)
+/*
+ * Send taskstats data in @skb to listener with nl_pid @pid
+ */
+static int send_reply(struct sk_buff *skb, pid_t pid)
 {
 	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
-	void *reply;
+	void *reply = genlmsg_data(genlhdr);
 	int rc;
 
-	reply = genlmsg_data(genlhdr);
-
 	rc = genlmsg_end(skb, reply);
 	if (rc < 0) {
 		nlmsg_free(skb);
 		return rc;
 	}
 
-	if (event == TASKSTATS_MSG_MULTICAST)
-		return genlmsg_multicast(skb, pid, TASKSTATS_LISTEN_GROUP);
 	return genlmsg_unicast(skb, pid);
 }
 
+/*
+ * Send taskstats data in @skb to listeners registered for @cpu's exit data
+ */
+static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
+{
+	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+	struct listener_list *listeners;
+	struct listener *s, *tmp;
+	struct sk_buff *skb_next, *skb_cur = skb;
+	void *reply = genlmsg_data(genlhdr);
+	int rc, ret;
+
+	rc = genlmsg_end(skb, reply);
+	if (rc < 0) {
+		nlmsg_free(skb);
+		return rc;
+	}
+
+	rc = 0;
+	listeners = &per_cpu(listener_array, cpu);
+	down_write(&listeners->sem);
+	list_for_each_entry_safe(s, tmp, &listeners->list, list) {
+		skb_next = NULL;
+		if (!list_is_last(&s->list, &listeners->list)) {
+			skb_next = skb_clone(skb_cur, GFP_KERNEL);
+			if (!skb_next) {
+				nlmsg_free(skb_cur);
+				rc = -ENOMEM;
+				break;
+			}
+		}
+		ret = genlmsg_unicast(skb_cur, s->pid);
+		if (ret == -ECONNREFUSED) {
+			list_del(&s->list);
+			kfree(s);
+			rc = ret;
+		}
+		skb_cur = skb_next;
+	}
+	up_write(&listeners->sem);
+
+	return rc;
+}
+
 static int fill_pid(pid_t pid, struct task_struct *pidtsk,
 		struct taskstats *stats)
 {
@@ -204,8 +272,73 @@ ret:
 	return;
 }
 
+static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
+{
+	struct listener_list *listeners;
+	struct listener *s, *tmp;
+	unsigned int cpu;
+	cpumask_t mask = *maskp;
 
-static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
+	if (!cpus_subset(mask, cpu_possible_map))
+		return -EINVAL;
+
+	if (isadd == REGISTER) {
+		for_each_cpu_mask(cpu, mask) {
+			s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
+					 cpu_to_node(cpu));
+			if (!s)
+				goto cleanup;
+			s->pid = pid;
+			INIT_LIST_HEAD(&s->list);
+
+			listeners = &per_cpu(listener_array, cpu);
+			down_write(&listeners->sem);
+			list_add(&s->list, &listeners->list);
+			up_write(&listeners->sem);
+		}
+		return 0;
+	}
+
+	/* Deregister or cleanup */
+cleanup:
+	for_each_cpu_mask(cpu, mask) {
+		listeners = &per_cpu(listener_array, cpu);
+		down_write(&listeners->sem);
+		list_for_each_entry_safe(s, tmp, &listeners->list, list) {
+			if (s->pid == pid) {
+				list_del(&s->list);
+				kfree(s);
+				break;
+			}
+		}
+		up_write(&listeners->sem);
+	}
+	return 0;
+}
+
+static int parse(struct nlattr *na, cpumask_t *mask)
+{
+	char *data;
+	int len;
+	int ret;
+
+	if (na == NULL)
+		return 1;
+	len = nla_len(na);
+	if (len > TASKSTATS_CPUMASK_MAXLEN)
+		return -E2BIG;
+	if (len < 1)
+		return -EINVAL;
+	data = kmalloc(len, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	nla_strlcpy(data, na, len);
+	ret = cpulist_parse(data, *mask);
+	kfree(data);
+	return ret;
+}
+
+static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
 	int rc = 0;
 	struct sk_buff *rep_skb;
@@ -213,6 +346,19 @@ static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
 	void *reply;
 	size_t size;
 	struct nlattr *na;
+	cpumask_t mask;
+
+	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
+	if (rc < 0)
+		return rc;
+	if (rc == 0)
+		return add_del_listener(info->snd_pid, &mask, REGISTER);
+
+	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask);
+	if (rc < 0)
+		return rc;
+	if (rc == 0)
+		return add_del_listener(info->snd_pid, &mask, DEREGISTER);
 
 	/*
 	 * Size includes space for nested attributes
@@ -252,7 +398,7 @@ static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
 
 	nla_nest_end(rep_skb, na);
 
-	return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST);
+	return send_reply(rep_skb, info->snd_pid);
 
 nla_put_failure:
 	return genlmsg_cancel(rep_skb, reply);
@@ -261,9 +407,35 @@ err:
 	return rc;
 }
 
+void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
+{
+	struct listener_list *listeners;
+	struct taskstats *tmp;
+	/*
+	 * This is the cpu on which the task is exiting currently and will
+	 * be the one for which the exit event is sent, even if the cpu
+	 * on which this function is running changes later.
+	 */
+	*mycpu = raw_smp_processor_id();
+
+	*ptidstats = NULL;
+	tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
+	if (!tmp)
+		return;
+
+	listeners = &per_cpu(listener_array, *mycpu);
+	down_read(&listeners->sem);
+	if (!list_empty(&listeners->list)) {
+		*ptidstats = tmp;
+		tmp = NULL;
+	}
+	up_read(&listeners->sem);
+	kfree(tmp);
+}
+
 /* Send pid data out on exit */
 void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
-		int group_dead)
+		int group_dead, unsigned int mycpu)
 {
 	int rc;
 	struct sk_buff *rep_skb;
@@ -324,7 +496,7 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
 	nla_nest_end(rep_skb, na);
 
 send:
-	send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
+	send_cpu_listeners(rep_skb, mycpu);
 	return;
 
 nla_put_failure:
@@ -338,16 +510,22 @@ ret:
 
 static struct genl_ops taskstats_ops = {
 	.cmd		= TASKSTATS_CMD_GET,
-	.doit		= taskstats_send_stats,
+	.doit		= taskstats_user_cmd,
 	.policy		= taskstats_cmd_get_policy,
 };
 
 /* Needed early in initialization */
 void __init taskstats_init_early(void)
 {
+	unsigned int i;
+
 	taskstats_cache = kmem_cache_create("taskstats_cache",
 					sizeof(struct taskstats),
 					0, SLAB_PANIC, NULL, NULL);
+	for_each_possible_cpu(i) {
+		INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
+		init_rwsem(&(per_cpu(listener_array, i).sem));
+	}
 }
 
 static int __init taskstats_init(void)
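
Usage note (not part of this patch): with this change a userspace listener registers by sending TASKSTATS_CMD_GET carrying the new TASKSTATS_CMD_ATTR_REGISTER_CPUMASK attribute, an NLA_STRING holding a cpulist such as "0-3" that the kernel parses with cpulist_parse(); per-task exit records for those CPUs are then unicast to the listener by send_cpu_listeners(). The sketch below is a minimal illustration of that registration flow and assumes libnl-3 is available for the generic netlink plumbing; it is illustrative only.

/* Illustrative listener sketch (assumes libnl-3): register for exit data
 * from CPUs 0-3, then block waiting for records unicast by the kernel.
 */
#include <stdio.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/taskstats.h>

static int on_msg(struct nl_msg *msg, void *arg)
{
	/* A real listener would walk the nested TASKSTATS_TYPE_* attributes. */
	printf("received a taskstats exit record\n");
	return NL_OK;
}

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family;

	if (!sk || genl_connect(sk))
		return 1;
	family = genl_ctrl_resolve(sk, TASKSTATS_GENL_NAME);
	if (family < 0)
		return 1;

	/* Register this pid for CPUs 0-3 (cpulist format, cf. cpulist_parse()). */
	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    TASKSTATS_CMD_GET, TASKSTATS_GENL_VERSION);
	nla_put_string(msg, TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, "0-3");
	nl_send_auto(sk, msg);
	nlmsg_free(msg);

	/* Exit records use per-cpu sequence numbers, so skip strict seq checks. */
	nl_socket_disable_seq_check(sk);
	nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, on_msg, NULL);
	for (;;)
		nl_recvmsgs_default(sk);
}

Deregistration is symmetric: send the same cpulist under TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK. The kernel also drops a listener on its own when a unicast to it fails with -ECONNREFUSED (see send_cpu_listeners()).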