diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/exit.c | 7 | ||||
-rw-r--r-- | kernel/taskstats.c | 336 |
3 files changed, 344 insertions, 0 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 87bb34cc8938..d62ec66c1af2 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -49,6 +49,7 @@ obj-$(CONFIG_SECCOMP) += seccomp.o | |||
49 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | 49 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
50 | obj-$(CONFIG_RELAY) += relay.o | 50 | obj-$(CONFIG_RELAY) += relay.o |
51 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | 51 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o |
52 | obj-$(CONFIG_TASKSTATS) += taskstats.o | ||
52 | 53 | ||
53 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) | 54 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) |
54 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 55 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/exit.c b/kernel/exit.c index 3c2cf91defa7..9852ed8c2988 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/mount.h> | 25 | #include <linux/mount.h> |
26 | #include <linux/proc_fs.h> | 26 | #include <linux/proc_fs.h> |
27 | #include <linux/mempolicy.h> | 27 | #include <linux/mempolicy.h> |
28 | #include <linux/taskstats_kern.h> | ||
28 | #include <linux/delayacct.h> | 29 | #include <linux/delayacct.h> |
29 | #include <linux/cpuset.h> | 30 | #include <linux/cpuset.h> |
30 | #include <linux/syscalls.h> | 31 | #include <linux/syscalls.h> |
@@ -844,6 +845,7 @@ static void exit_notify(struct task_struct *tsk) | |||
844 | fastcall NORET_TYPE void do_exit(long code) | 845 | fastcall NORET_TYPE void do_exit(long code) |
845 | { | 846 | { |
846 | struct task_struct *tsk = current; | 847 | struct task_struct *tsk = current; |
848 | struct taskstats *tidstats, *tgidstats; | ||
847 | int group_dead; | 849 | int group_dead; |
848 | 850 | ||
849 | profile_task_exit(tsk); | 851 | profile_task_exit(tsk); |
@@ -882,6 +884,8 @@ fastcall NORET_TYPE void do_exit(long code) | |||
882 | current->comm, current->pid, | 884 | current->comm, current->pid, |
883 | preempt_count()); | 885 | preempt_count()); |
884 | 886 | ||
887 | taskstats_exit_alloc(&tidstats, &tgidstats); | ||
888 | |||
885 | acct_update_integrals(tsk); | 889 | acct_update_integrals(tsk); |
886 | if (tsk->mm) { | 890 | if (tsk->mm) { |
887 | update_hiwater_rss(tsk->mm); | 891 | update_hiwater_rss(tsk->mm); |
@@ -901,7 +905,10 @@ fastcall NORET_TYPE void do_exit(long code) | |||
901 | #endif | 905 | #endif |
902 | if (unlikely(tsk->audit_context)) | 906 | if (unlikely(tsk->audit_context)) |
903 | audit_free(tsk); | 907 | audit_free(tsk); |
908 | taskstats_exit_send(tsk, tidstats, tgidstats); | ||
909 | taskstats_exit_free(tidstats, tgidstats); | ||
904 | delayacct_tsk_exit(tsk); | 910 | delayacct_tsk_exit(tsk); |
911 | |||
905 | exit_mm(tsk); | 912 | exit_mm(tsk); |
906 | 913 | ||
907 | if (group_dead) | 914 | if (group_dead) |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c new file mode 100644 index 000000000000..82ec9137d908 --- /dev/null +++ b/kernel/taskstats.c | |||
@@ -0,0 +1,336 @@ | |||
1 | /* | ||
2 | * taskstats.c - Export per-task statistics to userland | ||
3 | * | ||
4 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 | ||
5 | * (C) Balbir Singh, IBM Corp. 2006 | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/taskstats_kern.h> | ||
21 | #include <net/genetlink.h> | ||
22 | #include <asm/atomic.h> | ||
23 | |||
24 | static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; | ||
25 | static int family_registered; | ||
26 | kmem_cache_t *taskstats_cache; | ||
27 | static DEFINE_MUTEX(taskstats_exit_mutex); | ||
28 | |||
29 | static struct genl_family family = { | ||
30 | .id = GENL_ID_GENERATE, | ||
31 | .name = TASKSTATS_GENL_NAME, | ||
32 | .version = TASKSTATS_GENL_VERSION, | ||
33 | .maxattr = TASKSTATS_CMD_ATTR_MAX, | ||
34 | }; | ||
35 | |||
36 | static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] | ||
37 | __read_mostly = { | ||
38 | [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, | ||
39 | [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, | ||
40 | }; | ||
41 | |||
42 | |||
43 | static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, | ||
44 | void **replyp, size_t size) | ||
45 | { | ||
46 | struct sk_buff *skb; | ||
47 | void *reply; | ||
48 | |||
49 | /* | ||
50 | * If new attributes are added, please revisit this allocation | ||
51 | */ | ||
52 | skb = nlmsg_new(size); | ||
53 | if (!skb) | ||
54 | return -ENOMEM; | ||
55 | |||
56 | if (!info) { | ||
57 | int seq = get_cpu_var(taskstats_seqnum)++; | ||
58 | put_cpu_var(taskstats_seqnum); | ||
59 | |||
60 | reply = genlmsg_put(skb, 0, seq, | ||
61 | family.id, 0, 0, | ||
62 | cmd, family.version); | ||
63 | } else | ||
64 | reply = genlmsg_put(skb, info->snd_pid, info->snd_seq, | ||
65 | family.id, 0, 0, | ||
66 | cmd, family.version); | ||
67 | if (reply == NULL) { | ||
68 | nlmsg_free(skb); | ||
69 | return -EINVAL; | ||
70 | } | ||
71 | |||
72 | *skbp = skb; | ||
73 | *replyp = reply; | ||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | static int send_reply(struct sk_buff *skb, pid_t pid, int event) | ||
78 | { | ||
79 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); | ||
80 | void *reply; | ||
81 | int rc; | ||
82 | |||
83 | reply = genlmsg_data(genlhdr); | ||
84 | |||
85 | rc = genlmsg_end(skb, reply); | ||
86 | if (rc < 0) { | ||
87 | nlmsg_free(skb); | ||
88 | return rc; | ||
89 | } | ||
90 | |||
91 | if (event == TASKSTATS_MSG_MULTICAST) | ||
92 | return genlmsg_multicast(skb, pid, TASKSTATS_LISTEN_GROUP); | ||
93 | return genlmsg_unicast(skb, pid); | ||
94 | } | ||
95 | |||
96 | static int fill_pid(pid_t pid, struct task_struct *pidtsk, | ||
97 | struct taskstats *stats) | ||
98 | { | ||
99 | int rc; | ||
100 | struct task_struct *tsk = pidtsk; | ||
101 | |||
102 | if (!pidtsk) { | ||
103 | read_lock(&tasklist_lock); | ||
104 | tsk = find_task_by_pid(pid); | ||
105 | if (!tsk) { | ||
106 | read_unlock(&tasklist_lock); | ||
107 | return -ESRCH; | ||
108 | } | ||
109 | get_task_struct(tsk); | ||
110 | read_unlock(&tasklist_lock); | ||
111 | } else | ||
112 | get_task_struct(tsk); | ||
113 | |||
114 | /* | ||
115 | * Each accounting subsystem adds calls to its functions to | ||
116 | * fill in relevant parts of struct taskstsats as follows | ||
117 | * | ||
118 | * rc = per-task-foo(stats, tsk); | ||
119 | * if (rc) | ||
120 | * goto err; | ||
121 | */ | ||
122 | |||
123 | err: | ||
124 | put_task_struct(tsk); | ||
125 | return rc; | ||
126 | |||
127 | } | ||
128 | |||
129 | static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, | ||
130 | struct taskstats *stats) | ||
131 | { | ||
132 | int rc; | ||
133 | struct task_struct *tsk, *first; | ||
134 | |||
135 | first = tgidtsk; | ||
136 | read_lock(&tasklist_lock); | ||
137 | if (!first) { | ||
138 | first = find_task_by_pid(tgid); | ||
139 | if (!first) { | ||
140 | read_unlock(&tasklist_lock); | ||
141 | return -ESRCH; | ||
142 | } | ||
143 | } | ||
144 | tsk = first; | ||
145 | do { | ||
146 | /* | ||
147 | * Each accounting subsystem adds calls its functions to | ||
148 | * fill in relevant parts of struct taskstsats as follows | ||
149 | * | ||
150 | * rc = per-task-foo(stats, tsk); | ||
151 | * if (rc) | ||
152 | * break; | ||
153 | */ | ||
154 | |||
155 | } while_each_thread(first, tsk); | ||
156 | read_unlock(&tasklist_lock); | ||
157 | |||
158 | /* | ||
159 | * Accounting subsytems can also add calls here if they don't | ||
160 | * wish to aggregate statistics for per-tgid stats | ||
161 | */ | ||
162 | |||
163 | return rc; | ||
164 | } | ||
165 | |||
166 | static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info) | ||
167 | { | ||
168 | int rc = 0; | ||
169 | struct sk_buff *rep_skb; | ||
170 | struct taskstats stats; | ||
171 | void *reply; | ||
172 | size_t size; | ||
173 | struct nlattr *na; | ||
174 | |||
175 | /* | ||
176 | * Size includes space for nested attributes | ||
177 | */ | ||
178 | size = nla_total_size(sizeof(u32)) + | ||
179 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
180 | |||
181 | memset(&stats, 0, sizeof(stats)); | ||
182 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); | ||
183 | if (rc < 0) | ||
184 | return rc; | ||
185 | |||
186 | if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { | ||
187 | u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); | ||
188 | rc = fill_pid(pid, NULL, &stats); | ||
189 | if (rc < 0) | ||
190 | goto err; | ||
191 | |||
192 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); | ||
193 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid); | ||
194 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
195 | stats); | ||
196 | } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { | ||
197 | u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | ||
198 | rc = fill_tgid(tgid, NULL, &stats); | ||
199 | if (rc < 0) | ||
200 | goto err; | ||
201 | |||
202 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); | ||
203 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid); | ||
204 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
205 | stats); | ||
206 | } else { | ||
207 | rc = -EINVAL; | ||
208 | goto err; | ||
209 | } | ||
210 | |||
211 | nla_nest_end(rep_skb, na); | ||
212 | |||
213 | return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST); | ||
214 | |||
215 | nla_put_failure: | ||
216 | return genlmsg_cancel(rep_skb, reply); | ||
217 | err: | ||
218 | nlmsg_free(rep_skb); | ||
219 | return rc; | ||
220 | } | ||
221 | |||
222 | /* Send pid data out on exit */ | ||
223 | void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, | ||
224 | struct taskstats *tgidstats) | ||
225 | { | ||
226 | int rc; | ||
227 | struct sk_buff *rep_skb; | ||
228 | void *reply; | ||
229 | size_t size; | ||
230 | int is_thread_group; | ||
231 | struct nlattr *na; | ||
232 | |||
233 | if (!family_registered || !tidstats) | ||
234 | return; | ||
235 | |||
236 | mutex_lock(&taskstats_exit_mutex); | ||
237 | |||
238 | is_thread_group = !thread_group_empty(tsk); | ||
239 | rc = 0; | ||
240 | |||
241 | /* | ||
242 | * Size includes space for nested attributes | ||
243 | */ | ||
244 | size = nla_total_size(sizeof(u32)) + | ||
245 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
246 | |||
247 | if (is_thread_group) | ||
248 | size = 2 * size; /* PID + STATS + TGID + STATS */ | ||
249 | |||
250 | rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); | ||
251 | if (rc < 0) | ||
252 | goto ret; | ||
253 | |||
254 | rc = fill_pid(tsk->pid, tsk, tidstats); | ||
255 | if (rc < 0) | ||
256 | goto err_skb; | ||
257 | |||
258 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); | ||
259 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid); | ||
260 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
261 | *tidstats); | ||
262 | nla_nest_end(rep_skb, na); | ||
263 | |||
264 | if (!is_thread_group || !tgidstats) { | ||
265 | send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); | ||
266 | goto ret; | ||
267 | } | ||
268 | |||
269 | rc = fill_tgid(tsk->pid, tsk, tgidstats); | ||
270 | /* | ||
271 | * If fill_tgid() failed then one probable reason could be that the | ||
272 | * thread group leader has exited. fill_tgid() will fail, send out | ||
273 | * the pid statistics collected earlier. | ||
274 | */ | ||
275 | if (rc < 0) { | ||
276 | send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); | ||
277 | goto ret; | ||
278 | } | ||
279 | |||
280 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); | ||
281 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid); | ||
282 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
283 | *tgidstats); | ||
284 | nla_nest_end(rep_skb, na); | ||
285 | |||
286 | send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); | ||
287 | goto ret; | ||
288 | |||
289 | nla_put_failure: | ||
290 | genlmsg_cancel(rep_skb, reply); | ||
291 | goto ret; | ||
292 | err_skb: | ||
293 | nlmsg_free(rep_skb); | ||
294 | ret: | ||
295 | mutex_unlock(&taskstats_exit_mutex); | ||
296 | return; | ||
297 | } | ||
298 | |||
299 | static struct genl_ops taskstats_ops = { | ||
300 | .cmd = TASKSTATS_CMD_GET, | ||
301 | .doit = taskstats_send_stats, | ||
302 | .policy = taskstats_cmd_get_policy, | ||
303 | }; | ||
304 | |||
305 | /* Needed early in initialization */ | ||
306 | void __init taskstats_init_early(void) | ||
307 | { | ||
308 | taskstats_cache = kmem_cache_create("taskstats_cache", | ||
309 | sizeof(struct taskstats), | ||
310 | 0, SLAB_PANIC, NULL, NULL); | ||
311 | } | ||
312 | |||
313 | static int __init taskstats_init(void) | ||
314 | { | ||
315 | int rc; | ||
316 | |||
317 | rc = genl_register_family(&family); | ||
318 | if (rc) | ||
319 | return rc; | ||
320 | |||
321 | rc = genl_register_ops(&family, &taskstats_ops); | ||
322 | if (rc < 0) | ||
323 | goto err; | ||
324 | |||
325 | family_registered = 1; | ||
326 | return 0; | ||
327 | err: | ||
328 | genl_unregister_family(&family); | ||
329 | return rc; | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * late initcall ensures initialization of statistics collection | ||
334 | * mechanisms precedes initialization of the taskstats interface | ||
335 | */ | ||
336 | late_initcall(taskstats_init); | ||