aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/accounting/taskstats.txt146
-rw-r--r--include/linux/taskstats.h84
-rw-r--r--include/linux/taskstats_kern.h57
-rw-r--r--init/Kconfig13
-rw-r--r--init/main.c2
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/exit.c7
-rw-r--r--kernel/taskstats.c336
8 files changed, 646 insertions, 0 deletions
diff --git a/Documentation/accounting/taskstats.txt b/Documentation/accounting/taskstats.txt
new file mode 100644
index 000000000000..ad9b6997e162
--- /dev/null
+++ b/Documentation/accounting/taskstats.txt
@@ -0,0 +1,146 @@
1Per-task statistics interface
2-----------------------------
3
4
5Taskstats is a netlink-based interface for sending per-task and
6per-process statistics from the kernel to userspace.
7
8Taskstats was designed for the following benefits:
9
10- efficiently provide statistics during lifetime of a task and on its exit
11- unified interface for multiple accounting subsystems
12- extensibility for use by future accounting patches
13
14Terminology
15-----------
16
17"pid", "tid" and "task" are used interchangeably and refer to the standard
18Linux task defined by struct task_struct. per-pid stats are the same as
19per-task stats.
20
21"tgid", "process" and "thread group" are used interchangeably and refer to the
22tasks that share an mm_struct i.e. the traditional Unix process. Despite the
23use of tgid, there is no special treatment for the task that is thread group
24leader - a process is deemed alive as long as it has any task belonging to it.
25
26Usage
27-----
28
29To get statistics during task's lifetime, userspace opens a unicast netlink
30socket (NETLINK_GENERIC family) and sends commands specifying a pid or a tgid.
31The response contains statistics for a task (if pid is specified) or the sum of
32statistics for all tasks of the process (if tgid is specified).
33
34To obtain statistics for tasks which are exiting, userspace opens a multicast
35netlink socket. Each time a task exits, two records are sent by the kernel to
36each listener on the multicast socket. The first is the per-pid task's statistics
37and the second is the sum for all tasks of the process to which the task
38belongs (the task does not need to be the thread group leader). The need for
39per-tgid stats to be sent for each exiting task is explained in the per-tgid
40stats section below.
41
42
43Interface
44---------
45
46The user-kernel interface is encapsulated in include/linux/taskstats.h
47
48To avoid this documentation becoming obsolete as the interface evolves, only
49an outline of the current version is given. taskstats.h always overrides the
50description here.
51
52struct taskstats is the common accounting structure for both per-pid and
53per-tgid data. It is versioned and can be extended by each accounting subsystem
54that is added to the kernel. The fields and their semantics are defined in the
55taskstats.h file.
56
57The data exchanged between user and kernel space is a netlink message belonging
58to the NETLINK_GENERIC family and using the netlink attributes interface.
59The messages are in the format
60
61 +----------+- - -+-------------+-------------------+
62 | nlmsghdr | Pad | genlmsghdr | taskstats payload |
63 +----------+- - -+-------------+-------------------+
64
65
66The taskstats payload is one of the following three kinds:
67
681. Commands: Sent from user to kernel. The payload is one attribute, of type
69TASKSTATS_CMD_ATTR_PID/TGID, containing a u32 pid or tgid in the attribute
70payload. The pid/tgid denotes the task/process for which userspace wants
71statistics.
72
732. Response for a command: sent from the kernel in response to a userspace
74command. The payload is a series of three attributes of type:
75
76a) TASKSTATS_TYPE_AGGR_PID/TGID : attribute containing no payload but indicates
77a pid/tgid will be followed by some stats.
78
79b) TASKSTATS_TYPE_PID/TGID: attribute whose payload is the pid/tgid whose stats
80are being returned.
81
82c) TASKSTATS_TYPE_STATS: attribute with a struct taskstats as payload. The
83same structure is used for both per-pid and per-tgid stats.
84
853. New message sent by kernel whenever a task exits. The payload consists of a
86 series of attributes of the following type:
87
88a) TASKSTATS_TYPE_AGGR_PID: indicates next two attributes will be pid+stats
89b) TASKSTATS_TYPE_PID: contains exiting task's pid
90c) TASKSTATS_TYPE_STATS: contains the exiting task's per-pid stats
91d) TASKSTATS_TYPE_AGGR_TGID: indicates next two attributes will be tgid+stats
92e) TASKSTATS_TYPE_TGID: contains tgid of process to which task belongs
93f) TASKSTATS_TYPE_STATS: contains the per-tgid stats for exiting task's process
94
95
96per-tgid stats
97--------------
98
99Taskstats provides per-process stats, in addition to per-task stats, since
100resource management is often done at a process granularity and aggregating task
101stats in userspace alone is inefficient and potentially inaccurate (due to lack
102of atomicity).
103
104However, maintaining per-process, in addition to per-task stats, within the
105kernel has space and time overheads. Hence the taskstats implementation
106dynamically sums up the per-task stats for each task belonging to a process
107whenever per-process stats are needed.
108
109Not maintaining per-tgid stats creates a problem when userspace is interested
110in getting these stats when the process dies i.e. the last thread of
111a process exits. It isn't possible to simply return some aggregated per-process
112statistic from the kernel.
113
114The approach taken by taskstats is to return the per-tgid stats *each* time
115a task exits, in addition to the per-pid stats for that task. Userspace can
116maintain task<->process mappings and use them to maintain the per-process stats
117in userspace, updating the aggregate appropriately as the tasks of a process
118exit.
119
120Extending taskstats
121-------------------
122
123There are two ways to extend the taskstats interface to export more
124per-task/process stats as patches to collect them get added to the kernel
125in future:
126
1271. Adding more fields to the end of the existing struct taskstats. Backward
128 compatibility is ensured by the version number within the
129 structure. Userspace will use only the fields of the struct that correspond
130 to the version it is using.
131
1322. Defining separate statistic structs and using the netlink attributes
133 interface to return them. Since userspace processes each netlink attribute
134 independently, it can always ignore attributes whose type it does not
135 understand (because it is using an older version of the interface).
136
137
138Choosing between 1. and 2. is a matter of trading off flexibility and
139overhead. If only a few fields need to be added, then 1. is the preferable
140path since the kernel and userspace don't need to incur the overhead of
141processing new netlink attributes. But if the new fields expand the existing
142struct too much, requiring disparate userspace accounting utilities to
143unnecessarily receive large structures whose fields are of no interest, then
144extending the attributes structure would be worthwhile.
145
146----
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
new file mode 100644
index 000000000000..51f62759bea9
--- /dev/null
+++ b/include/linux/taskstats.h
@@ -0,0 +1,84 @@
1/* taskstats.h - exporting per-task statistics
2 *
3 * Copyright (C) Shailabh Nagar, IBM Corp. 2006
4 * (C) Balbir Singh, IBM Corp. 2006
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of version 2.1 of the GNU Lesser General Public License
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it would be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 */
14
15#ifndef _LINUX_TASKSTATS_H
16#define _LINUX_TASKSTATS_H
17
18/* Format for per-task data returned to userland when
19 * - a task exits
20 * - listener requests stats for a task
21 *
22 * The struct is versioned. Newer versions should only add fields to
23 * the bottom of the struct to maintain backward compatibility.
24 *
25 *
26 * To add new fields
27 * a) bump up TASKSTATS_VERSION
28 * b) add comment indicating new version number at end of struct
29 * c) add new fields after version comment; maintain 64-bit alignment
30 */
31
32#define TASKSTATS_VERSION 1
33
34struct taskstats {
35
36 /* Version 1 */
37 __u64 version;
38};
39
40
41#define TASKSTATS_LISTEN_GROUP 0x1
42
43/*
44 * Commands sent from userspace
45 * Not versioned. New commands should only be inserted at the enum's end
46 * prior to __TASKSTATS_CMD_MAX
47 */
48
49enum {
50 TASKSTATS_CMD_UNSPEC = 0, /* Reserved */
51 TASKSTATS_CMD_GET, /* user->kernel request/get-response */
52 TASKSTATS_CMD_NEW, /* kernel->user event */
53 __TASKSTATS_CMD_MAX,
54};
55
56#define TASKSTATS_CMD_MAX (__TASKSTATS_CMD_MAX - 1)
57
58enum {
59 TASKSTATS_TYPE_UNSPEC = 0, /* Reserved */
60 TASKSTATS_TYPE_PID, /* Process id */
61 TASKSTATS_TYPE_TGID, /* Thread group id */
62 TASKSTATS_TYPE_STATS, /* taskstats structure */
63 TASKSTATS_TYPE_AGGR_PID, /* contains pid + stats */
64 TASKSTATS_TYPE_AGGR_TGID, /* contains tgid + stats */
65 __TASKSTATS_TYPE_MAX,
66};
67
68#define TASKSTATS_TYPE_MAX (__TASKSTATS_TYPE_MAX - 1)
69
70enum {
71 TASKSTATS_CMD_ATTR_UNSPEC = 0,
72 TASKSTATS_CMD_ATTR_PID,
73 TASKSTATS_CMD_ATTR_TGID,
74 __TASKSTATS_CMD_ATTR_MAX,
75};
76
77#define TASKSTATS_CMD_ATTR_MAX (__TASKSTATS_CMD_ATTR_MAX - 1)
78
79/* NETLINK_GENERIC related info */
80
81#define TASKSTATS_GENL_NAME "TASKSTATS"
82#define TASKSTATS_GENL_VERSION 0x1
83
84#endif /* _LINUX_TASKSTATS_H */
diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h
new file mode 100644
index 000000000000..bd0ecb969c26
--- /dev/null
+++ b/include/linux/taskstats_kern.h
@@ -0,0 +1,57 @@
1/* taskstats_kern.h - kernel header for per-task statistics interface
2 *
3 * Copyright (C) Shailabh Nagar, IBM Corp. 2006
4 * (C) Balbir Singh, IBM Corp. 2006
5 */
6
7#ifndef _LINUX_TASKSTATS_KERN_H
8#define _LINUX_TASKSTATS_KERN_H
9
10#include <linux/taskstats.h>
11#include <linux/sched.h>
12
13enum {
14 TASKSTATS_MSG_UNICAST, /* send data only to requester */
15 TASKSTATS_MSG_MULTICAST, /* send data to a group */
16};
17
18#ifdef CONFIG_TASKSTATS
19extern kmem_cache_t *taskstats_cache;
20
21static inline void taskstats_exit_alloc(struct taskstats **ptidstats,
22 struct taskstats **ptgidstats)
23{
24 *ptidstats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
25 *ptgidstats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
26}
27
28static inline void taskstats_exit_free(struct taskstats *tidstats,
29 struct taskstats *tgidstats)
30{
31 if (tidstats)
32 kmem_cache_free(taskstats_cache, tidstats);
33 if (tgidstats)
34 kmem_cache_free(taskstats_cache, tgidstats);
35}
36
37extern void taskstats_exit_send(struct task_struct *, struct taskstats *,
38 struct taskstats *);
39extern void taskstats_init_early(void);
40
41#else
42static inline void taskstats_exit_alloc(struct taskstats **ptidstats,
43 struct taskstats **ptgidstats)
44{}
45static inline void taskstats_exit_free(struct taskstats *ptidstats,
46 struct taskstats *ptgidstats)
47{}
48static inline void taskstats_exit_send(struct task_struct *tsk,
49 struct taskstats *tidstats,
50 struct taskstats *tgidstats)
51{}
52static inline void taskstats_init_early(void)
53{}
54#endif /* CONFIG_TASKSTATS */
55
56#endif
57
diff --git a/init/Kconfig b/init/Kconfig
index 90498a3e53da..56a7093b4e4c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -158,6 +158,19 @@ config BSD_PROCESS_ACCT_V3
158 for processing it. A preliminary version of these tools is available 158 for processing it. A preliminary version of these tools is available
159 at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>. 159 at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>.
160 160
161config TASKSTATS
162 bool "Export task/process statistics through netlink (EXPERIMENTAL)"
163 depends on NET
164 default n
165 help
166 Export selected statistics for tasks/processes through the
167 generic netlink interface. Unlike BSD process accounting, the
168 statistics are available during the lifetime of tasks/processes as
169 responses to commands. Like BSD accounting, they are sent to user
170 space on task exit.
171
172 Say N if unsure.
173
161config TASK_DELAY_ACCT 174config TASK_DELAY_ACCT
162 bool "Enable per-task delay accounting (EXPERIMENTAL)" 175 bool "Enable per-task delay accounting (EXPERIMENTAL)"
163 help 176 help
diff --git a/init/main.c b/init/main.c
index 9e8e8c152142..8651a720a092 100644
--- a/init/main.c
+++ b/init/main.c
@@ -41,6 +41,7 @@
41#include <linux/cpu.h> 41#include <linux/cpu.h>
42#include <linux/cpuset.h> 42#include <linux/cpuset.h>
43#include <linux/efi.h> 43#include <linux/efi.h>
44#include <linux/taskstats_kern.h>
44#include <linux/delayacct.h> 45#include <linux/delayacct.h>
45#include <linux/unistd.h> 46#include <linux/unistd.h>
46#include <linux/rmap.h> 47#include <linux/rmap.h>
@@ -575,6 +576,7 @@ asmlinkage void __init start_kernel(void)
575 proc_root_init(); 576 proc_root_init();
576#endif 577#endif
577 cpuset_init(); 578 cpuset_init();
579 taskstats_init_early();
578 delayacct_init(); 580 delayacct_init();
579 581
580 check_bugs(); 582 check_bugs();
diff --git a/kernel/Makefile b/kernel/Makefile
index 87bb34cc8938..d62ec66c1af2 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_SECCOMP) += seccomp.o
49obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 49obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
50obj-$(CONFIG_RELAY) += relay.o 50obj-$(CONFIG_RELAY) += relay.o
51obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o 51obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
52obj-$(CONFIG_TASKSTATS) += taskstats.o
52 53
53ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) 54ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
54# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 55# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/exit.c b/kernel/exit.c
index 3c2cf91defa7..9852ed8c2988 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -25,6 +25,7 @@
25#include <linux/mount.h> 25#include <linux/mount.h>
26#include <linux/proc_fs.h> 26#include <linux/proc_fs.h>
27#include <linux/mempolicy.h> 27#include <linux/mempolicy.h>
28#include <linux/taskstats_kern.h>
28#include <linux/delayacct.h> 29#include <linux/delayacct.h>
29#include <linux/cpuset.h> 30#include <linux/cpuset.h>
30#include <linux/syscalls.h> 31#include <linux/syscalls.h>
@@ -844,6 +845,7 @@ static void exit_notify(struct task_struct *tsk)
844fastcall NORET_TYPE void do_exit(long code) 845fastcall NORET_TYPE void do_exit(long code)
845{ 846{
846 struct task_struct *tsk = current; 847 struct task_struct *tsk = current;
848 struct taskstats *tidstats, *tgidstats;
847 int group_dead; 849 int group_dead;
848 850
849 profile_task_exit(tsk); 851 profile_task_exit(tsk);
@@ -882,6 +884,8 @@ fastcall NORET_TYPE void do_exit(long code)
882 current->comm, current->pid, 884 current->comm, current->pid,
883 preempt_count()); 885 preempt_count());
884 886
887 taskstats_exit_alloc(&tidstats, &tgidstats);
888
885 acct_update_integrals(tsk); 889 acct_update_integrals(tsk);
886 if (tsk->mm) { 890 if (tsk->mm) {
887 update_hiwater_rss(tsk->mm); 891 update_hiwater_rss(tsk->mm);
@@ -901,7 +905,10 @@ fastcall NORET_TYPE void do_exit(long code)
901#endif 905#endif
902 if (unlikely(tsk->audit_context)) 906 if (unlikely(tsk->audit_context))
903 audit_free(tsk); 907 audit_free(tsk);
908 taskstats_exit_send(tsk, tidstats, tgidstats);
909 taskstats_exit_free(tidstats, tgidstats);
904 delayacct_tsk_exit(tsk); 910 delayacct_tsk_exit(tsk);
911
905 exit_mm(tsk); 912 exit_mm(tsk);
906 913
907 if (group_dead) 914 if (group_dead)
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
new file mode 100644
index 000000000000..82ec9137d908
--- /dev/null
+++ b/kernel/taskstats.c
@@ -0,0 +1,336 @@
1/*
2 * taskstats.c - Export per-task statistics to userland
3 *
4 * Copyright (C) Shailabh Nagar, IBM Corp. 2006
5 * (C) Balbir Singh, IBM Corp. 2006
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 */
18
19#include <linux/kernel.h>
20#include <linux/taskstats_kern.h>
21#include <net/genetlink.h>
22#include <asm/atomic.h>
23
24static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
25static int family_registered;
26kmem_cache_t *taskstats_cache;
27static DEFINE_MUTEX(taskstats_exit_mutex);
28
29static struct genl_family family = {
30 .id = GENL_ID_GENERATE,
31 .name = TASKSTATS_GENL_NAME,
32 .version = TASKSTATS_GENL_VERSION,
33 .maxattr = TASKSTATS_CMD_ATTR_MAX,
34};
35
36static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1]
37__read_mostly = {
38 [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 },
39 [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
40};
41
42
43static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
44 void **replyp, size_t size)
45{
46 struct sk_buff *skb;
47 void *reply;
48
49 /*
50 * If new attributes are added, please revisit this allocation
51 */
52 skb = nlmsg_new(size);
53 if (!skb)
54 return -ENOMEM;
55
56 if (!info) {
57 int seq = get_cpu_var(taskstats_seqnum)++;
58 put_cpu_var(taskstats_seqnum);
59
60 reply = genlmsg_put(skb, 0, seq,
61 family.id, 0, 0,
62 cmd, family.version);
63 } else
64 reply = genlmsg_put(skb, info->snd_pid, info->snd_seq,
65 family.id, 0, 0,
66 cmd, family.version);
67 if (reply == NULL) {
68 nlmsg_free(skb);
69 return -EINVAL;
70 }
71
72 *skbp = skb;
73 *replyp = reply;
74 return 0;
75}
76
77static int send_reply(struct sk_buff *skb, pid_t pid, int event)
78{
79 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
80 void *reply;
81 int rc;
82
83 reply = genlmsg_data(genlhdr);
84
85 rc = genlmsg_end(skb, reply);
86 if (rc < 0) {
87 nlmsg_free(skb);
88 return rc;
89 }
90
91 if (event == TASKSTATS_MSG_MULTICAST)
92 return genlmsg_multicast(skb, pid, TASKSTATS_LISTEN_GROUP);
93 return genlmsg_unicast(skb, pid);
94}
95
96static int fill_pid(pid_t pid, struct task_struct *pidtsk,
97 struct taskstats *stats)
98{
99 int rc;
100 struct task_struct *tsk = pidtsk;
101
102 if (!pidtsk) {
103 read_lock(&tasklist_lock);
104 tsk = find_task_by_pid(pid);
105 if (!tsk) {
106 read_unlock(&tasklist_lock);
107 return -ESRCH;
108 }
109 get_task_struct(tsk);
110 read_unlock(&tasklist_lock);
111 } else
112 get_task_struct(tsk);
113
114 /*
115 * Each accounting subsystem adds calls to its functions to
116 * fill in relevant parts of struct taskstsats as follows
117 *
118 * rc = per-task-foo(stats, tsk);
119 * if (rc)
120 * goto err;
121 */
122
123err:
124 put_task_struct(tsk);
125 return rc;
126
127}
128
129static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
130 struct taskstats *stats)
131{
132 int rc;
133 struct task_struct *tsk, *first;
134
135 first = tgidtsk;
136 read_lock(&tasklist_lock);
137 if (!first) {
138 first = find_task_by_pid(tgid);
139 if (!first) {
140 read_unlock(&tasklist_lock);
141 return -ESRCH;
142 }
143 }
144 tsk = first;
145 do {
146 /*
147 * Each accounting subsystem adds calls its functions to
148 * fill in relevant parts of struct taskstsats as follows
149 *
150 * rc = per-task-foo(stats, tsk);
151 * if (rc)
152 * break;
153 */
154
155 } while_each_thread(first, tsk);
156 read_unlock(&tasklist_lock);
157
158 /*
159 * Accounting subsytems can also add calls here if they don't
160 * wish to aggregate statistics for per-tgid stats
161 */
162
163 return rc;
164}
165
166static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
167{
168 int rc = 0;
169 struct sk_buff *rep_skb;
170 struct taskstats stats;
171 void *reply;
172 size_t size;
173 struct nlattr *na;
174
175 /*
176 * Size includes space for nested attributes
177 */
178 size = nla_total_size(sizeof(u32)) +
179 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
180
181 memset(&stats, 0, sizeof(stats));
182 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
183 if (rc < 0)
184 return rc;
185
186 if (info->attrs[TASKSTATS_CMD_ATTR_PID]) {
187 u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
188 rc = fill_pid(pid, NULL, &stats);
189 if (rc < 0)
190 goto err;
191
192 na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
193 NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid);
194 NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
195 stats);
196 } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) {
197 u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
198 rc = fill_tgid(tgid, NULL, &stats);
199 if (rc < 0)
200 goto err;
201
202 na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
203 NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid);
204 NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
205 stats);
206 } else {
207 rc = -EINVAL;
208 goto err;
209 }
210
211 nla_nest_end(rep_skb, na);
212
213 return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST);
214
215nla_put_failure:
216 return genlmsg_cancel(rep_skb, reply);
217err:
218 nlmsg_free(rep_skb);
219 return rc;
220}
221
222/* Send pid data out on exit */
223void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
224 struct taskstats *tgidstats)
225{
226 int rc;
227 struct sk_buff *rep_skb;
228 void *reply;
229 size_t size;
230 int is_thread_group;
231 struct nlattr *na;
232
233 if (!family_registered || !tidstats)
234 return;
235
236 mutex_lock(&taskstats_exit_mutex);
237
238 is_thread_group = !thread_group_empty(tsk);
239 rc = 0;
240
241 /*
242 * Size includes space for nested attributes
243 */
244 size = nla_total_size(sizeof(u32)) +
245 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
246
247 if (is_thread_group)
248 size = 2 * size; /* PID + STATS + TGID + STATS */
249
250 rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
251 if (rc < 0)
252 goto ret;
253
254 rc = fill_pid(tsk->pid, tsk, tidstats);
255 if (rc < 0)
256 goto err_skb;
257
258 na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
259 NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid);
260 NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
261 *tidstats);
262 nla_nest_end(rep_skb, na);
263
264 if (!is_thread_group || !tgidstats) {
265 send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
266 goto ret;
267 }
268
269 rc = fill_tgid(tsk->pid, tsk, tgidstats);
270 /*
271 * If fill_tgid() failed then one probable reason could be that the
272 * thread group leader has exited. fill_tgid() will fail, send out
273 * the pid statistics collected earlier.
274 */
275 if (rc < 0) {
276 send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
277 goto ret;
278 }
279
280 na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
281 NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid);
282 NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
283 *tgidstats);
284 nla_nest_end(rep_skb, na);
285
286 send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
287 goto ret;
288
289nla_put_failure:
290 genlmsg_cancel(rep_skb, reply);
291 goto ret;
292err_skb:
293 nlmsg_free(rep_skb);
294ret:
295 mutex_unlock(&taskstats_exit_mutex);
296 return;
297}
298
299static struct genl_ops taskstats_ops = {
300 .cmd = TASKSTATS_CMD_GET,
301 .doit = taskstats_send_stats,
302 .policy = taskstats_cmd_get_policy,
303};
304
305/* Needed early in initialization */
306void __init taskstats_init_early(void)
307{
308 taskstats_cache = kmem_cache_create("taskstats_cache",
309 sizeof(struct taskstats),
310 0, SLAB_PANIC, NULL, NULL);
311}
312
313static int __init taskstats_init(void)
314{
315 int rc;
316
317 rc = genl_register_family(&family);
318 if (rc)
319 return rc;
320
321 rc = genl_register_ops(&family, &taskstats_ops);
322 if (rc < 0)
323 goto err;
324
325 family_registered = 1;
326 return 0;
327err:
328 genl_unregister_family(&family);
329 return rc;
330}
331
332/*
333 * late initcall ensures initialization of statistics collection
334 * mechanisms precedes initialization of the taskstats interface
335 */
336late_initcall(taskstats_init);