diff options
-rw-r--r-- | Documentation/accounting/taskstats.txt | 146 | ||||
-rw-r--r-- | include/linux/taskstats.h | 84 | ||||
-rw-r--r-- | include/linux/taskstats_kern.h | 57 | ||||
-rw-r--r-- | init/Kconfig | 13 | ||||
-rw-r--r-- | init/main.c | 2 | ||||
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/exit.c | 7 | ||||
-rw-r--r-- | kernel/taskstats.c | 336 |
8 files changed, 646 insertions, 0 deletions
diff --git a/Documentation/accounting/taskstats.txt b/Documentation/accounting/taskstats.txt new file mode 100644 index 000000000000..ad9b6997e162 --- /dev/null +++ b/Documentation/accounting/taskstats.txt | |||
@@ -0,0 +1,146 @@ | |||
1 | Per-task statistics interface | ||
2 | ----------------------------- | ||
3 | |||
4 | |||
5 | Taskstats is a netlink-based interface for sending per-task and | ||
6 | per-process statistics from the kernel to userspace. | ||
7 | |||
8 | Taskstats was designed for the following benefits: | ||
9 | |||
10 | - efficiently provide statistics during lifetime of a task and on its exit | ||
11 | - unified interface for multiple accounting subsystems | ||
12 | - extensibility for use by future accounting patches | ||
13 | |||
14 | Terminology | ||
15 | ----------- | ||
16 | |||
17 | "pid", "tid" and "task" are used interchangeably and refer to the standard | ||
18 | Linux task defined by struct task_struct. per-pid stats are the same as | ||
19 | per-task stats. | ||
20 | |||
21 | "tgid", "process" and "thread group" are used interchangeably and refer to the | ||
22 | tasks that share an mm_struct i.e. the traditional Unix process. Despite the | ||
23 | use of tgid, there is no special treatment for the task that is thread group | ||
24 | leader - a process is deemed alive as long as it has any task belonging to it. | ||
25 | |||
26 | Usage | ||
27 | ----- | ||
28 | |||
29 | To get statistics during task's lifetime, userspace opens a unicast netlink | ||
30 | socket (NETLINK_GENERIC family) and sends commands specifying a pid or a tgid. | ||
31 | The response contains statistics for a task (if pid is specified) or the sum of | ||
32 | statistics for all tasks of the process (if tgid is specified). | ||
33 | |||
34 | To obtain statistics for tasks which are exiting, userspace opens a multicast | ||
35 | netlink socket. Each time a task exits, two records are sent by the kernel to | ||
36 | each listener on the multicast socket. The first is the per-pid task's statistics | ||
37 | and the second is the sum for all tasks of the process to which the task | ||
38 | belongs (the task does not need to be the thread group leader). The need for | ||
39 | per-tgid stats to be sent for each exiting task is explained in the per-tgid | ||
40 | stats section below. | ||
41 | |||
42 | |||
43 | Interface | ||
44 | --------- | ||
45 | |||
46 | The user-kernel interface is encapsulated in include/linux/taskstats.h | ||
47 | |||
48 | To avoid this documentation becoming obsolete as the interface evolves, only | ||
49 | an outline of the current version is given. taskstats.h always overrides the | ||
50 | description here. | ||
51 | |||
52 | struct taskstats is the common accounting structure for both per-pid and | ||
53 | per-tgid data. It is versioned and can be extended by each accounting subsystem | ||
54 | that is added to the kernel. The fields and their semantics are defined in the | ||
55 | taskstats.h file. | ||
56 | |||
57 | The data exchanged between user and kernel space is a netlink message belonging | ||
58 | to the NETLINK_GENERIC family and using the netlink attributes interface. | ||
59 | The messages are in the format | ||
60 | |||
61 | +----------+- - -+-------------+-------------------+ | ||
62 | | nlmsghdr | Pad | genlmsghdr | taskstats payload | | ||
63 | +----------+- - -+-------------+-------------------+ | ||
64 | |||
65 | |||
66 | The taskstats payload is one of the following three kinds: | ||
67 | |||
68 | 1. Commands: Sent from user to kernel. The payload is one attribute, of type | ||
69 | TASKSTATS_CMD_ATTR_PID/TGID, containing a u32 pid or tgid in the attribute | ||
70 | payload. The pid/tgid denotes the task/process for which userspace wants | ||
71 | statistics. | ||
72 | |||
73 | 2. Response for a command: sent from the kernel in response to a userspace | ||
74 | command. The payload is a series of three attributes of type: | ||
75 | |||
76 | a) TASKSTATS_TYPE_AGGR_PID/TGID : attribute containing no payload but indicates | ||
77 | a pid/tgid will be followed by some stats. | ||
78 | |||
79 | b) TASKSTATS_TYPE_PID/TGID: attribute whose payload is the pid/tgid whose stats | ||
80 | is being returned. | ||
81 | |||
82 | c) TASKSTATS_TYPE_STATS: attribute with a struct taskstats as payload. The | ||
83 | same structure is used for both per-pid and per-tgid stats. | ||
84 | |||
85 | 3. New message sent by kernel whenever a task exits. The payload consists of a | ||
86 | series of attributes of the following type: | ||
87 | |||
88 | a) TASKSTATS_TYPE_AGGR_PID: indicates next two attributes will be pid+stats | ||
89 | b) TASKSTATS_TYPE_PID: contains exiting task's pid | ||
90 | c) TASKSTATS_TYPE_STATS: contains the exiting task's per-pid stats | ||
91 | d) TASKSTATS_TYPE_AGGR_TGID: indicates next two attributes will be tgid+stats | ||
92 | e) TASKSTATS_TYPE_TGID: contains tgid of process to which task belongs | ||
93 | f) TASKSTATS_TYPE_STATS: contains the per-tgid stats for exiting task's process | ||
94 | |||
95 | |||
96 | per-tgid stats | ||
97 | -------------- | ||
98 | |||
99 | Taskstats provides per-process stats, in addition to per-task stats, since | ||
100 | resource management is often done at a process granularity and aggregating task | ||
101 | stats in userspace alone is inefficient and potentially inaccurate (due to lack | ||
102 | of atomicity). | ||
103 | |||
104 | However, maintaining per-process, in addition to per-task stats, within the | ||
105 | kernel has space and time overheads. Hence the taskstats implementation | ||
106 | dynamically sums up the per-task stats for each task belonging to a process | ||
107 | whenever per-process stats are needed. | ||
108 | |||
109 | Not maintaining per-tgid stats creates a problem when userspace is interested | ||
110 | in getting these stats when the process dies i.e. the last thread of | ||
111 | a process exits. It isn't possible to simply return some aggregated per-process | ||
112 | statistic from the kernel. | ||
113 | |||
114 | The approach taken by taskstats is to return the per-tgid stats *each* time | ||
115 | a task exits, in addition to the per-pid stats for that task. Userspace can | ||
116 | maintain task<->process mappings and use them to maintain the per-process stats | ||
117 | in userspace, updating the aggregate appropriately as the tasks of a process | ||
118 | exit. | ||
119 | |||
120 | Extending taskstats | ||
121 | ------------------- | ||
122 | |||
123 | There are two ways to extend the taskstats interface to export more | ||
124 | per-task/process stats as patches to collect them get added to the kernel | ||
125 | in future: | ||
126 | |||
127 | 1. Adding more fields to the end of the existing struct taskstats. Backward | ||
128 | compatibility is ensured by the version number within the | ||
129 | structure. Userspace will use only the fields of the struct that correspond | ||
130 | to the version it is using. | ||
131 | |||
132 | 2. Defining separate statistic structs and using the netlink attributes | ||
133 | interface to return them. Since userspace processes each netlink attribute | ||
134 | independently, it can always ignore attributes whose type it does not | ||
135 | understand (because it is using an older version of the interface). | ||
136 | |||
137 | |||
138 | Choosing between 1. and 2. is a matter of trading off flexibility and | ||
139 | overhead. If only a few fields need to be added, then 1. is the preferable | ||
140 | path since the kernel and userspace don't need to incur the overhead of | ||
141 | processing new netlink attributes. But if the new fields expand the existing | ||
142 | struct too much, requiring disparate userspace accounting utilities to | ||
143 | unnecessarily receive large structures whose fields are of no interest, then | ||
144 | extending the attributes structure would be worthwhile. | ||
145 | |||
146 | ---- | ||
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h new file mode 100644 index 000000000000..51f62759bea9 --- /dev/null +++ b/include/linux/taskstats.h | |||
@@ -0,0 +1,84 @@ | |||
1 | /* taskstats.h - exporting per-task statistics | ||
2 | * | ||
3 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 | ||
4 | * (C) Balbir Singh, IBM Corp. 2006 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of version 2.1 of the GNU Lesser General Public License | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, but | ||
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
13 | */ | ||
14 | |||
15 | #ifndef _LINUX_TASKSTATS_H | ||
16 | #define _LINUX_TASKSTATS_H | ||
17 | |||
18 | /* Format for per-task data returned to userland when | ||
19 | * - a task exits | ||
20 | * - listener requests stats for a task | ||
21 | * | ||
22 | * The struct is versioned. Newer versions should only add fields to | ||
23 | * the bottom of the struct to maintain backward compatibility. | ||
24 | * | ||
25 | * | ||
26 | * To add new fields | ||
27 | * a) bump up TASKSTATS_VERSION | ||
28 | * b) add comment indicating new version number at end of struct | ||
29 | * c) add new fields after version comment; maintain 64-bit alignment | ||
30 | */ | ||
31 | |||
32 | #define TASKSTATS_VERSION 1 | ||
33 | |||
34 | struct taskstats { | ||
35 | |||
36 | /* Version 1 */ | ||
37 | __u64 version; | ||
38 | }; | ||
39 | |||
40 | |||
41 | #define TASKSTATS_LISTEN_GROUP 0x1 | ||
42 | |||
43 | /* | ||
44 | * Commands sent from userspace | ||
45 | * Not versioned. New commands should only be inserted at the enum's end | ||
46 | * prior to __TASKSTATS_CMD_MAX | ||
47 | */ | ||
48 | |||
/*
 * Generic netlink command numbers for the taskstats family.
 * Values are spelled out because they are visible to userspace; the
 * trailing sentinel is left implicit so it tracks newly appended commands.
 */
enum {
	TASKSTATS_CMD_UNSPEC	= 0,	/* Reserved */
	TASKSTATS_CMD_GET	= 1,	/* user->kernel request/get-response */
	TASKSTATS_CMD_NEW	= 2,	/* kernel->user event */
	__TASKSTATS_CMD_MAX,
};
55 | |||
56 | #define TASKSTATS_CMD_MAX (__TASKSTATS_CMD_MAX - 1) | ||
57 | |||
/*
 * Attribute types carried in kernel->user taskstats messages.
 * Values are spelled out because they are visible to userspace; the
 * trailing sentinel is left implicit so it tracks newly appended types.
 */
enum {
	TASKSTATS_TYPE_UNSPEC		= 0,	/* Reserved */
	TASKSTATS_TYPE_PID		= 1,	/* Process id */
	TASKSTATS_TYPE_TGID		= 2,	/* Thread group id */
	TASKSTATS_TYPE_STATS		= 3,	/* taskstats structure */
	TASKSTATS_TYPE_AGGR_PID		= 4,	/* contains pid + stats */
	TASKSTATS_TYPE_AGGR_TGID	= 5,	/* contains tgid + stats */
	__TASKSTATS_TYPE_MAX,
};
67 | |||
68 | #define TASKSTATS_TYPE_MAX (__TASKSTATS_TYPE_MAX - 1) | ||
69 | |||
/*
 * Attribute types accepted in user->kernel taskstats commands.
 * Values are spelled out because they are visible to userspace; the
 * trailing sentinel is left implicit so it tracks newly appended attributes.
 */
enum {
	TASKSTATS_CMD_ATTR_UNSPEC	= 0,
	TASKSTATS_CMD_ATTR_PID		= 1,
	TASKSTATS_CMD_ATTR_TGID		= 2,
	__TASKSTATS_CMD_ATTR_MAX,
};
76 | |||
77 | #define TASKSTATS_CMD_ATTR_MAX (__TASKSTATS_CMD_ATTR_MAX - 1) | ||
78 | |||
79 | /* NETLINK_GENERIC related info */ | ||
80 | |||
81 | #define TASKSTATS_GENL_NAME "TASKSTATS" | ||
82 | #define TASKSTATS_GENL_VERSION 0x1 | ||
83 | |||
84 | #endif /* _LINUX_TASKSTATS_H */ | ||
diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h new file mode 100644 index 000000000000..bd0ecb969c26 --- /dev/null +++ b/include/linux/taskstats_kern.h | |||
@@ -0,0 +1,57 @@ | |||
1 | /* taskstats_kern.h - kernel header for per-task statistics interface | ||
2 | * | ||
3 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 | ||
4 | * (C) Balbir Singh, IBM Corp. 2006 | ||
5 | */ | ||
6 | |||
7 | #ifndef _LINUX_TASKSTATS_KERN_H | ||
8 | #define _LINUX_TASKSTATS_KERN_H | ||
9 | |||
10 | #include <linux/taskstats.h> | ||
11 | #include <linux/sched.h> | ||
12 | |||
/* Delivery mode selector for a taskstats reply. */
enum {
	TASKSTATS_MSG_UNICAST	= 0,	/* send data only to requester */
	TASKSTATS_MSG_MULTICAST	= 1,	/* send data to a group */
};
17 | |||
18 | #ifdef CONFIG_TASKSTATS | ||
19 | extern kmem_cache_t *taskstats_cache; | ||
20 | |||
21 | static inline void taskstats_exit_alloc(struct taskstats **ptidstats, | ||
22 | struct taskstats **ptgidstats) | ||
23 | { | ||
24 | *ptidstats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); | ||
25 | *ptgidstats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); | ||
26 | } | ||
27 | |||
28 | static inline void taskstats_exit_free(struct taskstats *tidstats, | ||
29 | struct taskstats *tgidstats) | ||
30 | { | ||
31 | if (tidstats) | ||
32 | kmem_cache_free(taskstats_cache, tidstats); | ||
33 | if (tgidstats) | ||
34 | kmem_cache_free(taskstats_cache, tgidstats); | ||
35 | } | ||
36 | |||
37 | extern void taskstats_exit_send(struct task_struct *, struct taskstats *, | ||
38 | struct taskstats *); | ||
39 | extern void taskstats_init_early(void); | ||
40 | |||
41 | #else | ||
/*
 * CONFIG_TASKSTATS is disabled: every hook below is an empty inline so
 * that call sites need no #ifdef of their own.
 */

/* No buffers are allocated; the output pointers are left untouched. */
static inline void taskstats_exit_alloc(struct taskstats **ptidstats,
					struct taskstats **ptgidstats)
{
}

/* Nothing was allocated, so there is nothing to release. */
static inline void taskstats_exit_free(struct taskstats *ptidstats,
				       struct taskstats *ptgidstats)
{
}

/* No exit record is generated. */
static inline void taskstats_exit_send(struct task_struct *tsk,
				       struct taskstats *tidstats,
				       struct taskstats *tgidstats)
{
}

/* No cache to set up. */
static inline void taskstats_init_early(void)
{
}
54 | #endif /* CONFIG_TASKSTATS */ | ||
55 | |||
56 | #endif | ||
57 | |||
diff --git a/init/Kconfig b/init/Kconfig index 90498a3e53da..56a7093b4e4c 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -158,6 +158,19 @@ config BSD_PROCESS_ACCT_V3 | |||
158 | for processing it. A preliminary version of these tools is available | 158 | for processing it. A preliminary version of these tools is available |
159 | at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>. | 159 | at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>. |
160 | 160 | ||
161 | config TASKSTATS | ||
162 | bool "Export task/process statistics through netlink (EXPERIMENTAL)" | ||
163 | depends on NET | ||
164 | default n | ||
165 | help | ||
166 | Export selected statistics for tasks/processes through the | ||
167 | generic netlink interface. Unlike BSD process accounting, the | ||
168 | statistics are available during the lifetime of tasks/processes as | ||
169 | responses to commands. Like BSD accounting, they are sent to user | ||
170 | space on task exit. | ||
171 | |||
172 | Say N if unsure. | ||
173 | |||
161 | config TASK_DELAY_ACCT | 174 | config TASK_DELAY_ACCT |
162 | bool "Enable per-task delay accounting (EXPERIMENTAL)" | 175 | bool "Enable per-task delay accounting (EXPERIMENTAL)" |
163 | help | 176 | help |
diff --git a/init/main.c b/init/main.c index 9e8e8c152142..8651a720a092 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/cpu.h> | 41 | #include <linux/cpu.h> |
42 | #include <linux/cpuset.h> | 42 | #include <linux/cpuset.h> |
43 | #include <linux/efi.h> | 43 | #include <linux/efi.h> |
44 | #include <linux/taskstats_kern.h> | ||
44 | #include <linux/delayacct.h> | 45 | #include <linux/delayacct.h> |
45 | #include <linux/unistd.h> | 46 | #include <linux/unistd.h> |
46 | #include <linux/rmap.h> | 47 | #include <linux/rmap.h> |
@@ -575,6 +576,7 @@ asmlinkage void __init start_kernel(void) | |||
575 | proc_root_init(); | 576 | proc_root_init(); |
576 | #endif | 577 | #endif |
577 | cpuset_init(); | 578 | cpuset_init(); |
579 | taskstats_init_early(); | ||
578 | delayacct_init(); | 580 | delayacct_init(); |
579 | 581 | ||
580 | check_bugs(); | 582 | check_bugs(); |
diff --git a/kernel/Makefile b/kernel/Makefile index 87bb34cc8938..d62ec66c1af2 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -49,6 +49,7 @@ obj-$(CONFIG_SECCOMP) += seccomp.o | |||
49 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | 49 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
50 | obj-$(CONFIG_RELAY) += relay.o | 50 | obj-$(CONFIG_RELAY) += relay.o |
51 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | 51 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o |
52 | obj-$(CONFIG_TASKSTATS) += taskstats.o | ||
52 | 53 | ||
53 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) | 54 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) |
54 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 55 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/exit.c b/kernel/exit.c index 3c2cf91defa7..9852ed8c2988 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/mount.h> | 25 | #include <linux/mount.h> |
26 | #include <linux/proc_fs.h> | 26 | #include <linux/proc_fs.h> |
27 | #include <linux/mempolicy.h> | 27 | #include <linux/mempolicy.h> |
28 | #include <linux/taskstats_kern.h> | ||
28 | #include <linux/delayacct.h> | 29 | #include <linux/delayacct.h> |
29 | #include <linux/cpuset.h> | 30 | #include <linux/cpuset.h> |
30 | #include <linux/syscalls.h> | 31 | #include <linux/syscalls.h> |
@@ -844,6 +845,7 @@ static void exit_notify(struct task_struct *tsk) | |||
844 | fastcall NORET_TYPE void do_exit(long code) | 845 | fastcall NORET_TYPE void do_exit(long code) |
845 | { | 846 | { |
846 | struct task_struct *tsk = current; | 847 | struct task_struct *tsk = current; |
848 | struct taskstats *tidstats, *tgidstats; | ||
847 | int group_dead; | 849 | int group_dead; |
848 | 850 | ||
849 | profile_task_exit(tsk); | 851 | profile_task_exit(tsk); |
@@ -882,6 +884,8 @@ fastcall NORET_TYPE void do_exit(long code) | |||
882 | current->comm, current->pid, | 884 | current->comm, current->pid, |
883 | preempt_count()); | 885 | preempt_count()); |
884 | 886 | ||
887 | taskstats_exit_alloc(&tidstats, &tgidstats); | ||
888 | |||
885 | acct_update_integrals(tsk); | 889 | acct_update_integrals(tsk); |
886 | if (tsk->mm) { | 890 | if (tsk->mm) { |
887 | update_hiwater_rss(tsk->mm); | 891 | update_hiwater_rss(tsk->mm); |
@@ -901,7 +905,10 @@ fastcall NORET_TYPE void do_exit(long code) | |||
901 | #endif | 905 | #endif |
902 | if (unlikely(tsk->audit_context)) | 906 | if (unlikely(tsk->audit_context)) |
903 | audit_free(tsk); | 907 | audit_free(tsk); |
908 | taskstats_exit_send(tsk, tidstats, tgidstats); | ||
909 | taskstats_exit_free(tidstats, tgidstats); | ||
904 | delayacct_tsk_exit(tsk); | 910 | delayacct_tsk_exit(tsk); |
911 | |||
905 | exit_mm(tsk); | 912 | exit_mm(tsk); |
906 | 913 | ||
907 | if (group_dead) | 914 | if (group_dead) |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c new file mode 100644 index 000000000000..82ec9137d908 --- /dev/null +++ b/kernel/taskstats.c | |||
@@ -0,0 +1,336 @@ | |||
1 | /* | ||
2 | * taskstats.c - Export per-task statistics to userland | ||
3 | * | ||
4 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 | ||
5 | * (C) Balbir Singh, IBM Corp. 2006 | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/taskstats_kern.h> | ||
21 | #include <net/genetlink.h> | ||
22 | #include <asm/atomic.h> | ||
23 | |||
24 | static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; | ||
25 | static int family_registered; | ||
26 | kmem_cache_t *taskstats_cache; | ||
27 | static DEFINE_MUTEX(taskstats_exit_mutex); | ||
28 | |||
29 | static struct genl_family family = { | ||
30 | .id = GENL_ID_GENERATE, | ||
31 | .name = TASKSTATS_GENL_NAME, | ||
32 | .version = TASKSTATS_GENL_VERSION, | ||
33 | .maxattr = TASKSTATS_CMD_ATTR_MAX, | ||
34 | }; | ||
35 | |||
36 | static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] | ||
37 | __read_mostly = { | ||
38 | [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, | ||
39 | [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, | ||
40 | }; | ||
41 | |||
42 | |||
43 | static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, | ||
44 | void **replyp, size_t size) | ||
45 | { | ||
46 | struct sk_buff *skb; | ||
47 | void *reply; | ||
48 | |||
49 | /* | ||
50 | * If new attributes are added, please revisit this allocation | ||
51 | */ | ||
52 | skb = nlmsg_new(size); | ||
53 | if (!skb) | ||
54 | return -ENOMEM; | ||
55 | |||
56 | if (!info) { | ||
57 | int seq = get_cpu_var(taskstats_seqnum)++; | ||
58 | put_cpu_var(taskstats_seqnum); | ||
59 | |||
60 | reply = genlmsg_put(skb, 0, seq, | ||
61 | family.id, 0, 0, | ||
62 | cmd, family.version); | ||
63 | } else | ||
64 | reply = genlmsg_put(skb, info->snd_pid, info->snd_seq, | ||
65 | family.id, 0, 0, | ||
66 | cmd, family.version); | ||
67 | if (reply == NULL) { | ||
68 | nlmsg_free(skb); | ||
69 | return -EINVAL; | ||
70 | } | ||
71 | |||
72 | *skbp = skb; | ||
73 | *replyp = reply; | ||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | static int send_reply(struct sk_buff *skb, pid_t pid, int event) | ||
78 | { | ||
79 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); | ||
80 | void *reply; | ||
81 | int rc; | ||
82 | |||
83 | reply = genlmsg_data(genlhdr); | ||
84 | |||
85 | rc = genlmsg_end(skb, reply); | ||
86 | if (rc < 0) { | ||
87 | nlmsg_free(skb); | ||
88 | return rc; | ||
89 | } | ||
90 | |||
91 | if (event == TASKSTATS_MSG_MULTICAST) | ||
92 | return genlmsg_multicast(skb, pid, TASKSTATS_LISTEN_GROUP); | ||
93 | return genlmsg_unicast(skb, pid); | ||
94 | } | ||
95 | |||
96 | static int fill_pid(pid_t pid, struct task_struct *pidtsk, | ||
97 | struct taskstats *stats) | ||
98 | { | ||
99 | int rc; | ||
100 | struct task_struct *tsk = pidtsk; | ||
101 | |||
102 | if (!pidtsk) { | ||
103 | read_lock(&tasklist_lock); | ||
104 | tsk = find_task_by_pid(pid); | ||
105 | if (!tsk) { | ||
106 | read_unlock(&tasklist_lock); | ||
107 | return -ESRCH; | ||
108 | } | ||
109 | get_task_struct(tsk); | ||
110 | read_unlock(&tasklist_lock); | ||
111 | } else | ||
112 | get_task_struct(tsk); | ||
113 | |||
114 | /* | ||
115 | * Each accounting subsystem adds calls to its functions to | ||
116 | * fill in relevant parts of struct taskstsats as follows | ||
117 | * | ||
118 | * rc = per-task-foo(stats, tsk); | ||
119 | * if (rc) | ||
120 | * goto err; | ||
121 | */ | ||
122 | |||
123 | err: | ||
124 | put_task_struct(tsk); | ||
125 | return rc; | ||
126 | |||
127 | } | ||
128 | |||
129 | static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, | ||
130 | struct taskstats *stats) | ||
131 | { | ||
132 | int rc; | ||
133 | struct task_struct *tsk, *first; | ||
134 | |||
135 | first = tgidtsk; | ||
136 | read_lock(&tasklist_lock); | ||
137 | if (!first) { | ||
138 | first = find_task_by_pid(tgid); | ||
139 | if (!first) { | ||
140 | read_unlock(&tasklist_lock); | ||
141 | return -ESRCH; | ||
142 | } | ||
143 | } | ||
144 | tsk = first; | ||
145 | do { | ||
146 | /* | ||
147 | * Each accounting subsystem adds calls its functions to | ||
148 | * fill in relevant parts of struct taskstsats as follows | ||
149 | * | ||
150 | * rc = per-task-foo(stats, tsk); | ||
151 | * if (rc) | ||
152 | * break; | ||
153 | */ | ||
154 | |||
155 | } while_each_thread(first, tsk); | ||
156 | read_unlock(&tasklist_lock); | ||
157 | |||
158 | /* | ||
159 | * Accounting subsytems can also add calls here if they don't | ||
160 | * wish to aggregate statistics for per-tgid stats | ||
161 | */ | ||
162 | |||
163 | return rc; | ||
164 | } | ||
165 | |||
166 | static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info) | ||
167 | { | ||
168 | int rc = 0; | ||
169 | struct sk_buff *rep_skb; | ||
170 | struct taskstats stats; | ||
171 | void *reply; | ||
172 | size_t size; | ||
173 | struct nlattr *na; | ||
174 | |||
175 | /* | ||
176 | * Size includes space for nested attributes | ||
177 | */ | ||
178 | size = nla_total_size(sizeof(u32)) + | ||
179 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
180 | |||
181 | memset(&stats, 0, sizeof(stats)); | ||
182 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); | ||
183 | if (rc < 0) | ||
184 | return rc; | ||
185 | |||
186 | if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { | ||
187 | u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); | ||
188 | rc = fill_pid(pid, NULL, &stats); | ||
189 | if (rc < 0) | ||
190 | goto err; | ||
191 | |||
192 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); | ||
193 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid); | ||
194 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
195 | stats); | ||
196 | } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { | ||
197 | u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | ||
198 | rc = fill_tgid(tgid, NULL, &stats); | ||
199 | if (rc < 0) | ||
200 | goto err; | ||
201 | |||
202 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); | ||
203 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid); | ||
204 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
205 | stats); | ||
206 | } else { | ||
207 | rc = -EINVAL; | ||
208 | goto err; | ||
209 | } | ||
210 | |||
211 | nla_nest_end(rep_skb, na); | ||
212 | |||
213 | return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST); | ||
214 | |||
215 | nla_put_failure: | ||
216 | return genlmsg_cancel(rep_skb, reply); | ||
217 | err: | ||
218 | nlmsg_free(rep_skb); | ||
219 | return rc; | ||
220 | } | ||
221 | |||
222 | /* Send pid data out on exit */ | ||
223 | void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, | ||
224 | struct taskstats *tgidstats) | ||
225 | { | ||
226 | int rc; | ||
227 | struct sk_buff *rep_skb; | ||
228 | void *reply; | ||
229 | size_t size; | ||
230 | int is_thread_group; | ||
231 | struct nlattr *na; | ||
232 | |||
233 | if (!family_registered || !tidstats) | ||
234 | return; | ||
235 | |||
236 | mutex_lock(&taskstats_exit_mutex); | ||
237 | |||
238 | is_thread_group = !thread_group_empty(tsk); | ||
239 | rc = 0; | ||
240 | |||
241 | /* | ||
242 | * Size includes space for nested attributes | ||
243 | */ | ||
244 | size = nla_total_size(sizeof(u32)) + | ||
245 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
246 | |||
247 | if (is_thread_group) | ||
248 | size = 2 * size; /* PID + STATS + TGID + STATS */ | ||
249 | |||
250 | rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); | ||
251 | if (rc < 0) | ||
252 | goto ret; | ||
253 | |||
254 | rc = fill_pid(tsk->pid, tsk, tidstats); | ||
255 | if (rc < 0) | ||
256 | goto err_skb; | ||
257 | |||
258 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); | ||
259 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid); | ||
260 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
261 | *tidstats); | ||
262 | nla_nest_end(rep_skb, na); | ||
263 | |||
264 | if (!is_thread_group || !tgidstats) { | ||
265 | send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); | ||
266 | goto ret; | ||
267 | } | ||
268 | |||
269 | rc = fill_tgid(tsk->pid, tsk, tgidstats); | ||
270 | /* | ||
271 | * If fill_tgid() failed then one probable reason could be that the | ||
272 | * thread group leader has exited. fill_tgid() will fail, send out | ||
273 | * the pid statistics collected earlier. | ||
274 | */ | ||
275 | if (rc < 0) { | ||
276 | send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); | ||
277 | goto ret; | ||
278 | } | ||
279 | |||
280 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); | ||
281 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid); | ||
282 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
283 | *tgidstats); | ||
284 | nla_nest_end(rep_skb, na); | ||
285 | |||
286 | send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); | ||
287 | goto ret; | ||
288 | |||
289 | nla_put_failure: | ||
290 | genlmsg_cancel(rep_skb, reply); | ||
291 | goto ret; | ||
292 | err_skb: | ||
293 | nlmsg_free(rep_skb); | ||
294 | ret: | ||
295 | mutex_unlock(&taskstats_exit_mutex); | ||
296 | return; | ||
297 | } | ||
298 | |||
299 | static struct genl_ops taskstats_ops = { | ||
300 | .cmd = TASKSTATS_CMD_GET, | ||
301 | .doit = taskstats_send_stats, | ||
302 | .policy = taskstats_cmd_get_policy, | ||
303 | }; | ||
304 | |||
305 | /* Needed early in initialization */ | ||
306 | void __init taskstats_init_early(void) | ||
307 | { | ||
308 | taskstats_cache = kmem_cache_create("taskstats_cache", | ||
309 | sizeof(struct taskstats), | ||
310 | 0, SLAB_PANIC, NULL, NULL); | ||
311 | } | ||
312 | |||
313 | static int __init taskstats_init(void) | ||
314 | { | ||
315 | int rc; | ||
316 | |||
317 | rc = genl_register_family(&family); | ||
318 | if (rc) | ||
319 | return rc; | ||
320 | |||
321 | rc = genl_register_ops(&family, &taskstats_ops); | ||
322 | if (rc < 0) | ||
323 | goto err; | ||
324 | |||
325 | family_registered = 1; | ||
326 | return 0; | ||
327 | err: | ||
328 | genl_unregister_family(&family); | ||
329 | return rc; | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * late initcall ensures initialization of statistics collection | ||
334 | * mechanisms precedes initialization of the taskstats interface | ||
335 | */ | ||
336 | late_initcall(taskstats_init); | ||