aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShailabh Nagar <nagar@watson.ibm.com>2006-07-14 03:24:42 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-07-15 00:53:57 -0400
commita3baf649ca9ca0a96fba538f03b0f17c043b755c (patch)
tree6022cb01cd494f59dd474030f2d9980413000036
parent6f44993fe1d7b2b097f6ac60cd5835c6f5ca0874 (diff)
[PATCH] per-task-delay-accounting: documentation
Some documentation for delay accounting. Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com> Signed-off-by: Balbir Singh <balbir@in.ibm.com> Cc: Jes Sorensen <jes@sgi.com> Cc: Peter Chubb <peterc@gelato.unsw.edu.au> Cc: Erich Focht <efocht@ess.nec.de> Cc: Levent Serinol <lserinol@gmail.com> Cc: Jay Lan <jlan@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--Documentation/accounting/delay-accounting.txt115
-rw-r--r--Documentation/accounting/getdelays.c376
-rw-r--r--Documentation/accounting/taskstats.txt2
3 files changed, 493 insertions, 0 deletions
diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt
new file mode 100644
index 000000000000..f3dc0ca04fa4
--- /dev/null
+++ b/Documentation/accounting/delay-accounting.txt
@@ -0,0 +1,115 @@
1Delay accounting
2----------------
3
4Tasks encounter delays in execution when they wait
5for some kernel resource to become available e.g. a
6runnable task may wait for a free CPU to run on.
7
8The per-task delay accounting functionality measures
9the delays experienced by a task while
10
11a) waiting for a CPU (while being runnable)
12b) completion of synchronous block I/O initiated by the task
13c) swapping in pages
14
15and makes these statistics available to userspace through
16the taskstats interface.
17
18Such delays provide feedback for setting a task's cpu priority,
19io priority and rss limit values appropriately. Long delays for
20important tasks could be a trigger for raising their corresponding priorities.
21
22The functionality, through its use of the taskstats interface, also provides
23delay statistics aggregated for all tasks (or threads) belonging to a
24thread group (corresponding to a traditional Unix process). This is a commonly
25needed aggregation that is more efficiently done by the kernel.
26
27Userspace utilities, particularly resource management applications, can also
28aggregate delay statistics into arbitrary groups. To enable this, delay
29statistics of a task are available both during its lifetime and on its
30exit, ensuring continuous and complete monitoring can be done.
31
32
33Interface
34---------
35
36Delay accounting uses the taskstats interface which is described
37in detail in a separate document in this directory. Taskstats returns a
38generic data structure to userspace corresponding to per-pid and per-tgid
39statistics. The delay accounting functionality populates specific fields of
40this structure. See
41 include/linux/taskstats.h
42for a description of the fields pertaining to delay accounting.
43It will generally be in the form of counters returning the cumulative
44delay seen for cpu, sync block I/O, swapin etc.
45
46Taking the difference of two successive readings of a given
47counter (say cpu_delay_total) for a task will give the delay
48experienced by the task waiting for the corresponding resource
49in that interval.
50
51When a task exits, records containing the per-task and per-process statistics
52are sent to userspace without requiring a command. More details are given in
53the taskstats interface description.
54
55The getdelays.c userspace utility in this directory allows simple commands to
56be run and the corresponding delay statistics to be displayed. It also serves
57as an example of using the taskstats interface.
58
59Usage
60-----
61
62Compile the kernel with
63 CONFIG_TASK_DELAY_ACCT=y
64 CONFIG_TASKSTATS=y
65
66Enable the accounting at boot time by adding
67the following to the kernel boot options
68 delayacct
69
70and after the system has booted up, use a utility
71similar to getdelays.c to access the delays
72seen by a given task or a task group (tgid).
73The utility also allows a given command to be
74executed and the corresponding delays to be
75seen.
76
77General format of the getdelays command
78
79getdelays [-t tgid] [-p pid] [-c cmd...]
80
81
82Get delays, since system boot, for pid 10
83# ./getdelays -p 10
84(output similar to next case)
85
86Get sum of delays, since system boot, for all pids with tgid 5
87# ./getdelays -t 5
88
89
90CPU count real total virtual total delay total
91 7876 92005750 100000000 24001500
92IO count delay total
93 0 0
94MEM count delay total
95 0 0
96
97Get delays seen in executing a given simple command
98# ./getdelays -c ls /
99
100bin data1 data3 data5 dev home media opt root srv sys usr
101boot data2 data4 data6 etc lib mnt proc sbin subdomain tmp var
102
103
104CPU count real total virtual total delay total
105 6 4000250 4000000 0
106IO count delay total
107 0 0
108MEM count delay total
109 0 0
110
111
112
113
114
115
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
new file mode 100644
index 000000000000..33de89e56a3d
--- /dev/null
+++ b/Documentation/accounting/getdelays.c
@@ -0,0 +1,376 @@
1/* getdelays.c
2 *
3 * Utility to get per-pid and per-tgid delay accounting statistics
4 * Also illustrates usage of the taskstats interface
5 *
6 * Copyright (C) Shailabh Nagar, IBM Corp. 2005
7 * Copyright (C) Balbir Singh, IBM Corp. 2006
8 *
9 */
10
11#include <stdio.h>
12#include <stdlib.h>
13#include <errno.h>
14#include <unistd.h>
15#include <poll.h>
16#include <string.h>
17#include <fcntl.h>
18#include <sys/types.h>
19#include <sys/stat.h>
20#include <sys/socket.h>
21#include <sys/types.h>
22#include <signal.h>
23
24#include <linux/genetlink.h>
25#include <linux/taskstats.h>
26
27/*
28 * Generic macros for dealing with netlink sockets. Might be duplicated
29 * elsewhere. It is recommended that commercial grade applications use
30 * libnl or libnetlink and use the interfaces provided by the library
31 */
/* Start of the generic-netlink payload: skip the netlink and genetlink headers. */
#define GENLMSG_DATA(glh)	((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
/* Number of payload bytes that follow the genetlink header. */
#define GENLMSG_PAYLOAD(glh)	(NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
/* Start of an attribute's value: skip the attribute header. */
#define NLA_DATA(na)		((void *)((char *)(na) + NLA_HDRLEN))
/* Value length of an attribute whose total length is len. */
#define NLA_PAYLOAD(len)	((len) - NLA_HDRLEN)

/* Print a printf-style message to stdout, then exit with the given code. */
#define err(code, ...) do { printf(__VA_ARGS__); exit(code); } while (0)

/* Set to 1 by the SIGCHLD handler; polled by the receive loop in main(). */
volatile sig_atomic_t done = 0;
39
/*
 * Open a raw netlink socket for the given protocol and bind it to the
 * requested multicast group mask.
 *
 * Returns the socket descriptor on success, or -1 when either socket()
 * or bind() fails (the descriptor is closed again in the latter case).
 */
static int create_nl_socket(int protocol, int groups)
{
	struct sockaddr_nl self;
	int sock = socket(AF_NETLINK, SOCK_RAW, protocol);

	if (sock < 0)
		return -1;

	memset(&self, 0, sizeof(self));
	self.nl_family = AF_NETLINK;
	self.nl_groups = groups;

	if (bind(sock, (struct sockaddr *) &self, sizeof(self)) >= 0)
		return sock;

	/* bind failed: do not leak the descriptor */
	close(sock);
	return -1;
}
65
/*
 * Send bufLen bytes from buf on socket s to a zeroed AF_NETLINK address
 * (nl_pid 0), retrying after short writes and after EAGAIN.
 *
 * Returns 0 once a sendto() call has covered the remaining bytes, or -1
 * when sendto() transfers nothing and errno is anything but EAGAIN.
 */
int sendto_fd(int s, const char *buf, int bufLen)
{
	struct sockaddr_nl dest;
	int sent;

	memset(&dest, 0, sizeof(dest));
	dest.nl_family = AF_NETLINK;

	for (;;) {
		sent = sendto(s, buf, bufLen, 0,
			      (struct sockaddr *) &dest, sizeof(dest));
		if (sent >= bufLen)
			break;

		if (sent > 0) {
			/* short write: skip what already went out */
			buf += sent;
			bufLen -= sent;
			continue;
		}

		if (errno != EAGAIN)
			return -1;
	}

	return 0;
}
84
/*
 * Probe the controller in genetlink to find the family id
 * for the TASKSTATS family.
 *
 * sd: netlink socket on which the CTRL_CMD_GETFAMILY request is sent
 *     and the reply is read.
 *
 * Returns the family id carried in the reply's CTRL_ATTR_FAMILY_ID
 * attribute, or 0 when the reply contains no such attribute.
 * Terminates the process (exit code 1) when sending or receiving
 * fails, when the reply is malformed, or when the reply is an
 * NLMSG_ERROR message.
 */
int get_family_id(int sd)
{
	struct {
		struct nlmsghdr n;
		struct genlmsghdr g;
		char buf[256];
	} family_req, ans;
	struct nlattr *na;
	__u16 family_id;
	int id = 0;
	int rep_len;
	int attrs_len;
	int off;

	/* Zero both messages so no uninitialized stack bytes are sent or parsed */
	memset(&family_req, 0, sizeof(family_req));
	memset(&ans, 0, sizeof(ans));

	/* Build the request: look the family up by name */
	family_req.n.nlmsg_type = GENL_ID_CTRL;
	family_req.n.nlmsg_flags = NLM_F_REQUEST;
	family_req.n.nlmsg_seq = 0;
	family_req.n.nlmsg_pid = getpid();
	family_req.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
	family_req.g.cmd = CTRL_CMD_GETFAMILY;
	family_req.g.version = 0x1;
	na = (struct nlattr *) GENLMSG_DATA(&family_req);
	na->nla_type = CTRL_ATTR_FAMILY_NAME;
	na->nla_len = strlen(TASKSTATS_GENL_NAME) + 1 + NLA_HDRLEN;
	strcpy(NLA_DATA(na), TASKSTATS_GENL_NAME);
	family_req.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);

	if (sendto_fd(sd, (char *) &family_req, family_req.n.nlmsg_len) < 0)
		err(1, "error sending message via Netlink\n");

	rep_len = recv(sd, &ans, sizeof(ans), 0);
	if (rep_len < 0)
		err(1, "error receiving reply message via Netlink\n");

	/* Validate response message */
	if (!NLMSG_OK((&ans.n), rep_len))
		err(1, "invalid reply message received via Netlink\n");

	if (ans.n.nlmsg_type == NLMSG_ERROR) {	/* error */
		printf("error received NACK - leaving\n");
		exit(1);
	}

	/*
	 * Walk the reply attributes and pick out the family id, never
	 * reading past the payload length announced by the message.
	 */
	attrs_len = GENLMSG_PAYLOAD(&ans.n);
	for (off = 0; off + NLA_HDRLEN <= attrs_len;
	     off += NLA_ALIGN(na->nla_len)) {
		na = (struct nlattr *) ((char *) GENLMSG_DATA(&ans) + off);

		/* A truncated or zero-length attribute would stall the walk */
		if (na->nla_len < NLA_HDRLEN || off + na->nla_len > attrs_len)
			break;

		if (na->nla_type == CTRL_ATTR_FAMILY_ID &&
		    na->nla_len >= NLA_HDRLEN + (int) sizeof(family_id)) {
			/* memcpy avoids a type-punned, possibly unaligned load */
			memcpy(&family_id, NLA_DATA(na), sizeof(family_id));
			id = family_id;
			break;
		}
	}

	return id;
}
146
/*
 * Print one taskstats record to stdout as three small tables:
 * CPU (count, real total, virtual total, delay total),
 * IO (block I/O count and delay total) and
 * MEM (swap-in count and delay total).
 */
void print_taskstats(struct taskstats *t)
{
	/* CPU run-time and run-queue delay */
	printf("\n\nCPU %15s%15s%15s%15s\n"
	       " %15llu%15llu%15llu%15llu\n",
	       "count", "real total", "virtual total", "delay total",
	       t->cpu_count, t->cpu_run_real_total,
	       t->cpu_run_virtual_total, t->cpu_delay_total);

	/* block I/O delay */
	printf("IO %15s%15s\n"
	       " %15llu%15llu\n",
	       "count", "delay total",
	       t->blkio_count, t->blkio_delay_total);

	/* swap-in delay */
	printf("MEM %15s%15s\n"
	       " %15llu%15llu\n\n",
	       "count", "delay total",
	       t->swapin_count, t->swapin_delay_total);
}
162
163void sigchld(int sig)
164{
165 done = 1;
166}
167
168int main(int argc, char *argv[])
169{
170 int rc;
171 int sk_nl;
172 struct nlmsghdr *nlh;
173 struct genlmsghdr *genlhdr;
174 char *buf;
175 struct taskstats_cmd_param *param;
176 __u16 id;
177 struct nlattr *na;
178
179 /* For receiving */
180 struct sockaddr_nl kern_nla, from_nla;
181 socklen_t from_nla_len;
182 int recv_len;
183 struct taskstats_reply *reply;
184
185 struct {
186 struct nlmsghdr n;
187 struct genlmsghdr g;
188 char buf[256];
189 } req;
190
191 struct {
192 struct nlmsghdr n;
193 struct genlmsghdr g;
194 char buf[256];
195 } ans;
196
197 int nl_sd = -1;
198 int rep_len;
199 int len = 0;
200 int aggr_len, len2;
201 struct sockaddr_nl nladdr;
202 pid_t tid = 0;
203 pid_t rtid = 0;
204 int cmd_type = TASKSTATS_TYPE_TGID;
205 int c, status;
206 int forking = 0;
207 struct sigaction act = {
208 .sa_handler = SIG_IGN,
209 .sa_mask = SA_NOMASK,
210 };
211 struct sigaction tact ;
212
213 if (argc < 3) {
214 printf("usage %s [-t tgid][-p pid][-c cmd]\n", argv[0]);
215 exit(-1);
216 }
217
218 tact.sa_handler = sigchld;
219 sigemptyset(&tact.sa_mask);
220 if (sigaction(SIGCHLD, &tact, NULL) < 0)
221 err(1, "sigaction failed for SIGCHLD\n");
222
223 while (1) {
224
225 c = getopt(argc, argv, "t:p:c:");
226 if (c < 0)
227 break;
228
229 switch (c) {
230 case 't':
231 tid = atoi(optarg);
232 if (!tid)
233 err(1, "Invalid tgid\n");
234 cmd_type = TASKSTATS_CMD_ATTR_TGID;
235 break;
236 case 'p':
237 tid = atoi(optarg);
238 if (!tid)
239 err(1, "Invalid pid\n");
240 cmd_type = TASKSTATS_CMD_ATTR_TGID;
241 break;
242 case 'c':
243 opterr = 0;
244 tid = fork();
245 if (tid < 0)
246 err(1, "fork failed\n");
247
248 if (tid == 0) { /* child process */
249 if (execvp(argv[optind - 1], &argv[optind - 1]) < 0) {
250 exit(-1);
251 }
252 }
253 forking = 1;
254 break;
255 default:
256 printf("usage %s [-t tgid][-p pid][-c cmd]\n", argv[0]);
257 exit(-1);
258 break;
259 }
260 if (c == 'c')
261 break;
262 }
263
264 /* Construct Netlink request message */
265
266 /* Send Netlink request message & get reply */
267
268 if ((nl_sd =
269 create_nl_socket(NETLINK_GENERIC, TASKSTATS_LISTEN_GROUP)) < 0)
270 err(1, "error creating Netlink socket\n");
271
272
273 id = get_family_id(nl_sd);
274
275 /* Send command needed */
276 req.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
277 req.n.nlmsg_type = id;
278 req.n.nlmsg_flags = NLM_F_REQUEST;
279 req.n.nlmsg_seq = 0;
280 req.n.nlmsg_pid = tid;
281 req.g.cmd = TASKSTATS_CMD_GET;
282 na = (struct nlattr *) GENLMSG_DATA(&req);
283 na->nla_type = cmd_type;
284 na->nla_len = sizeof(unsigned int) + NLA_HDRLEN;
285 *(__u32 *) NLA_DATA(na) = tid;
286 req.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
287
288
289 if (!forking && sendto_fd(nl_sd, (char *) &req, req.n.nlmsg_len) < 0)
290 err(1, "error sending message via Netlink\n");
291
292 act.sa_handler = SIG_IGN;
293 sigemptyset(&act.sa_mask);
294 if (sigaction(SIGINT, &act, NULL) < 0)
295 err(1, "sigaction failed for SIGINT\n");
296
297 do {
298 int i;
299 struct pollfd pfd;
300 int pollres;
301
302 pfd.events = 0xffff & ~POLLOUT;
303 pfd.fd = nl_sd;
304 pollres = poll(&pfd, 1, 5000);
305 if (pollres < 0 || done) {
306 break;
307 }
308
309 rep_len = recv(nl_sd, &ans, sizeof(ans), 0);
310 nladdr.nl_family = AF_NETLINK;
311 nladdr.nl_groups = TASKSTATS_LISTEN_GROUP;
312
313 if (ans.n.nlmsg_type == NLMSG_ERROR) { /* error */
314 printf("error received NACK - leaving\n");
315 exit(1);
316 }
317
318 if (rep_len < 0) {
319 err(1, "error receiving reply message via Netlink\n");
320 break;
321 }
322
323 /* Validate response message */
324 if (!NLMSG_OK((&ans.n), rep_len))
325 err(1, "invalid reply message received via Netlink\n");
326
327 rep_len = GENLMSG_PAYLOAD(&ans.n);
328
329 na = (struct nlattr *) GENLMSG_DATA(&ans);
330 len = 0;
331 i = 0;
332 while (len < rep_len) {
333 len += NLA_ALIGN(na->nla_len);
334 switch (na->nla_type) {
335 case TASKSTATS_TYPE_AGGR_PID:
336 /* Fall through */
337 case TASKSTATS_TYPE_AGGR_TGID:
338 aggr_len = NLA_PAYLOAD(na->nla_len);
339 len2 = 0;
340 /* For nested attributes, na follows */
341 na = (struct nlattr *) NLA_DATA(na);
342 done = 0;
343 while (len2 < aggr_len) {
344 switch (na->nla_type) {
345 case TASKSTATS_TYPE_PID:
346 rtid = *(int *) NLA_DATA(na);
347 break;
348 case TASKSTATS_TYPE_TGID:
349 rtid = *(int *) NLA_DATA(na);
350 break;
351 case TASKSTATS_TYPE_STATS:
352 if (rtid == tid) {
353 print_taskstats((struct taskstats *)
354 NLA_DATA(na));
355 done = 1;
356 }
357 break;
358 }
359 len2 += NLA_ALIGN(na->nla_len);
360 na = (struct nlattr *) ((char *) na + len2);
361 if (done)
362 break;
363 }
364 }
365 na = (struct nlattr *) (GENLMSG_DATA(&ans) + len);
366 if (done)
367 break;
368 }
369 if (done)
370 break;
371 }
372 while (1);
373
374 close(nl_sd);
375 return 0;
376}
diff --git a/Documentation/accounting/taskstats.txt b/Documentation/accounting/taskstats.txt
index ad9b6997e162..acc6b4f37fc7 100644
--- a/Documentation/accounting/taskstats.txt
+++ b/Documentation/accounting/taskstats.txt
@@ -39,6 +39,8 @@ belongs (the task does not need to be the thread group leader). The need for
39per-tgid stats to be sent for each exiting task is explained in the per-tgid 39per-tgid stats to be sent for each exiting task is explained in the per-tgid
40stats section below. 40stats section below.
41 41
42getdelays.c is a simple utility demonstrating usage of the taskstats interface
43for reporting delay accounting statistics.
42 44
43Interface 45Interface
44--------- 46---------