diff options
author | Shailabh Nagar <nagar@watson.ibm.com> | 2006-07-14 03:24:45 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-07-15 00:53:57 -0400 |
commit | 9e06d3f9f6b14f6e3120923ed215032726246c98 (patch) | |
tree | df0509fedb0cf62bc59edc0038e55880bbc6a592 | |
parent | ad4ecbcba72855a2b5319b96e2a3a65ed1ca3bfd (diff) |
[PATCH] per task delay accounting taskstats interface: documentation fix
Change documentation and example program to reflect the flow control issues
being addressed by the cpumask changes.
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | Documentation/accounting/getdelays.c | 606 | ||||
-rw-r--r-- | Documentation/accounting/taskstats.txt | 64 |
2 files changed, 365 insertions, 305 deletions
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index 33de89e56a3d..795ca3911cc5 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * | 5 | * |
6 | * Copyright (C) Shailabh Nagar, IBM Corp. 2005 | 6 | * Copyright (C) Shailabh Nagar, IBM Corp. 2005 |
7 | * Copyright (C) Balbir Singh, IBM Corp. 2006 | 7 | * Copyright (C) Balbir Singh, IBM Corp. 2006 |
8 | * Copyright (c) Jay Lan, SGI. 2006 | ||
8 | * | 9 | * |
9 | */ | 10 | */ |
10 | 11 | ||
@@ -36,341 +37,360 @@ | |||
36 | 37 | ||
37 | #define err(code, fmt, arg...) do { printf(fmt, ##arg); exit(code); } while (0) | 38 | #define err(code, fmt, arg...) do { printf(fmt, ##arg); exit(code); } while (0) |
38 | int done = 0; | 39 | int done = 0; |
40 | int rcvbufsz=0; | ||
41 | |||
42 | char name[100]; | ||
43 | int dbg=0, print_delays=0; | ||
44 | __u64 stime, utime; | ||
45 | #define PRINTF(fmt, arg...) { \ | ||
46 | if (dbg) { \ | ||
47 | printf(fmt, ##arg); \ | ||
48 | } \ | ||
49 | } | ||
50 | |||
51 | /* Maximum size of response requested or message sent */ | ||
52 | #define MAX_MSG_SIZE 256 | ||
53 | /* Maximum number of cpus expected to be specified in a cpumask */ | ||
54 | #define MAX_CPUS 32 | ||
55 | /* Maximum length of pathname to log file */ | ||
56 | #define MAX_FILENAME 256 | ||
57 | |||
58 | struct msgtemplate { | ||
59 | struct nlmsghdr n; | ||
60 | struct genlmsghdr g; | ||
61 | char buf[MAX_MSG_SIZE]; | ||
62 | }; | ||
63 | |||
64 | char cpumask[100+6*MAX_CPUS]; | ||
39 | 65 | ||
40 | /* | 66 | /* |
41 | * Create a raw netlink socket and bind | 67 | * Create a raw netlink socket and bind |
42 | */ | 68 | */ |
43 | static int create_nl_socket(int protocol, int groups) | 69 | static int create_nl_socket(int protocol) |
44 | { | 70 | { |
45 | socklen_t addr_len; | 71 | int fd; |
46 | int fd; | 72 | struct sockaddr_nl local; |
47 | struct sockaddr_nl local; | 73 | |
48 | 74 | fd = socket(AF_NETLINK, SOCK_RAW, protocol); | |
49 | fd = socket(AF_NETLINK, SOCK_RAW, protocol); | 75 | if (fd < 0) |
50 | if (fd < 0) | 76 | return -1; |
51 | return -1; | 77 | |
78 | if (rcvbufsz) | ||
79 | if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, | ||
80 | &rcvbufsz, sizeof(rcvbufsz)) < 0) { | ||
81 | printf("Unable to set socket rcv buf size to %d\n", | ||
82 | rcvbufsz); | ||
83 | return -1; | ||
84 | } | ||
52 | 85 | ||
53 | memset(&local, 0, sizeof(local)); | 86 | memset(&local, 0, sizeof(local)); |
54 | local.nl_family = AF_NETLINK; | 87 | local.nl_family = AF_NETLINK; |
55 | local.nl_groups = groups; | ||
56 | 88 | ||
57 | if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) | 89 | if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) |
58 | goto error; | 90 | goto error; |
59 | 91 | ||
60 | return fd; | 92 | return fd; |
61 | error: | 93 | error: |
62 | close(fd); | 94 | close(fd); |
63 | return -1; | 95 | return -1; |
64 | } | 96 | } |
65 | 97 | ||
66 | int sendto_fd(int s, const char *buf, int bufLen) | 98 | |
99 | int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, | ||
100 | __u8 genl_cmd, __u16 nla_type, | ||
101 | void *nla_data, int nla_len) | ||
67 | { | 102 | { |
68 | struct sockaddr_nl nladdr; | 103 | struct nlattr *na; |
69 | int r; | 104 | struct sockaddr_nl nladdr; |
70 | 105 | int r, buflen; | |
71 | memset(&nladdr, 0, sizeof(nladdr)); | 106 | char *buf; |
72 | nladdr.nl_family = AF_NETLINK; | 107 | |
73 | 108 | struct msgtemplate msg; | |
74 | while ((r = sendto(s, buf, bufLen, 0, (struct sockaddr *) &nladdr, | 109 | |
75 | sizeof(nladdr))) < bufLen) { | 110 | msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); |
76 | if (r > 0) { | 111 | msg.n.nlmsg_type = nlmsg_type; |
77 | buf += r; | 112 | msg.n.nlmsg_flags = NLM_F_REQUEST; |
78 | bufLen -= r; | 113 | msg.n.nlmsg_seq = 0; |
79 | } else if (errno != EAGAIN) | 114 | msg.n.nlmsg_pid = nlmsg_pid; |
80 | return -1; | 115 | msg.g.cmd = genl_cmd; |
81 | } | 116 | msg.g.version = 0x1; |
82 | return 0; | 117 | na = (struct nlattr *) GENLMSG_DATA(&msg); |
118 | na->nla_type = nla_type; | ||
119 | na->nla_len = nla_len + 1 + NLA_HDRLEN; | ||
120 | memcpy(NLA_DATA(na), nla_data, nla_len); | ||
121 | msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); | ||
122 | |||
123 | buf = (char *) &msg; | ||
124 | buflen = msg.n.nlmsg_len ; | ||
125 | memset(&nladdr, 0, sizeof(nladdr)); | ||
126 | nladdr.nl_family = AF_NETLINK; | ||
127 | while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, | ||
128 | sizeof(nladdr))) < buflen) { | ||
129 | if (r > 0) { | ||
130 | buf += r; | ||
131 | buflen -= r; | ||
132 | } else if (errno != EAGAIN) | ||
133 | return -1; | ||
134 | } | ||
135 | return 0; | ||
83 | } | 136 | } |
84 | 137 | ||
138 | |||
85 | /* | 139 | /* |
86 | * Probe the controller in genetlink to find the family id | 140 | * Probe the controller in genetlink to find the family id |
87 | * for the TASKSTATS family | 141 | * for the TASKSTATS family |
88 | */ | 142 | */ |
89 | int get_family_id(int sd) | 143 | int get_family_id(int sd) |
90 | { | 144 | { |
91 | struct { | 145 | struct { |
92 | struct nlmsghdr n; | 146 | struct nlmsghdr n; |
93 | struct genlmsghdr g; | 147 | struct genlmsghdr g; |
94 | char buf[256]; | 148 | char buf[256]; |
95 | } family_req; | 149 | } ans; |
96 | struct { | 150 | |
97 | struct nlmsghdr n; | 151 | int id, rc; |
98 | struct genlmsghdr g; | 152 | struct nlattr *na; |
99 | char buf[256]; | 153 | int rep_len; |
100 | } ans; | 154 | |
101 | 155 | strcpy(name, TASKSTATS_GENL_NAME); | |
102 | int id; | 156 | rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, |
103 | struct nlattr *na; | 157 | CTRL_ATTR_FAMILY_NAME, (void *)name, |
104 | int rep_len; | 158 | strlen(TASKSTATS_GENL_NAME)+1); |
105 | 159 | ||
106 | /* Get family name */ | 160 | rep_len = recv(sd, &ans, sizeof(ans), 0); |
107 | family_req.n.nlmsg_type = GENL_ID_CTRL; | 161 | if (ans.n.nlmsg_type == NLMSG_ERROR || |
108 | family_req.n.nlmsg_flags = NLM_F_REQUEST; | 162 | (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) |
109 | family_req.n.nlmsg_seq = 0; | 163 | return 0; |
110 | family_req.n.nlmsg_pid = getpid(); | ||
111 | family_req.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); | ||
112 | family_req.g.cmd = CTRL_CMD_GETFAMILY; | ||
113 | family_req.g.version = 0x1; | ||
114 | na = (struct nlattr *) GENLMSG_DATA(&family_req); | ||
115 | na->nla_type = CTRL_ATTR_FAMILY_NAME; | ||
116 | na->nla_len = strlen(TASKSTATS_GENL_NAME) + 1 + NLA_HDRLEN; | ||
117 | strcpy(NLA_DATA(na), TASKSTATS_GENL_NAME); | ||
118 | family_req.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); | ||
119 | |||
120 | if (sendto_fd(sd, (char *) &family_req, family_req.n.nlmsg_len) < 0) | ||
121 | err(1, "error sending message via Netlink\n"); | ||
122 | |||
123 | rep_len = recv(sd, &ans, sizeof(ans), 0); | ||
124 | |||
125 | if (rep_len < 0) | ||
126 | err(1, "error receiving reply message via Netlink\n"); | ||
127 | |||
128 | |||
129 | /* Validate response message */ | ||
130 | if (!NLMSG_OK((&ans.n), rep_len)) | ||
131 | err(1, "invalid reply message received via Netlink\n"); | ||
132 | |||
133 | if (ans.n.nlmsg_type == NLMSG_ERROR) { /* error */ | ||
134 | printf("error received NACK - leaving\n"); | ||
135 | exit(1); | ||
136 | } | ||
137 | |||
138 | |||
139 | na = (struct nlattr *) GENLMSG_DATA(&ans); | ||
140 | na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); | ||
141 | if (na->nla_type == CTRL_ATTR_FAMILY_ID) { | ||
142 | id = *(__u16 *) NLA_DATA(na); | ||
143 | } | ||
144 | return id; | ||
145 | } | ||
146 | 164 | ||
147 | void print_taskstats(struct taskstats *t) | 165 | na = (struct nlattr *) GENLMSG_DATA(&ans); |
148 | { | 166 | na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); |
149 | printf("\n\nCPU %15s%15s%15s%15s\n" | 167 | if (na->nla_type == CTRL_ATTR_FAMILY_ID) { |
150 | " %15llu%15llu%15llu%15llu\n" | 168 | id = *(__u16 *) NLA_DATA(na); |
151 | "IO %15s%15s\n" | 169 | } |
152 | " %15llu%15llu\n" | 170 | return id; |
153 | "MEM %15s%15s\n" | ||
154 | " %15llu%15llu\n\n", | ||
155 | "count", "real total", "virtual total", "delay total", | ||
156 | t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total, | ||
157 | t->cpu_delay_total, | ||
158 | "count", "delay total", | ||
159 | t->blkio_count, t->blkio_delay_total, | ||
160 | "count", "delay total", t->swapin_count, t->swapin_delay_total); | ||
161 | } | 171 | } |
162 | 172 | ||
163 | void sigchld(int sig) | 173 | void print_delayacct(struct taskstats *t) |
164 | { | 174 | { |
165 | done = 1; | 175 | printf("\n\nCPU %15s%15s%15s%15s\n" |
176 | " %15llu%15llu%15llu%15llu\n" | ||
177 | "IO %15s%15s\n" | ||
178 | " %15llu%15llu\n" | ||
179 | "MEM %15s%15s\n" | ||
180 | " %15llu%15llu\n\n", | ||
181 | "count", "real total", "virtual total", "delay total", | ||
182 | t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total, | ||
183 | t->cpu_delay_total, | ||
184 | "count", "delay total", | ||
185 | t->blkio_count, t->blkio_delay_total, | ||
186 | "count", "delay total", t->swapin_count, t->swapin_delay_total); | ||
166 | } | 187 | } |
167 | 188 | ||
168 | int main(int argc, char *argv[]) | 189 | int main(int argc, char *argv[]) |
169 | { | 190 | { |
170 | int rc; | 191 | int c, rc, rep_len, aggr_len, len2, cmd_type; |
171 | int sk_nl; | 192 | __u16 id; |
172 | struct nlmsghdr *nlh; | 193 | __u32 mypid; |
173 | struct genlmsghdr *genlhdr; | 194 | |
174 | char *buf; | 195 | struct nlattr *na; |
175 | struct taskstats_cmd_param *param; | 196 | int nl_sd = -1; |
176 | __u16 id; | 197 | int len = 0; |
177 | struct nlattr *na; | 198 | pid_t tid = 0; |
178 | 199 | pid_t rtid = 0; | |
179 | /* For receiving */ | 200 | |
180 | struct sockaddr_nl kern_nla, from_nla; | 201 | int fd = 0; |
181 | socklen_t from_nla_len; | 202 | int count = 0; |
182 | int recv_len; | 203 | int write_file = 0; |
183 | struct taskstats_reply *reply; | 204 | int maskset = 0; |
184 | 205 | char logfile[128]; | |
185 | struct { | 206 | int loop = 0; |
186 | struct nlmsghdr n; | 207 | |
187 | struct genlmsghdr g; | 208 | struct msgtemplate msg; |
188 | char buf[256]; | 209 | |
189 | } req; | 210 | while (1) { |
211 | c = getopt(argc, argv, "dw:r:m:t:p:v:l"); | ||
212 | if (c < 0) | ||
213 | break; | ||
190 | 214 | ||
191 | struct { | 215 | switch (c) { |
192 | struct nlmsghdr n; | 216 | case 'd': |
193 | struct genlmsghdr g; | 217 | printf("print delayacct stats ON\n"); |
194 | char buf[256]; | 218 | print_delays = 1; |
195 | } ans; | 219 | break; |
196 | 220 | case 'w': | |
197 | int nl_sd = -1; | 221 | strncpy(logfile, optarg, MAX_FILENAME); |
198 | int rep_len; | 222 | printf("write to file %s\n", logfile); |
199 | int len = 0; | 223 | write_file = 1; |
200 | int aggr_len, len2; | 224 | break; |
201 | struct sockaddr_nl nladdr; | 225 | case 'r': |
202 | pid_t tid = 0; | 226 | rcvbufsz = atoi(optarg); |
203 | pid_t rtid = 0; | 227 | printf("receive buf size %d\n", rcvbufsz); |
204 | int cmd_type = TASKSTATS_TYPE_TGID; | 228 | if (rcvbufsz < 0) |
205 | int c, status; | 229 | err(1, "Invalid rcv buf size\n"); |
206 | int forking = 0; | 230 | break; |
207 | struct sigaction act = { | 231 | case 'm': |
208 | .sa_handler = SIG_IGN, | 232 | strncpy(cpumask, optarg, sizeof(cpumask)); |
209 | .sa_mask = SA_NOMASK, | 233 | maskset = 1; |
210 | }; | 234 | printf("cpumask %s maskset %d\n", cpumask, maskset); |
211 | struct sigaction tact ; | 235 | break; |
212 | 236 | case 't': | |
213 | if (argc < 3) { | 237 | tid = atoi(optarg); |
214 | printf("usage %s [-t tgid][-p pid][-c cmd]\n", argv[0]); | 238 | if (!tid) |
215 | exit(-1); | 239 | err(1, "Invalid tgid\n"); |
216 | } | 240 | cmd_type = TASKSTATS_CMD_ATTR_TGID; |
217 | 241 | print_delays = 1; | |
218 | tact.sa_handler = sigchld; | 242 | break; |
219 | sigemptyset(&tact.sa_mask); | 243 | case 'p': |
220 | if (sigaction(SIGCHLD, &tact, NULL) < 0) | 244 | tid = atoi(optarg); |
221 | err(1, "sigaction failed for SIGCHLD\n"); | 245 | if (!tid) |
222 | 246 | err(1, "Invalid pid\n"); | |
223 | while (1) { | 247 | cmd_type = TASKSTATS_CMD_ATTR_PID; |
224 | 248 | print_delays = 1; | |
225 | c = getopt(argc, argv, "t:p:c:"); | 249 | break; |
226 | if (c < 0) | 250 | case 'v': |
227 | break; | 251 | printf("debug on\n"); |
228 | 252 | dbg = 1; | |
229 | switch (c) { | 253 | break; |
230 | case 't': | 254 | case 'l': |
231 | tid = atoi(optarg); | 255 | printf("listen forever\n"); |
232 | if (!tid) | 256 | loop = 1; |
233 | err(1, "Invalid tgid\n"); | 257 | break; |
234 | cmd_type = TASKSTATS_CMD_ATTR_TGID; | 258 | default: |
235 | break; | 259 | printf("Unknown option %d\n", c); |
236 | case 'p': | 260 | exit(-1); |
237 | tid = atoi(optarg); | ||
238 | if (!tid) | ||
239 | err(1, "Invalid pid\n"); | ||
240 | cmd_type = TASKSTATS_CMD_ATTR_TGID; | ||
241 | break; | ||
242 | case 'c': | ||
243 | opterr = 0; | ||
244 | tid = fork(); | ||
245 | if (tid < 0) | ||
246 | err(1, "fork failed\n"); | ||
247 | |||
248 | if (tid == 0) { /* child process */ | ||
249 | if (execvp(argv[optind - 1], &argv[optind - 1]) < 0) { | ||
250 | exit(-1); | ||
251 | } | 261 | } |
252 | } | ||
253 | forking = 1; | ||
254 | break; | ||
255 | default: | ||
256 | printf("usage %s [-t tgid][-p pid][-c cmd]\n", argv[0]); | ||
257 | exit(-1); | ||
258 | break; | ||
259 | } | 262 | } |
260 | if (c == 'c') | ||
261 | break; | ||
262 | } | ||
263 | |||
264 | /* Construct Netlink request message */ | ||
265 | |||
266 | /* Send Netlink request message & get reply */ | ||
267 | 263 | ||
268 | if ((nl_sd = | 264 | if (write_file) { |
269 | create_nl_socket(NETLINK_GENERIC, TASKSTATS_LISTEN_GROUP)) < 0) | 265 | fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC, |
270 | err(1, "error creating Netlink socket\n"); | 266 | S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); |
271 | 267 | if (fd == -1) { | |
272 | 268 | perror("Cannot open output file\n"); | |
273 | id = get_family_id(nl_sd); | 269 | exit(1); |
274 | 270 | } | |
275 | /* Send command needed */ | 271 | } |
276 | req.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); | ||
277 | req.n.nlmsg_type = id; | ||
278 | req.n.nlmsg_flags = NLM_F_REQUEST; | ||
279 | req.n.nlmsg_seq = 0; | ||
280 | req.n.nlmsg_pid = tid; | ||
281 | req.g.cmd = TASKSTATS_CMD_GET; | ||
282 | na = (struct nlattr *) GENLMSG_DATA(&req); | ||
283 | na->nla_type = cmd_type; | ||
284 | na->nla_len = sizeof(unsigned int) + NLA_HDRLEN; | ||
285 | *(__u32 *) NLA_DATA(na) = tid; | ||
286 | req.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); | ||
287 | |||
288 | |||
289 | if (!forking && sendto_fd(nl_sd, (char *) &req, req.n.nlmsg_len) < 0) | ||
290 | err(1, "error sending message via Netlink\n"); | ||
291 | 272 | ||
292 | act.sa_handler = SIG_IGN; | 273 | if ((nl_sd = create_nl_socket(NETLINK_GENERIC)) < 0) |
293 | sigemptyset(&act.sa_mask); | 274 | err(1, "error creating Netlink socket\n"); |
294 | if (sigaction(SIGINT, &act, NULL) < 0) | ||
295 | err(1, "sigaction failed for SIGINT\n"); | ||
296 | 275 | ||
297 | do { | ||
298 | int i; | ||
299 | struct pollfd pfd; | ||
300 | int pollres; | ||
301 | 276 | ||
302 | pfd.events = 0xffff & ~POLLOUT; | 277 | mypid = getpid(); |
303 | pfd.fd = nl_sd; | 278 | id = get_family_id(nl_sd); |
304 | pollres = poll(&pfd, 1, 5000); | 279 | if (!id) { |
305 | if (pollres < 0 || done) { | 280 | printf("Error getting family id, errno %d", errno); |
306 | break; | 281 | goto err; |
307 | } | 282 | } |
308 | 283 | PRINTF("family id %d\n", id); | |
309 | rep_len = recv(nl_sd, &ans, sizeof(ans), 0); | 284 | |
310 | nladdr.nl_family = AF_NETLINK; | 285 | if (maskset) { |
311 | nladdr.nl_groups = TASKSTATS_LISTEN_GROUP; | 286 | rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, |
312 | 287 | TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, | |
313 | if (ans.n.nlmsg_type == NLMSG_ERROR) { /* error */ | 288 | &cpumask, sizeof(cpumask)); |
314 | printf("error received NACK - leaving\n"); | 289 | PRINTF("Sent register cpumask, retval %d\n", rc); |
315 | exit(1); | 290 | if (rc < 0) { |
291 | printf("error sending register cpumask\n"); | ||
292 | goto err; | ||
293 | } | ||
316 | } | 294 | } |
317 | 295 | ||
318 | if (rep_len < 0) { | 296 | if (tid) { |
319 | err(1, "error receiving reply message via Netlink\n"); | 297 | rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, |
320 | break; | 298 | cmd_type, &tid, sizeof(__u32)); |
299 | PRINTF("Sent pid/tgid, retval %d\n", rc); | ||
300 | if (rc < 0) { | ||
301 | printf("error sending tid/tgid cmd\n"); | ||
302 | goto done; | ||
303 | } | ||
321 | } | 304 | } |
322 | 305 | ||
323 | /* Validate response message */ | 306 | do { |
324 | if (!NLMSG_OK((&ans.n), rep_len)) | 307 | int i; |
325 | err(1, "invalid reply message received via Netlink\n"); | ||
326 | 308 | ||
327 | rep_len = GENLMSG_PAYLOAD(&ans.n); | 309 | rep_len = recv(nl_sd, &msg, sizeof(msg), 0); |
310 | PRINTF("received %d bytes\n", rep_len); | ||
328 | 311 | ||
329 | na = (struct nlattr *) GENLMSG_DATA(&ans); | 312 | if (rep_len < 0) { |
330 | len = 0; | 313 | printf("nonfatal reply error: errno %d\n", errno); |
331 | i = 0; | 314 | continue; |
332 | while (len < rep_len) { | 315 | } |
333 | len += NLA_ALIGN(na->nla_len); | 316 | if (msg.n.nlmsg_type == NLMSG_ERROR || |
334 | switch (na->nla_type) { | 317 | !NLMSG_OK((&msg.n), rep_len)) { |
335 | case TASKSTATS_TYPE_AGGR_PID: | 318 | printf("fatal reply error, errno %d\n", errno); |
336 | /* Fall through */ | 319 | goto done; |
337 | case TASKSTATS_TYPE_AGGR_TGID: | 320 | } |
338 | aggr_len = NLA_PAYLOAD(na->nla_len); | 321 | |
339 | len2 = 0; | 322 | PRINTF("nlmsghdr size=%d, nlmsg_len=%d, rep_len=%d\n", |
340 | /* For nested attributes, na follows */ | 323 | sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len); |
341 | na = (struct nlattr *) NLA_DATA(na); | 324 | |
342 | done = 0; | 325 | |
343 | while (len2 < aggr_len) { | 326 | rep_len = GENLMSG_PAYLOAD(&msg.n); |
344 | switch (na->nla_type) { | 327 | |
345 | case TASKSTATS_TYPE_PID: | 328 | na = (struct nlattr *) GENLMSG_DATA(&msg); |
346 | rtid = *(int *) NLA_DATA(na); | 329 | len = 0; |
347 | break; | 330 | i = 0; |
348 | case TASKSTATS_TYPE_TGID: | 331 | while (len < rep_len) { |
349 | rtid = *(int *) NLA_DATA(na); | 332 | len += NLA_ALIGN(na->nla_len); |
350 | break; | 333 | switch (na->nla_type) { |
351 | case TASKSTATS_TYPE_STATS: | 334 | case TASKSTATS_TYPE_AGGR_TGID: |
352 | if (rtid == tid) { | 335 | /* Fall through */ |
353 | print_taskstats((struct taskstats *) | 336 | case TASKSTATS_TYPE_AGGR_PID: |
354 | NLA_DATA(na)); | 337 | aggr_len = NLA_PAYLOAD(na->nla_len); |
355 | done = 1; | 338 | len2 = 0; |
339 | /* For nested attributes, na follows */ | ||
340 | na = (struct nlattr *) NLA_DATA(na); | ||
341 | done = 0; | ||
342 | while (len2 < aggr_len) { | ||
343 | switch (na->nla_type) { | ||
344 | case TASKSTATS_TYPE_PID: | ||
345 | rtid = *(int *) NLA_DATA(na); | ||
346 | if (print_delays) | ||
347 | printf("PID\t%d\n", rtid); | ||
348 | break; | ||
349 | case TASKSTATS_TYPE_TGID: | ||
350 | rtid = *(int *) NLA_DATA(na); | ||
351 | if (print_delays) | ||
352 | printf("TGID\t%d\n", rtid); | ||
353 | break; | ||
354 | case TASKSTATS_TYPE_STATS: | ||
355 | count++; | ||
356 | if (print_delays) | ||
357 | print_delayacct((struct taskstats *) NLA_DATA(na)); | ||
358 | if (fd) { | ||
359 | if (write(fd, NLA_DATA(na), na->nla_len) < 0) { | ||
360 | err(1,"write error\n"); | ||
361 | } | ||
362 | } | ||
363 | if (!loop) | ||
364 | goto done; | ||
365 | break; | ||
366 | default: | ||
367 | printf("Unknown nested nla_type %d\n", na->nla_type); | ||
368 | break; | ||
369 | } | ||
370 | len2 += NLA_ALIGN(na->nla_len); | ||
371 | na = (struct nlattr *) ((char *) na + len2); | ||
372 | } | ||
373 | break; | ||
374 | |||
375 | default: | ||
376 | printf("Unknown nla_type %d\n", na->nla_type); | ||
377 | break; | ||
356 | } | 378 | } |
357 | break; | 379 | na = (struct nlattr *) (GENLMSG_DATA(&msg) + len); |
358 | } | ||
359 | len2 += NLA_ALIGN(na->nla_len); | ||
360 | na = (struct nlattr *) ((char *) na + len2); | ||
361 | if (done) | ||
362 | break; | ||
363 | } | 380 | } |
364 | } | 381 | } while (loop); |
365 | na = (struct nlattr *) (GENLMSG_DATA(&ans) + len); | 382 | done: |
366 | if (done) | 383 | if (maskset) { |
367 | break; | 384 | rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, |
385 | TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK, | ||
386 | &cpumask, sizeof(cpumask)); | ||
387 | printf("Sent deregister mask, retval %d\n", rc); | ||
388 | if (rc < 0) | ||
389 | err(rc, "error sending deregister cpumask\n"); | ||
368 | } | 390 | } |
369 | if (done) | 391 | err: |
370 | break; | 392 | close(nl_sd); |
371 | } | 393 | if (fd) |
372 | while (1); | 394 | close(fd); |
373 | 395 | return 0; | |
374 | close(nl_sd); | ||
375 | return 0; | ||
376 | } | 396 | } |
diff --git a/Documentation/accounting/taskstats.txt b/Documentation/accounting/taskstats.txt index efd8f605bcd5..92ebf29e9041 100644 --- a/Documentation/accounting/taskstats.txt +++ b/Documentation/accounting/taskstats.txt | |||
@@ -26,20 +26,28 @@ leader - a process is deemed alive as long as it has any task belonging to it. | |||
26 | Usage | 26 | Usage |
27 | ----- | 27 | ----- |
28 | 28 | ||
29 | To get statistics during task's lifetime, userspace opens a unicast netlink | 29 | To get statistics during a task's lifetime, userspace opens a unicast netlink |
30 | socket (NETLINK_GENERIC family) and sends commands specifying a pid or a tgid. | 30 | socket (NETLINK_GENERIC family) and sends commands specifying a pid or a tgid. |
31 | The response contains statistics for a task (if pid is specified) or the sum of | 31 | The response contains statistics for a task (if pid is specified) or the sum of |
32 | statistics for all tasks of the process (if tgid is specified). | 32 | statistics for all tasks of the process (if tgid is specified). |
33 | 33 | ||
34 | To obtain statistics for tasks which are exiting, userspace opens a multicast | 34 | To obtain statistics for tasks which are exiting, the userspace listener |
35 | netlink socket. Each time a task exits, its per-pid statistics is always sent | 35 | sends a register command and specifies a cpumask. Whenever a task exits on |
36 | by the kernel to each listener on the multicast socket. In addition, if it is | 36 | one of the cpus in the cpumask, its per-pid statistics are sent to the |
37 | the last thread exiting its thread group, an additional record containing the | 37 | registered listener. Using cpumasks allows the data received by one listener |
38 | per-tgid stats are also sent. The latter contains the sum of per-pid stats for | 38 | to be limited and assists in flow control over the netlink interface; this is |
39 | all threads in the thread group, both past and present. | 39 | explained in more detail below. |
40 | |||
41 | If the exiting task is the last thread exiting its thread group, | ||
42 | an additional record containing the per-tgid stats is also sent to userspace. | ||
43 | The latter contains the sum of per-pid stats for all threads in the thread | ||
44 | group, both past and present. | ||
40 | 45 | ||
41 | getdelays.c is a simple utility demonstrating usage of the taskstats interface | 46 | getdelays.c is a simple utility demonstrating usage of the taskstats interface |
42 | for reporting delay accounting statistics. | 47 | for reporting delay accounting statistics. Users can register cpumasks, |
48 | send commands and process responses, listen for per-tid/tgid exit data, | ||
49 | write the data received to a file and do basic flow control by increasing | ||
50 | receive buffer sizes. | ||
43 | 51 | ||
44 | Interface | 52 | Interface |
45 | --------- | 53 | --------- |
@@ -66,10 +74,20 @@ The messages are in the format | |||
66 | 74 | ||
67 | The taskstats payload is one of the following three kinds: | 75 | The taskstats payload is one of the following three kinds: |
68 | 76 | ||
69 | 1. Commands: Sent from user to kernel. The payload is one attribute, of type | 77 | 1. Commands: Sent from user to kernel. Commands to get data on |
70 | TASKSTATS_CMD_ATTR_PID/TGID, containing a u32 pid or tgid in the attribute | 78 | a pid/tgid consist of one attribute, of type TASKSTATS_CMD_ATTR_PID/TGID, |
71 | payload. The pid/tgid denotes the task/process for which userspace wants | 79 | containing a u32 pid or tgid in the attribute payload. The pid/tgid denotes |
72 | statistics. | 80 | the task/process for which userspace wants statistics. |
81 | |||
82 | Commands to register/deregister interest in exit data from a set of cpus | ||
83 | consist of one attribute, of type | ||
84 | TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK and contain a cpumask in the | ||
85 | attribute payload. The cpumask is specified as an ascii string of | ||
86 | comma-separated cpu ranges e.g. to listen to exit data from cpus 1,2,3,5,7,8 | ||
87 | the cpumask would be "1-3,5,7-8". If userspace forgets to deregister interest | ||
88 | in cpus before closing the listening socket, the kernel cleans up its interest | ||
89 | set over time. However, for the sake of efficiency, an explicit deregistration | ||
90 | is advisable. | ||
73 | 91 | ||
74 | 2. Response for a command: sent from the kernel in response to a userspace | 92 | 2. Response for a command: sent from the kernel in response to a userspace |
75 | command. The payload is a series of three attributes of type: | 93 | command. The payload is a series of three attributes of type: |
@@ -138,4 +156,26 @@ struct too much, requiring disparate userspace accounting utilities to | |||
138 | unnecessarily receive large structures whose fields are of no interest, then | 156 | unnecessarily receive large structures whose fields are of no interest, then |
139 | extending the attributes structure would be worthwhile. | 157 | extending the attributes structure would be worthwhile. |
140 | 158 | ||
159 | Flow control for taskstats | ||
160 | -------------------------- | ||
161 | |||
162 | When the rate of task exits becomes large, a listener may not be able to keep | ||
163 | up with the kernel's rate of sending per-tid/tgid exit data, leading to data | |
164 | loss. This possibility gets compounded when the taskstats structure gets | ||
165 | extended and the number of cpus grows large. | ||
166 | |||
167 | To avoid losing statistics, userspace should do one or more of the following: | ||
168 | |||
169 | - increase the receive buffer sizes for the netlink sockets opened by | ||
170 | listeners to receive exit data. | ||
171 | |||
172 | - create more listeners and reduce the number of cpus being listened to by | ||
173 | each listener. In the extreme case, there could be one listener for each cpu. | ||
174 | Users may also consider setting the cpu affinity of the listener to the subset | ||
175 | of cpus to which it listens, especially if they are listening to just one cpu. | ||
176 | |||
177 | Despite these measures, if userspace receives ENOBUFS error messages | |
178 | indicating overflow of receive buffers, it should take measures to handle the | |
179 | loss of data. | ||
180 | |||
141 | ---- | 181 | ---- |