aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeff Mahoney <jeffm@suse.com>2010-12-21 20:24:30 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2010-12-22 22:43:34 -0500
commit4be2c95d1f7706ca0e74499f2bd118e1cee19669 (patch)
tree97e8e316b9072ea84cb0afa5a161b1d1b10640cd
parent4e06fd14d5fa78826397c891654a37e5a36ee827 (diff)
taskstats: pad taskstats netlink response for aligment issues on ia64
The taskstats structure is internally aligned on 8 byte boundaries but the layout of the aggregrate reply, with two NLA headers and the pid (each 4 bytes), actually force the entire structure to be unaligned. This causes the kernel to issue unaligned access warnings on some architectures like ia64. Unfortunately, some software out there doesn't properly unroll the NLA packet and assumes that the start of the taskstats structure will always be 20 bytes from the start of the netlink payload. Aligning the start of the taskstats structure breaks this software, which we don't want. So, for now the alignment only happens on architectures that require it and those users will have to update to fixed versions of those packages. Space is reserved in the packet only when needed. This ifdef should be removed in several years e.g. 2012 once we can be confident that fixed versions are installed on most systems. We add the padding before the aggregate since the aggregate is already a defined type. Commit 85893120 ("delayacct: align to 8 byte boundary on 64-bit systems") previously addressed the alignment issues by padding out the pid field. This was supposed to be a compatible change but the circumstances described above mean that it wasn't. This patch backs out that change, since it was a hack, and introduces a new NULL attribute type to provide the padding. Padding the response with 4 bytes avoids allocating an aligned taskstats structure and copying it back. Since the structure weighs in at 328 bytes, it's too big to do it on the stack. Signed-off-by: Jeff Mahoney <jeffm@suse.com> Reported-by: Brian Rogers <brian@xyzw.org> Cc: Jeff Mahoney <jeffm@suse.com> Cc: Guillaume Chazarain <guichaz@gmail.com> Cc: Balbir Singh <balbir@in.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/accounting/getdelays.c1
-rw-r--r--include/linux/taskstats.h3
-rw-r--r--kernel/taskstats.c57
3 files changed, 47 insertions, 14 deletions
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index a2976a6de033..e9c77788a39d 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -516,6 +516,7 @@ int main(int argc, char *argv[])
516 default: 516 default:
517 fprintf(stderr, "Unknown nla_type %d\n", 517 fprintf(stderr, "Unknown nla_type %d\n",
518 na->nla_type); 518 na->nla_type);
519 case TASKSTATS_TYPE_NULL:
519 break; 520 break;
520 } 521 }
521 na = (struct nlattr *) (GENLMSG_DATA(&msg) + len); 522 na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 341dddb55090..2466e550a41d 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -33,7 +33,7 @@
33 */ 33 */
34 34
35 35
36#define TASKSTATS_VERSION 7 36#define TASKSTATS_VERSION 8
37#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN 37#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
38 * in linux/sched.h */ 38 * in linux/sched.h */
39 39
@@ -188,6 +188,7 @@ enum {
188 TASKSTATS_TYPE_STATS, /* taskstats structure */ 188 TASKSTATS_TYPE_STATS, /* taskstats structure */
189 TASKSTATS_TYPE_AGGR_PID, /* contains pid + stats */ 189 TASKSTATS_TYPE_AGGR_PID, /* contains pid + stats */
190 TASKSTATS_TYPE_AGGR_TGID, /* contains tgid + stats */ 190 TASKSTATS_TYPE_AGGR_TGID, /* contains tgid + stats */
191 TASKSTATS_TYPE_NULL, /* contains nothing */
191 __TASKSTATS_TYPE_MAX, 192 __TASKSTATS_TYPE_MAX,
192}; 193};
193 194
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index c8231fb15708..3308fd7f1b52 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -349,25 +349,47 @@ static int parse(struct nlattr *na, struct cpumask *mask)
349 return ret; 349 return ret;
350} 350}
351 351
352#ifdef CONFIG_IA64
353#define TASKSTATS_NEEDS_PADDING 1
354#endif
355
352static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) 356static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
353{ 357{
354 struct nlattr *na, *ret; 358 struct nlattr *na, *ret;
355 int aggr; 359 int aggr;
356 360
357 /* If we don't pad, we end up with alignment on a 4 byte boundary.
358 * This causes lots of runtime warnings on systems requiring 8 byte
359 * alignment */
360 u32 pids[2] = { pid, 0 };
361 int pid_size = ALIGN(sizeof(pid), sizeof(long));
362
363 aggr = (type == TASKSTATS_TYPE_PID) 361 aggr = (type == TASKSTATS_TYPE_PID)
364 ? TASKSTATS_TYPE_AGGR_PID 362 ? TASKSTATS_TYPE_AGGR_PID
365 : TASKSTATS_TYPE_AGGR_TGID; 363 : TASKSTATS_TYPE_AGGR_TGID;
366 364
365 /*
366 * The taskstats structure is internally aligned on 8 byte
367 * boundaries but the layout of the aggregrate reply, with
368 * two NLA headers and the pid (each 4 bytes), actually
369 * force the entire structure to be unaligned. This causes
370 * the kernel to issue unaligned access warnings on some
371 * architectures like ia64. Unfortunately, some software out there
372 * doesn't properly unroll the NLA packet and assumes that the start
373 * of the taskstats structure will always be 20 bytes from the start
374 * of the netlink payload. Aligning the start of the taskstats
375 * structure breaks this software, which we don't want. So, for now
376 * the alignment only happens on architectures that require it
377 * and those users will have to update to fixed versions of those
378 * packages. Space is reserved in the packet only when needed.
379 * This ifdef should be removed in several years e.g. 2012 once
380 * we can be confident that fixed versions are installed on most
381 * systems. We add the padding before the aggregate since the
382 * aggregate is already a defined type.
383 */
384#ifdef TASKSTATS_NEEDS_PADDING
385 if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0)
386 goto err;
387#endif
367 na = nla_nest_start(skb, aggr); 388 na = nla_nest_start(skb, aggr);
368 if (!na) 389 if (!na)
369 goto err; 390 goto err;
370 if (nla_put(skb, type, pid_size, pids) < 0) 391
392 if (nla_put(skb, type, sizeof(pid), &pid) < 0)
371 goto err; 393 goto err;
372 ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); 394 ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
373 if (!ret) 395 if (!ret)
@@ -456,6 +478,18 @@ out:
456 return rc; 478 return rc;
457} 479}
458 480
481static size_t taskstats_packet_size(void)
482{
483 size_t size;
484
485 size = nla_total_size(sizeof(u32)) +
486 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
487#ifdef TASKSTATS_NEEDS_PADDING
488 size += nla_total_size(0); /* Padding for alignment */
489#endif
490 return size;
491}
492
459static int cmd_attr_pid(struct genl_info *info) 493static int cmd_attr_pid(struct genl_info *info)
460{ 494{
461 struct taskstats *stats; 495 struct taskstats *stats;
@@ -464,8 +498,7 @@ static int cmd_attr_pid(struct genl_info *info)
464 u32 pid; 498 u32 pid;
465 int rc; 499 int rc;
466 500
467 size = nla_total_size(sizeof(u32)) + 501 size = taskstats_packet_size();
468 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
469 502
470 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); 503 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
471 if (rc < 0) 504 if (rc < 0)
@@ -494,8 +527,7 @@ static int cmd_attr_tgid(struct genl_info *info)
494 u32 tgid; 527 u32 tgid;
495 int rc; 528 int rc;
496 529
497 size = nla_total_size(sizeof(u32)) + 530 size = taskstats_packet_size();
498 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
499 531
500 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); 532 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
501 if (rc < 0) 533 if (rc < 0)
@@ -570,8 +602,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
570 /* 602 /*
571 * Size includes space for nested attributes 603 * Size includes space for nested attributes
572 */ 604 */
573 size = nla_total_size(sizeof(u32)) + 605 size = taskstats_packet_size();
574 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
575 606
576 is_thread_group = !!taskstats_tgid_alloc(tsk); 607 is_thread_group = !!taskstats_tgid_alloc(tsk);
577 if (is_thread_group) { 608 if (is_thread_group) {