diff options
-rw-r--r-- | Documentation/accounting/cgroupstats.txt | 27 | ||||
-rw-r--r-- | include/linux/Kbuild | 1 | ||||
-rw-r--r-- | include/linux/cgroup.h | 8 | ||||
-rw-r--r-- | include/linux/cgroupstats.h | 70 | ||||
-rw-r--r-- | include/linux/delayacct.h | 13 | ||||
-rw-r--r-- | kernel/cgroup.c | 55 | ||||
-rw-r--r-- | kernel/taskstats.c | 67 |
7 files changed, 241 insertions, 0 deletions
diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.txt new file mode 100644 index 000000000000..eda40fd39cad --- /dev/null +++ b/Documentation/accounting/cgroupstats.txt | |||
@@ -0,0 +1,27 @@ | |||
1 | Control Groupstats is inspired by the discussion at | ||
2 | http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as | ||
3 | suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263. | ||
4 | |||
5 | Per cgroup statistics infrastructure re-uses code from the taskstats | ||
6 | interface. A new set of cgroup operations is registered with commands | ||
7 | and attributes specific to cgroups. It should be very easy to | ||
8 | extend per cgroup statistics, by adding members to the cgroupstats | ||
9 | structure. | ||
10 | |||
11 | The current model for cgroupstats is a pull model; a push model (to post | ||
12 | statistics on interesting events) should be very easy to add. Currently | ||
13 | user space requests statistics by passing a file descriptor opened on the | ||
14 | cgroup path. Statistics about the state of all the tasks in the cgroup are | ||
15 | returned to user space. | ||
16 | |||
17 | NOTE: We currently rely on delay accounting for extracting information | ||
18 | about tasks blocked on I/O. If CONFIG_TASK_DELAY_ACCT is disabled, this | ||
19 | information will not be available. | ||
20 | |||
21 | To extract cgroup statistics, a utility very similar to getdelays.c | ||
22 | has been developed; sample output of the utility is shown below: | ||
23 | |||
24 | ~/balbir/cgroupstats # ./getdelays -C "/cgroup/a" | ||
25 | sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0 | ||
26 | ~/balbir/cgroupstats # ./getdelays -C "/cgroup" | ||
27 | sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2 | ||
diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 7ac8303c8471..e3ffd14a3f0b 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild | |||
@@ -47,6 +47,7 @@ header-y += coda_psdev.h | |||
47 | header-y += coff.h | 47 | header-y += coff.h |
48 | header-y += comstats.h | 48 | header-y += comstats.h |
49 | header-y += const.h | 49 | header-y += const.h |
50 | header-y += cgroupstats.h | ||
50 | header-y += cycx_cfm.h | 51 | header-y += cycx_cfm.h |
51 | header-y += dlm_device.h | 52 | header-y += dlm_device.h |
52 | header-y += dlm_netlink.h | 53 | header-y += dlm_netlink.h |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9e9b7efa180b..87479328d46d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/cpumask.h> | 13 | #include <linux/cpumask.h> |
14 | #include <linux/nodemask.h> | 14 | #include <linux/nodemask.h> |
15 | #include <linux/rcupdate.h> | 15 | #include <linux/rcupdate.h> |
16 | #include <linux/cgroupstats.h> | ||
16 | 17 | ||
17 | #ifdef CONFIG_CGROUPS | 18 | #ifdef CONFIG_CGROUPS |
18 | 19 | ||
@@ -29,6 +30,8 @@ extern void cgroup_fork(struct task_struct *p); | |||
29 | extern void cgroup_fork_callbacks(struct task_struct *p); | 30 | extern void cgroup_fork_callbacks(struct task_struct *p); |
30 | extern void cgroup_post_fork(struct task_struct *p); | 31 | extern void cgroup_post_fork(struct task_struct *p); |
31 | extern void cgroup_exit(struct task_struct *p, int run_callbacks); | 32 | extern void cgroup_exit(struct task_struct *p, int run_callbacks); |
33 | extern int cgroupstats_build(struct cgroupstats *stats, | ||
34 | struct dentry *dentry); | ||
32 | 35 | ||
33 | extern struct file_operations proc_cgroup_operations; | 36 | extern struct file_operations proc_cgroup_operations; |
34 | 37 | ||
@@ -313,6 +316,11 @@ static inline void cgroup_exit(struct task_struct *p, int callbacks) {} | |||
313 | 316 | ||
314 | static inline void cgroup_lock(void) {} | 317 | static inline void cgroup_lock(void) {} |
315 | static inline void cgroup_unlock(void) {} | 318 | static inline void cgroup_unlock(void) {} |
319 | static inline int cgroupstats_build(struct cgroupstats *stats, | ||
320 | struct dentry *dentry) | ||
321 | { | ||
322 | return -EINVAL; | ||
323 | } | ||
316 | 324 | ||
317 | #endif /* !CONFIG_CGROUPS */ | 325 | #endif /* !CONFIG_CGROUPS */ |
318 | 326 | ||
diff --git a/include/linux/cgroupstats.h b/include/linux/cgroupstats.h new file mode 100644 index 000000000000..4f53abf6855d --- /dev/null +++ b/include/linux/cgroupstats.h | |||
@@ -0,0 +1,70 @@ | |||
1 | /* cgroupstats.h - exporting per-cgroup statistics | ||
2 | * | ||
3 | * Copyright IBM Corporation, 2007 | ||
4 | * Author Balbir Singh <balbir@linux.vnet.ibm.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of version 2.1 of the GNU Lesser General Public License | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, but | ||
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
13 | */ | ||
14 | |||
15 | #ifndef _LINUX_CGROUPSTATS_H | ||
16 | #define _LINUX_CGROUPSTATS_H | ||
17 | |||
18 | #include <linux/taskstats.h> | ||
19 | |||
20 | /* | ||
21 | * Data shared between user space and kernel space on a per cgroup | ||
22 | * basis. This data is shared using taskstats. | ||
23 | * | ||
24 | * Most of these states are derived by looking at the task->state value | ||
25 | * For the nr_io_wait state, a flag in the delay accounting structure | ||
26 | * indicates that the task is waiting on IO | ||
27 | * | ||
28 | * Each member is aligned to an 8-byte boundary. | ||
29 | */ | ||
30 | struct cgroupstats { | ||
31 | __u64 nr_sleeping; /* Number of tasks sleeping */ | ||
32 | __u64 nr_running; /* Number of tasks running */ | ||
33 | __u64 nr_stopped; /* Number of tasks in stopped state */ | ||
34 | __u64 nr_uninterruptible; /* Number of tasks in uninterruptible */ | ||
35 | /* state */ | ||
36 | __u64 nr_io_wait; /* Number of tasks waiting on IO */ | ||
37 | }; | ||
38 | |||
39 | /* | ||
40 | * Commands sent from userspace | ||
41 | * Not versioned. New commands should only be inserted at the enum's end | ||
42 | * prior to __CGROUPSTATS_CMD_MAX | ||
43 | */ | ||
44 | |||
45 | enum { | ||
46 | CGROUPSTATS_CMD_UNSPEC = __TASKSTATS_CMD_MAX, /* Reserved */ | ||
47 | CGROUPSTATS_CMD_GET, /* user->kernel request/get-response */ | ||
48 | CGROUPSTATS_CMD_NEW, /* kernel->user event */ | ||
49 | __CGROUPSTATS_CMD_MAX, | ||
50 | }; | ||
51 | |||
52 | #define CGROUPSTATS_CMD_MAX (__CGROUPSTATS_CMD_MAX - 1) | ||
53 | |||
/* Attribute types carried in kernel->user cgroupstats replies. */
enum {
	/* Reserved */
	CGROUPSTATS_TYPE_UNSPEC = 0,
	/* Payload is a struct cgroupstats */
	CGROUPSTATS_TYPE_CGROUP_STATS,
	/* Sentinel: keep last */
	__CGROUPSTATS_TYPE_MAX,
};
59 | |||
60 | #define CGROUPSTATS_TYPE_MAX (__CGROUPSTATS_TYPE_MAX - 1) | ||
61 | |||
/* Attributes accepted with user->kernel cgroupstats commands. */
enum {
	/* Reserved */
	CGROUPSTATS_CMD_ATTR_UNSPEC = 0,
	/* File descriptor identifying the cgroup to report on */
	CGROUPSTATS_CMD_ATTR_FD,
	/* Sentinel: keep last */
	__CGROUPSTATS_CMD_ATTR_MAX,
};
67 | |||
68 | #define CGROUPSTATS_CMD_ATTR_MAX (__CGROUPSTATS_CMD_ATTR_MAX - 1) | ||
69 | |||
70 | #endif /* _LINUX_CGROUPSTATS_H */ | ||
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 55d1ca5e60f5..ab94bc083558 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h | |||
@@ -26,6 +26,7 @@ | |||
26 | * Used to set current->delays->flags | 26 | * Used to set current->delays->flags |
27 | */ | 27 | */ |
28 | #define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */ | 28 | #define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */ |
29 | #define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */ | ||
29 | 30 | ||
30 | #ifdef CONFIG_TASK_DELAY_ACCT | 31 | #ifdef CONFIG_TASK_DELAY_ACCT |
31 | 32 | ||
@@ -39,6 +40,14 @@ extern void __delayacct_blkio_end(void); | |||
39 | extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); | 40 | extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); |
40 | extern __u64 __delayacct_blkio_ticks(struct task_struct *); | 41 | extern __u64 __delayacct_blkio_ticks(struct task_struct *); |
41 | 42 | ||
43 | static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) | ||
44 | { | ||
45 | if (p->delays) | ||
46 | return (p->delays->flags & DELAYACCT_PF_BLKIO); | ||
47 | else | ||
48 | return 0; | ||
49 | } | ||
50 | |||
42 | static inline void delayacct_set_flag(int flag) | 51 | static inline void delayacct_set_flag(int flag) |
43 | { | 52 | { |
44 | if (current->delays) | 53 | if (current->delays) |
@@ -71,6 +80,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk) | |||
71 | 80 | ||
72 | static inline void delayacct_blkio_start(void) | 81 | static inline void delayacct_blkio_start(void) |
73 | { | 82 | { |
83 | delayacct_set_flag(DELAYACCT_PF_BLKIO); | ||
74 | if (current->delays) | 84 | if (current->delays) |
75 | __delayacct_blkio_start(); | 85 | __delayacct_blkio_start(); |
76 | } | 86 | } |
@@ -79,6 +89,7 @@ static inline void delayacct_blkio_end(void) | |||
79 | { | 89 | { |
80 | if (current->delays) | 90 | if (current->delays) |
81 | __delayacct_blkio_end(); | 91 | __delayacct_blkio_end(); |
92 | delayacct_clear_flag(DELAYACCT_PF_BLKIO); | ||
82 | } | 93 | } |
83 | 94 | ||
84 | static inline int delayacct_add_tsk(struct taskstats *d, | 95 | static inline int delayacct_add_tsk(struct taskstats *d, |
@@ -116,6 +127,8 @@ static inline int delayacct_add_tsk(struct taskstats *d, | |||
116 | { return 0; } | 127 | { return 0; } |
117 | static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) | 128 | static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) |
118 | { return 0; } | 129 | { return 0; } |
/*
 * Stub used when CONFIG_TASK_DELAY_ACCT is disabled: without delay
 * accounting no task is ever reported as waiting on IO.
 */
static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
{
	return 0;
}
119 | #endif /* CONFIG_TASK_DELAY_ACCT */ | 132 | #endif /* CONFIG_TASK_DELAY_ACCT */ |
120 | 133 | ||
121 | #endif | 134 | #endif |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d65a1246829f..ca38db223f84 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -44,6 +44,9 @@ | |||
44 | #include <linux/string.h> | 44 | #include <linux/string.h> |
45 | #include <linux/sort.h> | 45 | #include <linux/sort.h> |
46 | #include <linux/kmod.h> | 46 | #include <linux/kmod.h> |
47 | #include <linux/delayacct.h> | ||
48 | #include <linux/cgroupstats.h> | ||
| #include <linux/stat.h> | ||
49 | |||
47 | #include <asm/atomic.h> | 50 | #include <asm/atomic.h> |
48 | 51 | ||
49 | static DEFINE_MUTEX(cgroup_mutex); | 52 | static DEFINE_MUTEX(cgroup_mutex); |
@@ -1766,6 +1769,58 @@ static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cont) | |||
1766 | return n; | 1769 | return n; |
1767 | } | 1770 | } |
1768 | 1771 | ||
1772 | /** | ||
1773 | * Build and fill cgroupstats so that taskstats can export it to user | ||
1774 | * space. | ||
1775 | * | ||
1776 | * @stats: cgroupstats to fill information into | ||
1777 | * @dentry: A dentry entry belonging to the cgroup for which stats have | ||
1778 | * been requested. | ||
1779 | */ | ||
1780 | int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | ||
1781 | { | ||
1782 | int ret = -EINVAL; | ||
1783 | struct cgroup *cont; | ||
1784 | struct cgroup_iter it; | ||
1785 | struct task_struct *tsk; | ||
1786 | /* | ||
1787 | * Validate dentry by checking the superblock operations | ||
1788 | */ | ||
1789 | if (dentry->d_sb->s_op != &cgroup_ops) | ||
1790 | goto err; | ||
1791 | |||
1792 | ret = 0; | ||
1793 | cont = dentry->d_fsdata; | ||
1794 | rcu_read_lock(); | ||
1795 | |||
1796 | cgroup_iter_start(cont, &it); | ||
1797 | while ((tsk = cgroup_iter_next(cont, &it))) { | ||
1798 | switch (tsk->state) { | ||
1799 | case TASK_RUNNING: | ||
1800 | stats->nr_running++; | ||
1801 | break; | ||
1802 | case TASK_INTERRUPTIBLE: | ||
1803 | stats->nr_sleeping++; | ||
1804 | break; | ||
1805 | case TASK_UNINTERRUPTIBLE: | ||
1806 | stats->nr_uninterruptible++; | ||
1807 | break; | ||
1808 | case TASK_STOPPED: | ||
1809 | stats->nr_stopped++; | ||
1810 | break; | ||
1811 | default: | ||
1812 | if (delayacct_is_task_waiting_on_io(tsk)) | ||
1813 | stats->nr_io_wait++; | ||
1814 | break; | ||
1815 | } | ||
1816 | } | ||
1817 | cgroup_iter_end(cont, &it); | ||
1818 | |||
1819 | rcu_read_unlock(); | ||
1820 | err: | ||
1821 | return ret; | ||
1822 | } | ||
1823 | |||
1769 | static int cmppid(const void *a, const void *b) | 1824 | static int cmppid(const void *a, const void *b) |
1770 | { | 1825 | { |
1771 | return *(pid_t *)a - *(pid_t *)b; | 1826 | return *(pid_t *)a - *(pid_t *)b; |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 7d4d7f9c1bb2..9f360f68aad6 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -22,6 +22,10 @@ | |||
22 | #include <linux/delayacct.h> | 22 | #include <linux/delayacct.h> |
23 | #include <linux/cpumask.h> | 23 | #include <linux/cpumask.h> |
24 | #include <linux/percpu.h> | 24 | #include <linux/percpu.h> |
25 | #include <linux/cgroupstats.h> | ||
26 | #include <linux/cgroup.h> | ||
27 | #include <linux/fs.h> | ||
28 | #include <linux/file.h> | ||
25 | #include <net/genetlink.h> | 29 | #include <net/genetlink.h> |
26 | #include <asm/atomic.h> | 30 | #include <asm/atomic.h> |
27 | 31 | ||
@@ -49,6 +53,11 @@ __read_mostly = { | |||
49 | [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, | 53 | [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, |
50 | [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; | 54 | [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; |
51 | 55 | ||
56 | static struct nla_policy | ||
57 | cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] __read_mostly = { | ||
58 | [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 }, | ||
59 | }; | ||
60 | |||
52 | struct listener { | 61 | struct listener { |
53 | struct list_head list; | 62 | struct list_head list; |
54 | pid_t pid; | 63 | pid_t pid; |
@@ -372,6 +381,51 @@ err: | |||
372 | return NULL; | 381 | return NULL; |
373 | } | 382 | } |
374 | 383 | ||
384 | static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | ||
385 | { | ||
386 | int rc = 0; | ||
387 | struct sk_buff *rep_skb; | ||
388 | struct cgroupstats *stats; | ||
389 | struct nlattr *na; | ||
390 | size_t size; | ||
391 | u32 fd; | ||
392 | struct file *file; | ||
393 | int fput_needed; | ||
394 | |||
395 | na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; | ||
396 | if (!na) | ||
397 | return -EINVAL; | ||
398 | |||
399 | fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); | ||
400 | file = fget_light(fd, &fput_needed); | ||
401 | if (file) { | ||
402 | size = nla_total_size(sizeof(struct cgroupstats)); | ||
403 | |||
404 | rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb, | ||
405 | size); | ||
406 | if (rc < 0) | ||
407 | goto err; | ||
408 | |||
409 | na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, | ||
410 | sizeof(struct cgroupstats)); | ||
411 | stats = nla_data(na); | ||
412 | memset(stats, 0, sizeof(*stats)); | ||
413 | |||
414 | rc = cgroupstats_build(stats, file->f_dentry); | ||
415 | if (rc < 0) | ||
416 | goto err; | ||
417 | |||
418 | fput_light(file, fput_needed); | ||
419 | return send_reply(rep_skb, info->snd_pid); | ||
420 | } | ||
421 | |||
422 | err: | ||
423 | if (file) | ||
424 | fput_light(file, fput_needed); | ||
425 | nlmsg_free(rep_skb); | ||
426 | return rc; | ||
427 | } | ||
428 | |||
375 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | 429 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) |
376 | { | 430 | { |
377 | int rc = 0; | 431 | int rc = 0; |
@@ -522,6 +576,12 @@ static struct genl_ops taskstats_ops = { | |||
522 | .policy = taskstats_cmd_get_policy, | 576 | .policy = taskstats_cmd_get_policy, |
523 | }; | 577 | }; |
524 | 578 | ||
579 | static struct genl_ops cgroupstats_ops = { | ||
580 | .cmd = CGROUPSTATS_CMD_GET, | ||
581 | .doit = cgroupstats_user_cmd, | ||
582 | .policy = cgroupstats_cmd_get_policy, | ||
583 | }; | ||
584 | |||
525 | /* Needed early in initialization */ | 585 | /* Needed early in initialization */ |
526 | void __init taskstats_init_early(void) | 586 | void __init taskstats_init_early(void) |
527 | { | 587 | { |
@@ -546,8 +606,15 @@ static int __init taskstats_init(void) | |||
546 | if (rc < 0) | 606 | if (rc < 0) |
547 | goto err; | 607 | goto err; |
548 | 608 | ||
609 | rc = genl_register_ops(&family, &cgroupstats_ops); | ||
610 | if (rc < 0) | ||
611 | goto err_cgroup_ops; | ||
612 | |||
549 | family_registered = 1; | 613 | family_registered = 1; |
614 | printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); | ||
550 | return 0; | 615 | return 0; |
616 | err_cgroup_ops: | ||
617 | genl_unregister_ops(&family, &taskstats_ops); | ||
551 | err: | 618 | err: |
552 | genl_unregister_family(&family); | 619 | genl_unregister_family(&family); |
553 | return rc; | 620 | return rc; |