aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShailabh Nagar <nagar@watson.ibm.com>2006-07-14 03:24:41 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-07-15 00:53:56 -0400
commit6f44993fe1d7b2b097f6ac60cd5835c6f5ca0874 (patch)
tree0f349f4e6c28cc5d11b7083273543a294c437216
parentc757249af152c59fd74b85e52e8c090acb33d9c0 (diff)
[PATCH] per-task-delay-accounting: delay accounting usage of taskstats interface
Usage of taskstats interface by delay accounting. Signed-off-by: Shailabh Nagar <nagar@us.ibm.com> Signed-off-by: Balbir Singh <balbir@in.ibm.com> Cc: Jes Sorensen <jes@sgi.com> Cc: Peter Chubb <peterc@gelato.unsw.edu.au> Cc: Erich Focht <efocht@ess.nec.de> Cc: Levent Serinol <lserinol@gmail.com> Cc: Jay Lan <jlan@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--include/linux/delayacct.h15
-rw-r--r--include/linux/sched.h1
-rw-r--r--include/linux/taskstats.h55
-rw-r--r--include/linux/taskstats_kern.h1
-rw-r--r--init/Kconfig1
-rw-r--r--kernel/delayacct.c62
-rw-r--r--kernel/taskstats.c16
7 files changed, 144 insertions, 7 deletions
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 0ecbf9aad8e1..d955078a1441 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -18,6 +18,7 @@
18#define _LINUX_DELAYACCT_H 18#define _LINUX_DELAYACCT_H
19 19
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/taskstats_kern.h>
21 22
22/* 23/*
23 * Per-task flags relevant to delay accounting 24 * Per-task flags relevant to delay accounting
@@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct task_struct *);
35extern void __delayacct_tsk_exit(struct task_struct *); 36extern void __delayacct_tsk_exit(struct task_struct *);
36extern void __delayacct_blkio_start(void); 37extern void __delayacct_blkio_start(void);
37extern void __delayacct_blkio_end(void); 38extern void __delayacct_blkio_end(void);
39extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
38 40
39static inline void delayacct_set_flag(int flag) 41static inline void delayacct_set_flag(int flag)
40{ 42{
@@ -74,6 +76,16 @@ static inline void delayacct_blkio_end(void)
74 __delayacct_blkio_end(); 76 __delayacct_blkio_end();
75} 77}
76 78
79static inline int delayacct_add_tsk(struct taskstats *d,
80 struct task_struct *tsk)
81{
82 if (likely(!delayacct_on))
83 return -EINVAL;
84 if (!tsk->delays)
85 return 0;
86 return __delayacct_add_tsk(d, tsk);
87}
88
77#else 89#else
78static inline void delayacct_set_flag(int flag) 90static inline void delayacct_set_flag(int flag)
79{} 91{}
@@ -89,6 +101,9 @@ static inline void delayacct_blkio_start(void)
89{} 101{}
90static inline void delayacct_blkio_end(void) 102static inline void delayacct_blkio_end(void)
91{} 103{}
104static inline int delayacct_add_tsk(struct taskstats *d,
105 struct task_struct *tsk)
106{ return 0; }
92#endif /* CONFIG_TASK_DELAY_ACCT */ 107#endif /* CONFIG_TASK_DELAY_ACCT */
93 108
94#endif 109#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f751062d89a2..3c5610ca0c92 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -990,6 +990,7 @@ struct task_struct {
990 */ 990 */
991 struct pipe_inode_info *splice_pipe; 991 struct pipe_inode_info *splice_pipe;
992#ifdef CONFIG_TASK_DELAY_ACCT 992#ifdef CONFIG_TASK_DELAY_ACCT
993 spinlock_t delays_lock;
993 struct task_delay_info *delays; 994 struct task_delay_info *delays;
994#endif 995#endif
995}; 996};
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 51f62759bea9..c6aeca32348e 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -34,7 +34,60 @@
34struct taskstats { 34struct taskstats {
35 35
36 /* Version 1 */ 36 /* Version 1 */
37 __u64 version; 37 __u16 version;
38 __u16 padding[3]; /* Userspace should not interpret the padding
39 * field which can be replaced by useful
40 * fields if struct taskstats is extended.
41 */
42
43 /* Delay accounting fields start
44 *
45 * All values, until comment "Delay accounting fields end" are
46 * available only if delay accounting is enabled, even though the last
47 * few fields are not delays
48 *
49 * xxx_count is the number of delay values recorded
50 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
51 *
52 * xxx_delay_total wraps around to zero on overflow
53 * xxx_count incremented regardless of overflow
54 */
55
56 /* Delay waiting for cpu, while runnable
57 * count, delay_total NOT updated atomically
58 */
59 __u64 cpu_count;
60 __u64 cpu_delay_total;
61
62 /* Following four fields atomically updated using task->delays->lock */
63
64 /* Delay waiting for synchronous block I/O to complete
65 * does not account for delays in I/O submission
66 */
67 __u64 blkio_count;
68 __u64 blkio_delay_total;
69
70 /* Delay waiting for page fault I/O (swap in only) */
71 __u64 swapin_count;
72 __u64 swapin_delay_total;
73
74 /* cpu "wall-clock" running time
75 * On some architectures, value will adjust for cpu time stolen
76 * from the kernel in involuntary waits due to virtualization.
77 * Value is cumulative, in nanoseconds, without a corresponding count
78 * and wraps around to zero silently on overflow
79 */
80 __u64 cpu_run_real_total;
81
82 /* cpu "virtual" running time
83 * Uses time intervals seen by the kernel i.e. no adjustment
84 * for kernel's involuntary waits due to virtualization.
85 * Value is cumulative, in nanoseconds, without a corresponding count
86 * and wraps around to zero silently on overflow
87 */
88 __u64 cpu_run_virtual_total;
89 /* Delay accounting fields end */
90 /* version 1 ends here */
38}; 91};
39 92
40 93
diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h
index bd0ecb969c26..fc9da2e26443 100644
--- a/include/linux/taskstats_kern.h
+++ b/include/linux/taskstats_kern.h
@@ -17,6 +17,7 @@ enum {
17 17
18#ifdef CONFIG_TASKSTATS 18#ifdef CONFIG_TASKSTATS
19extern kmem_cache_t *taskstats_cache; 19extern kmem_cache_t *taskstats_cache;
20extern struct mutex taskstats_exit_mutex;
20 21
21static inline void taskstats_exit_alloc(struct taskstats **ptidstats, 22static inline void taskstats_exit_alloc(struct taskstats **ptidstats,
22 struct taskstats **ptgidstats) 23 struct taskstats **ptgidstats)
diff --git a/init/Kconfig b/init/Kconfig
index 56a7093b4e4c..a099fc6526d9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -173,6 +173,7 @@ config TASKSTATS
173 173
174config TASK_DELAY_ACCT 174config TASK_DELAY_ACCT
175 bool "Enable per-task delay accounting (EXPERIMENTAL)" 175 bool "Enable per-task delay accounting (EXPERIMENTAL)"
176 depends on TASKSTATS
176 help 177 help
177 Collect information on time spent by a task waiting for system 178 Collect information on time spent by a task waiting for system
178 resources like cpu, synchronous block I/O completion and swapping 179 resources like cpu, synchronous block I/O completion and swapping
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 3546b0800f9f..1be274a462ca 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -41,6 +41,10 @@ void delayacct_init(void)
41 41
42void __delayacct_tsk_init(struct task_struct *tsk) 42void __delayacct_tsk_init(struct task_struct *tsk)
43{ 43{
44 spin_lock_init(&tsk->delays_lock);
45 /* No need to acquire tsk->delays_lock for allocation here unless
46 __delayacct_tsk_init is called after tsk is attached to tasklist
47 */
44 tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL); 48 tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
45 if (tsk->delays) 49 if (tsk->delays)
46 spin_lock_init(&tsk->delays->lock); 50 spin_lock_init(&tsk->delays->lock);
@@ -48,8 +52,11 @@ void __delayacct_tsk_init(struct task_struct *tsk)
48 52
49void __delayacct_tsk_exit(struct task_struct *tsk) 53void __delayacct_tsk_exit(struct task_struct *tsk)
50{ 54{
51 kmem_cache_free(delayacct_cache, tsk->delays); 55 struct task_delay_info *delays = tsk->delays;
56 spin_lock(&tsk->delays_lock);
52 tsk->delays = NULL; 57 tsk->delays = NULL;
58 spin_unlock(&tsk->delays_lock);
59 kmem_cache_free(delayacct_cache, delays);
53} 60}
54 61
55/* 62/*
@@ -104,3 +111,56 @@ void __delayacct_blkio_end(void)
104 &current->delays->blkio_delay, 111 &current->delays->blkio_delay,
105 &current->delays->blkio_count); 112 &current->delays->blkio_count);
106} 113}
114
115int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
116{
117 s64 tmp;
118 struct timespec ts;
119 unsigned long t1,t2,t3;
120
121 spin_lock(&tsk->delays_lock);
122
123 /* Though tsk->delays is accessed later, early exit avoids
124 * unnecessary returning of other data
125 */
126 if (!tsk->delays)
127 goto done;
128
129 tmp = (s64)d->cpu_run_real_total;
130 cputime_to_timespec(tsk->utime + tsk->stime, &ts);
131 tmp += timespec_to_ns(&ts);
132 d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
133
134 /*
135 * No locking available for sched_info (and too expensive to add one)
136 * Mitigate by taking snapshot of values
137 */
138 t1 = tsk->sched_info.pcnt;
139 t2 = tsk->sched_info.run_delay;
140 t3 = tsk->sched_info.cpu_time;
141
142 d->cpu_count += t1;
143
144 jiffies_to_timespec(t2, &ts);
145 tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
146 d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
147
148 tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
149 d->cpu_run_virtual_total =
150 (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
151
152 /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
153
154 spin_lock(&tsk->delays->lock);
155 tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
156 d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
157 tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
158 d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
159 d->blkio_count += tsk->delays->blkio_count;
160 d->swapin_count += tsk->delays->swapin_count;
161 spin_unlock(&tsk->delays->lock);
162
163done:
164 spin_unlock(&tsk->delays_lock);
165 return 0;
166}
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 82ec9137d908..ea9506de3b85 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -18,13 +18,13 @@
18 18
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/taskstats_kern.h> 20#include <linux/taskstats_kern.h>
21#include <linux/delayacct.h>
21#include <net/genetlink.h> 22#include <net/genetlink.h>
22#include <asm/atomic.h> 23#include <asm/atomic.h>
23 24
24static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; 25static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
25static int family_registered; 26static int family_registered;
26kmem_cache_t *taskstats_cache; 27kmem_cache_t *taskstats_cache;
27static DEFINE_MUTEX(taskstats_exit_mutex);
28 28
29static struct genl_family family = { 29static struct genl_family family = {
30 .id = GENL_ID_GENERATE, 30 .id = GENL_ID_GENERATE,
@@ -120,7 +120,10 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
120 * goto err; 120 * goto err;
121 */ 121 */
122 122
123err: 123 rc = delayacct_add_tsk(stats, tsk);
124 stats->version = TASKSTATS_VERSION;
125
126 /* Define err: label here if needed */
124 put_task_struct(tsk); 127 put_task_struct(tsk);
125 return rc; 128 return rc;
126 129
@@ -152,8 +155,14 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
152 * break; 155 * break;
153 */ 156 */
154 157
158 rc = delayacct_add_tsk(stats, tsk);
159 if (rc)
160 break;
161
155 } while_each_thread(first, tsk); 162 } while_each_thread(first, tsk);
156 read_unlock(&tasklist_lock); 163 read_unlock(&tasklist_lock);
164 stats->version = TASKSTATS_VERSION;
165
157 166
158 /* 167 /*
159 * Accounting subsystems can also add calls here if they don't 168 * Accounting subsystems can also add calls here if they don't
@@ -233,8 +242,6 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
233 if (!family_registered || !tidstats) 242 if (!family_registered || !tidstats)
234 return; 243 return;
235 244
236 mutex_lock(&taskstats_exit_mutex);
237
238 is_thread_group = !thread_group_empty(tsk); 245 is_thread_group = !thread_group_empty(tsk);
239 rc = 0; 246 rc = 0;
240 247
@@ -292,7 +299,6 @@ nla_put_failure:
292err_skb: 299err_skb:
293 nlmsg_free(rep_skb); 300 nlmsg_free(rep_skb);
294ret: 301ret:
295 mutex_unlock(&taskstats_exit_mutex);
296 return; 302 return;
297} 303}
298 304