diff options
| -rw-r--r-- | include/linux/delayacct.h | 15 | ||||
| -rw-r--r-- | include/linux/sched.h | 1 | ||||
| -rw-r--r-- | include/linux/taskstats.h | 55 | ||||
| -rw-r--r-- | include/linux/taskstats_kern.h | 1 | ||||
| -rw-r--r-- | init/Kconfig | 1 | ||||
| -rw-r--r-- | kernel/delayacct.c | 62 | ||||
| -rw-r--r-- | kernel/taskstats.c | 16 |
7 files changed, 144 insertions, 7 deletions
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 0ecbf9aad8e1..d955078a1441 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #define _LINUX_DELAYACCT_H | 18 | #define _LINUX_DELAYACCT_H |
| 19 | 19 | ||
| 20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
| 21 | #include <linux/taskstats_kern.h> | ||
| 21 | 22 | ||
| 22 | /* | 23 | /* |
| 23 | * Per-task flags relevant to delay accounting | 24 | * Per-task flags relevant to delay accounting |
| @@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct task_struct *); | |||
| 35 | extern void __delayacct_tsk_exit(struct task_struct *); | 36 | extern void __delayacct_tsk_exit(struct task_struct *); |
| 36 | extern void __delayacct_blkio_start(void); | 37 | extern void __delayacct_blkio_start(void); |
| 37 | extern void __delayacct_blkio_end(void); | 38 | extern void __delayacct_blkio_end(void); |
| 39 | extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); | ||
| 38 | 40 | ||
| 39 | static inline void delayacct_set_flag(int flag) | 41 | static inline void delayacct_set_flag(int flag) |
| 40 | { | 42 | { |
| @@ -74,6 +76,16 @@ static inline void delayacct_blkio_end(void) | |||
| 74 | __delayacct_blkio_end(); | 76 | __delayacct_blkio_end(); |
| 75 | } | 77 | } |
| 76 | 78 | ||
| 79 | static inline int delayacct_add_tsk(struct taskstats *d, | ||
| 80 | struct task_struct *tsk) | ||
| 81 | { | ||
| 82 | if (likely(!delayacct_on)) | ||
| 83 | return -EINVAL; | ||
| 84 | if (!tsk->delays) | ||
| 85 | return 0; | ||
| 86 | return __delayacct_add_tsk(d, tsk); | ||
| 87 | } | ||
| 88 | |||
| 77 | #else | 89 | #else |
| 78 | static inline void delayacct_set_flag(int flag) | 90 | static inline void delayacct_set_flag(int flag) |
| 79 | {} | 91 | {} |
| @@ -89,6 +101,9 @@ static inline void delayacct_blkio_start(void) | |||
| 89 | {} | 101 | {} |
| 90 | static inline void delayacct_blkio_end(void) | 102 | static inline void delayacct_blkio_end(void) |
| 91 | {} | 103 | {} |
| 104 | static inline int delayacct_add_tsk(struct taskstats *d, | ||
| 105 | struct task_struct *tsk) | ||
| 106 | { return 0; } | ||
| 92 | #endif /* CONFIG_TASK_DELAY_ACCT */ | 107 | #endif /* CONFIG_TASK_DELAY_ACCT */ |
| 93 | 108 | ||
| 94 | #endif | 109 | #endif |
diff --git a/include/linux/sched.h b/include/linux/sched.h index f751062d89a2..3c5610ca0c92 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -990,6 +990,7 @@ struct task_struct { | |||
| 990 | */ | 990 | */ |
| 991 | struct pipe_inode_info *splice_pipe; | 991 | struct pipe_inode_info *splice_pipe; |
| 992 | #ifdef CONFIG_TASK_DELAY_ACCT | 992 | #ifdef CONFIG_TASK_DELAY_ACCT |
| 993 | spinlock_t delays_lock; | ||
| 993 | struct task_delay_info *delays; | 994 | struct task_delay_info *delays; |
| 994 | #endif | 995 | #endif |
| 995 | }; | 996 | }; |
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index 51f62759bea9..c6aeca32348e 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h | |||
| @@ -34,7 +34,60 @@ | |||
| 34 | struct taskstats { | 34 | struct taskstats { |
| 35 | 35 | ||
| 36 | /* Version 1 */ | 36 | /* Version 1 */ |
| 37 | __u64 version; | 37 | __u16 version; |
| 38 | __u16 padding[3]; /* Userspace should not interpret the padding | ||
| 39 | * field which can be replaced by useful | ||
| 40 | * fields if struct taskstats is extended. | ||
| 41 | */ | ||
| 42 | |||
| 43 | /* Delay accounting fields start | ||
| 44 | * | ||
| 45 | * All values, until comment "Delay accounting fields end" are | ||
| 46 | * available only if delay accounting is enabled, even though the last | ||
| 47 | * few fields are not delays | ||
| 48 | * | ||
| 49 | * xxx_count is the number of delay values recorded | ||
| 50 | * xxx_delay_total is the corresponding cumulative delay in nanoseconds | ||
| 51 | * | ||
| 52 | * xxx_delay_total wraps around to zero on overflow | ||
| 53 | * xxx_count incremented regardless of overflow | ||
| 54 | */ | ||
| 55 | |||
| 56 | /* Delay waiting for cpu, while runnable | ||
| 57 | * count, delay_total NOT updated atomically | ||
| 58 | */ | ||
| 59 | __u64 cpu_count; | ||
| 60 | __u64 cpu_delay_total; | ||
| 61 | |||
| 62 | /* Following four fields atomically updated using task->delays->lock */ | ||
| 63 | |||
| 64 | /* Delay waiting for synchronous block I/O to complete | ||
| 65 | * does not account for delays in I/O submission | ||
| 66 | */ | ||
| 67 | __u64 blkio_count; | ||
| 68 | __u64 blkio_delay_total; | ||
| 69 | |||
| 70 | /* Delay waiting for page fault I/O (swap in only) */ | ||
| 71 | __u64 swapin_count; | ||
| 72 | __u64 swapin_delay_total; | ||
| 73 | |||
| 74 | /* cpu "wall-clock" running time | ||
| 75 | * On some architectures, value will adjust for cpu time stolen | ||
| 76 | * from the kernel in involuntary waits due to virtualization. | ||
| 77 | * Value is cumulative, in nanoseconds, without a corresponding count | ||
| 78 | * and wraps around to zero silently on overflow | ||
| 79 | */ | ||
| 80 | __u64 cpu_run_real_total; | ||
| 81 | |||
| 82 | /* cpu "virtual" running time | ||
| 83 | * Uses time intervals seen by the kernel i.e. no adjustment | ||
| 84 | * for kernel's involuntary waits due to virtualization. | ||
| 85 | * Value is cumulative, in nanoseconds, without a corresponding count | ||
| 86 | * and wraps around to zero silently on overflow | ||
| 87 | */ | ||
| 88 | __u64 cpu_run_virtual_total; | ||
| 89 | /* Delay accounting fields end */ | ||
| 90 | /* version 1 ends here */ | ||
| 38 | }; | 91 | }; |
| 39 | 92 | ||
| 40 | 93 | ||
diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index bd0ecb969c26..fc9da2e26443 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h | |||
| @@ -17,6 +17,7 @@ enum { | |||
| 17 | 17 | ||
| 18 | #ifdef CONFIG_TASKSTATS | 18 | #ifdef CONFIG_TASKSTATS |
| 19 | extern kmem_cache_t *taskstats_cache; | 19 | extern kmem_cache_t *taskstats_cache; |
| 20 | extern struct mutex taskstats_exit_mutex; | ||
| 20 | 21 | ||
| 21 | static inline void taskstats_exit_alloc(struct taskstats **ptidstats, | 22 | static inline void taskstats_exit_alloc(struct taskstats **ptidstats, |
| 22 | struct taskstats **ptgidstats) | 23 | struct taskstats **ptgidstats) |
diff --git a/init/Kconfig b/init/Kconfig index 56a7093b4e4c..a099fc6526d9 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
| @@ -173,6 +173,7 @@ config TASKSTATS | |||
| 173 | 173 | ||
| 174 | config TASK_DELAY_ACCT | 174 | config TASK_DELAY_ACCT |
| 175 | bool "Enable per-task delay accounting (EXPERIMENTAL)" | 175 | bool "Enable per-task delay accounting (EXPERIMENTAL)" |
| 176 | depends on TASKSTATS | ||
| 176 | help | 177 | help |
| 177 | Collect information on time spent by a task waiting for system | 178 | Collect information on time spent by a task waiting for system |
| 178 | resources like cpu, synchronous block I/O completion and swapping | 179 | resources like cpu, synchronous block I/O completion and swapping |
diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 3546b0800f9f..1be274a462ca 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c | |||
| @@ -41,6 +41,10 @@ void delayacct_init(void) | |||
| 41 | 41 | ||
| 42 | void __delayacct_tsk_init(struct task_struct *tsk) | 42 | void __delayacct_tsk_init(struct task_struct *tsk) |
| 43 | { | 43 | { |
| 44 | spin_lock_init(&tsk->delays_lock); | ||
| 45 | /* No need to acquire tsk->delays_lock for allocation here unless | ||
| 46 | __delayacct_tsk_init called after tsk is attached to tasklist | ||
| 47 | */ | ||
| 44 | tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL); | 48 | tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL); |
| 45 | if (tsk->delays) | 49 | if (tsk->delays) |
| 46 | spin_lock_init(&tsk->delays->lock); | 50 | spin_lock_init(&tsk->delays->lock); |
| @@ -48,8 +52,11 @@ void __delayacct_tsk_init(struct task_struct *tsk) | |||
| 48 | 52 | ||
| 49 | void __delayacct_tsk_exit(struct task_struct *tsk) | 53 | void __delayacct_tsk_exit(struct task_struct *tsk) |
| 50 | { | 54 | { |
| 51 | kmem_cache_free(delayacct_cache, tsk->delays); | 55 | struct task_delay_info *delays = tsk->delays; |
| 56 | spin_lock(&tsk->delays_lock); | ||
| 52 | tsk->delays = NULL; | 57 | tsk->delays = NULL; |
| 58 | spin_unlock(&tsk->delays_lock); | ||
| 59 | kmem_cache_free(delayacct_cache, delays); | ||
| 53 | } | 60 | } |
| 54 | 61 | ||
| 55 | /* | 62 | /* |
| @@ -104,3 +111,56 @@ void __delayacct_blkio_end(void) | |||
| 104 | &current->delays->blkio_delay, | 111 | &current->delays->blkio_delay, |
| 105 | &current->delays->blkio_count); | 112 | &current->delays->blkio_count); |
| 106 | } | 113 | } |
| 114 | |||
| 115 | int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) | ||
| 116 | { | ||
| 117 | s64 tmp; | ||
| 118 | struct timespec ts; | ||
| 119 | unsigned long t1,t2,t3; | ||
| 120 | |||
| 121 | spin_lock(&tsk->delays_lock); | ||
| 122 | |||
| 123 | /* Though tsk->delays accessed later, early exit avoids | ||
| 124 | * unnecessary returning of other data | ||
| 125 | */ | ||
| 126 | if (!tsk->delays) | ||
| 127 | goto done; | ||
| 128 | |||
| 129 | tmp = (s64)d->cpu_run_real_total; | ||
| 130 | cputime_to_timespec(tsk->utime + tsk->stime, &ts); | ||
| 131 | tmp += timespec_to_ns(&ts); | ||
| 132 | d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; | ||
| 133 | |||
| 134 | /* | ||
| 135 | * No locking available for sched_info (and too expensive to add one) | ||
| 136 | * Mitigate by taking snapshot of values | ||
| 137 | */ | ||
| 138 | t1 = tsk->sched_info.pcnt; | ||
| 139 | t2 = tsk->sched_info.run_delay; | ||
| 140 | t3 = tsk->sched_info.cpu_time; | ||
| 141 | |||
| 142 | d->cpu_count += t1; | ||
| 143 | |||
| 144 | jiffies_to_timespec(t2, &ts); | ||
| 145 | tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts); | ||
| 146 | d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; | ||
| 147 | |||
| 148 | tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000; | ||
| 149 | d->cpu_run_virtual_total = | ||
| 150 | (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; | ||
| 151 | |||
| 152 | /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ | ||
| 153 | |||
| 154 | spin_lock(&tsk->delays->lock); | ||
| 155 | tmp = d->blkio_delay_total + tsk->delays->blkio_delay; | ||
| 156 | d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; | ||
| 157 | tmp = d->swapin_delay_total + tsk->delays->swapin_delay; | ||
| 158 | d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; | ||
| 159 | d->blkio_count += tsk->delays->blkio_count; | ||
| 160 | d->swapin_count += tsk->delays->swapin_count; | ||
| 161 | spin_unlock(&tsk->delays->lock); | ||
| 162 | |||
| 163 | done: | ||
| 164 | spin_unlock(&tsk->delays_lock); | ||
| 165 | return 0; | ||
| 166 | } | ||
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 82ec9137d908..ea9506de3b85 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
| @@ -18,13 +18,13 @@ | |||
| 18 | 18 | ||
| 19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
| 20 | #include <linux/taskstats_kern.h> | 20 | #include <linux/taskstats_kern.h> |
| 21 | #include <linux/delayacct.h> | ||
| 21 | #include <net/genetlink.h> | 22 | #include <net/genetlink.h> |
| 22 | #include <asm/atomic.h> | 23 | #include <asm/atomic.h> |
| 23 | 24 | ||
| 24 | static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; | 25 | static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; |
| 25 | static int family_registered; | 26 | static int family_registered; |
| 26 | kmem_cache_t *taskstats_cache; | 27 | kmem_cache_t *taskstats_cache; |
| 27 | static DEFINE_MUTEX(taskstats_exit_mutex); | ||
| 28 | 28 | ||
| 29 | static struct genl_family family = { | 29 | static struct genl_family family = { |
| 30 | .id = GENL_ID_GENERATE, | 30 | .id = GENL_ID_GENERATE, |
| @@ -120,7 +120,10 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk, | |||
| 120 | * goto err; | 120 | * goto err; |
| 121 | */ | 121 | */ |
| 122 | 122 | ||
| 123 | err: | 123 | rc = delayacct_add_tsk(stats, tsk); |
| 124 | stats->version = TASKSTATS_VERSION; | ||
| 125 | |||
| 126 | /* Define err: label here if needed */ | ||
| 124 | put_task_struct(tsk); | 127 | put_task_struct(tsk); |
| 125 | return rc; | 128 | return rc; |
| 126 | 129 | ||
| @@ -152,8 +155,14 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, | |||
| 152 | * break; | 155 | * break; |
| 153 | */ | 156 | */ |
| 154 | 157 | ||
| 158 | rc = delayacct_add_tsk(stats, tsk); | ||
| 159 | if (rc) | ||
| 160 | break; | ||
| 161 | |||
| 155 | } while_each_thread(first, tsk); | 162 | } while_each_thread(first, tsk); |
| 156 | read_unlock(&tasklist_lock); | 163 | read_unlock(&tasklist_lock); |
| 164 | stats->version = TASKSTATS_VERSION; | ||
| 165 | |||
| 157 | 166 | ||
| 158 | /* | 167 | /* |
| 159 | * Accounting subsystems can also add calls here if they don't | 168 | * Accounting subsystems can also add calls here if they don't |
| @@ -233,8 +242,6 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, | |||
| 233 | if (!family_registered || !tidstats) | 242 | if (!family_registered || !tidstats) |
| 234 | return; | 243 | return; |
| 235 | 244 | ||
| 236 | mutex_lock(&taskstats_exit_mutex); | ||
| 237 | |||
| 238 | is_thread_group = !thread_group_empty(tsk); | 245 | is_thread_group = !thread_group_empty(tsk); |
| 239 | rc = 0; | 246 | rc = 0; |
| 240 | 247 | ||
| @@ -292,7 +299,6 @@ nla_put_failure: | |||
| 292 | err_skb: | 299 | err_skb: |
| 293 | nlmsg_free(rep_skb); | 300 | nlmsg_free(rep_skb); |
| 294 | ret: | 301 | ret: |
| 295 | mutex_unlock(&taskstats_exit_mutex); | ||
| 296 | return; | 302 | return; |
| 297 | } | 303 | } |
| 298 | 304 | ||
