diff options
-rw-r--r-- | Documentation/kernel-parameters.txt | 2 | ||||
-rw-r--r-- | include/linux/delayacct.h | 69 | ||||
-rw-r--r-- | include/linux/sched.h | 20 | ||||
-rw-r--r-- | include/linux/time.h | 12 | ||||
-rw-r--r-- | init/Kconfig | 10 | ||||
-rw-r--r-- | init/main.c | 2 | ||||
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/delayacct.c | 87 | ||||
-rw-r--r-- | kernel/exit.c | 2 | ||||
-rw-r--r-- | kernel/fork.c | 2 |
10 files changed, 207 insertions, 0 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 149f62ba14a..e11f7728ec6 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -448,6 +448,8 @@ running once the system is up. | |||
448 | Format: <area>[,<node>] | 448 | Format: <area>[,<node>] |
449 | See also Documentation/networking/decnet.txt. | 449 | See also Documentation/networking/decnet.txt. |
450 | 450 | ||
451 | delayacct [KNL] Enable per-task delay accounting | ||
452 | |||
451 | dhash_entries= [KNL] | 453 | dhash_entries= [KNL] |
452 | Set number of hash buckets for dentry cache. | 454 | Set number of hash buckets for dentry cache. |
453 | 455 | ||
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h new file mode 100644 index 00000000000..9572cfa1f12 --- /dev/null +++ b/include/linux/delayacct.h | |||
@@ -0,0 +1,69 @@ | |||
1 | /* delayacct.h - per-task delay accounting | ||
2 | * | ||
3 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
13 | * the GNU General Public License for more details. | ||
14 | * | ||
15 | */ | ||
16 | |||
17 | #ifndef _LINUX_DELAYACCT_H | ||
18 | #define _LINUX_DELAYACCT_H | ||
19 | |||
20 | #include <linux/sched.h> | ||
21 | |||
22 | #ifdef CONFIG_TASK_DELAY_ACCT | ||
23 | |||
24 | extern int delayacct_on; /* Delay accounting turned on/off */ | ||
25 | extern kmem_cache_t *delayacct_cache; | ||
26 | extern void delayacct_init(void); | ||
27 | extern void __delayacct_tsk_init(struct task_struct *); | ||
28 | extern void __delayacct_tsk_exit(struct task_struct *); | ||
29 | |||
30 | static inline void delayacct_set_flag(int flag) | ||
31 | { | ||
32 | if (current->delays) | ||
33 | current->delays->flags |= flag; | ||
34 | } | ||
35 | |||
36 | static inline void delayacct_clear_flag(int flag) | ||
37 | { | ||
38 | if (current->delays) | ||
39 | current->delays->flags &= ~flag; | ||
40 | } | ||
41 | |||
42 | static inline void delayacct_tsk_init(struct task_struct *tsk) | ||
43 | { | ||
44 | /* reinitialize in case parent's non-null pointer was dup'ed */ | ||
45 | tsk->delays = NULL; | ||
46 | if (unlikely(delayacct_on)) | ||
47 | __delayacct_tsk_init(tsk); | ||
48 | } | ||
49 | |||
50 | static inline void delayacct_tsk_exit(struct task_struct *tsk) | ||
51 | { | ||
52 | if (tsk->delays) | ||
53 | __delayacct_tsk_exit(tsk); | ||
54 | } | ||
55 | |||
56 | #else | ||
57 | static inline void delayacct_set_flag(int flag) | ||
58 | {} | ||
59 | static inline void delayacct_clear_flag(int flag) | ||
60 | {} | ||
61 | static inline void delayacct_init(void) | ||
62 | {} | ||
63 | static inline void delayacct_tsk_init(struct task_struct *tsk) | ||
64 | {} | ||
65 | static inline void delayacct_tsk_exit(struct task_struct *tsk) | ||
66 | {} | ||
67 | #endif /* CONFIG_TASK_DELAY_ACCT */ | ||
68 | |||
69 | #endif | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 1c876e27ff9..7a54e62763c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -552,6 +552,23 @@ struct sched_info { | |||
552 | extern struct file_operations proc_schedstat_operations; | 552 | extern struct file_operations proc_schedstat_operations; |
553 | #endif | 553 | #endif |
554 | 554 | ||
555 | #ifdef CONFIG_TASK_DELAY_ACCT | ||
556 | struct task_delay_info { | ||
557 | spinlock_t lock; | ||
558 | unsigned int flags; /* Private per-task flags */ | ||
559 | |||
560 | /* For each stat XXX, add following, aligned appropriately | ||
561 | * | ||
562 | * struct timespec XXX_start, XXX_end; | ||
563 | * u64 XXX_delay; | ||
564 | * u32 XXX_count; | ||
565 | * | ||
566 | * Atomicity of updates to XXX_delay, XXX_count protected by | ||
567 | * single lock above (split into XXX_lock if contention is an issue). | ||
568 | */ | ||
569 | }; | ||
570 | #endif | ||
571 | |||
555 | enum idle_type | 572 | enum idle_type |
556 | { | 573 | { |
557 | SCHED_IDLE, | 574 | SCHED_IDLE, |
@@ -945,6 +962,9 @@ struct task_struct { | |||
945 | * cache last used pipe for splice | 962 | * cache last used pipe for splice |
946 | */ | 963 | */ |
947 | struct pipe_inode_info *splice_pipe; | 964 | struct pipe_inode_info *splice_pipe; |
965 | #ifdef CONFIG_TASK_DELAY_ACCT | ||
966 | struct task_delay_info *delays; | ||
967 | #endif | ||
948 | }; | 968 | }; |
949 | 969 | ||
950 | static inline pid_t process_group(struct task_struct *tsk) | 970 | static inline pid_t process_group(struct task_struct *tsk) |
diff --git a/include/linux/time.h b/include/linux/time.h index c05f8bb9a32..a5b739967b7 100644 --- a/include/linux/time.h +++ b/include/linux/time.h | |||
@@ -71,6 +71,18 @@ extern unsigned long mktime(const unsigned int year, const unsigned int mon, | |||
71 | extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec); | 71 | extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec); |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * sub = lhs - rhs, in normalized form | ||
75 | */ | ||
76 | static inline struct timespec timespec_sub(struct timespec lhs, | ||
77 | struct timespec rhs) | ||
78 | { | ||
79 | struct timespec ts_delta; | ||
80 | set_normalized_timespec(&ts_delta, lhs.tv_sec - rhs.tv_sec, | ||
81 | lhs.tv_nsec - rhs.tv_nsec); | ||
82 | return ts_delta; | ||
83 | } | ||
84 | |||
85 | /* | ||
74 | * Returns true if the timespec is norm, false if denorm: | 86 | * Returns true if the timespec is norm, false if denorm: |
75 | */ | 87 | */ |
76 | #define timespec_valid(ts) \ | 88 | #define timespec_valid(ts) \ |
diff --git a/init/Kconfig b/init/Kconfig index a5b073a103e..90498a3e53d 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -158,6 +158,16 @@ config BSD_PROCESS_ACCT_V3 | |||
158 | for processing it. A preliminary version of these tools is available | 158 | for processing it. A preliminary version of these tools is available |
159 | at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>. | 159 | at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>. |
160 | 160 | ||
161 | config TASK_DELAY_ACCT | ||
162 | bool "Enable per-task delay accounting (EXPERIMENTAL)" | ||
163 | help | ||
164 | Collect information on time spent by a task waiting for system | ||
165 | resources like cpu, synchronous block I/O completion and swapping | ||
166 | in pages. Such statistics can help in setting a task's priorities | ||
167 | relative to other tasks for cpu, io, rss limits etc. | ||
168 | |||
169 | Say N if unsure. | ||
170 | |||
161 | config SYSCTL | 171 | config SYSCTL |
162 | bool "Sysctl support" if EMBEDDED | 172 | bool "Sysctl support" if EMBEDDED |
163 | default y | 173 | default y |
diff --git a/init/main.c b/init/main.c index 628b8e9e841..9e8e8c15214 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/cpu.h> | 41 | #include <linux/cpu.h> |
42 | #include <linux/cpuset.h> | 42 | #include <linux/cpuset.h> |
43 | #include <linux/efi.h> | 43 | #include <linux/efi.h> |
44 | #include <linux/delayacct.h> | ||
44 | #include <linux/unistd.h> | 45 | #include <linux/unistd.h> |
45 | #include <linux/rmap.h> | 46 | #include <linux/rmap.h> |
46 | #include <linux/mempolicy.h> | 47 | #include <linux/mempolicy.h> |
@@ -574,6 +575,7 @@ asmlinkage void __init start_kernel(void) | |||
574 | proc_root_init(); | 575 | proc_root_init(); |
575 | #endif | 576 | #endif |
576 | cpuset_init(); | 577 | cpuset_init(); |
578 | delayacct_init(); | ||
577 | 579 | ||
578 | check_bugs(); | 580 | check_bugs(); |
579 | 581 | ||
diff --git a/kernel/Makefile b/kernel/Makefile index 47dbcd570cd..87bb34cc893 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -48,6 +48,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ | |||
48 | obj-$(CONFIG_SECCOMP) += seccomp.o | 48 | obj-$(CONFIG_SECCOMP) += seccomp.o |
49 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | 49 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
50 | obj-$(CONFIG_RELAY) += relay.o | 50 | obj-$(CONFIG_RELAY) += relay.o |
51 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | ||
51 | 52 | ||
52 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) | 53 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) |
53 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 54 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/delayacct.c b/kernel/delayacct.c new file mode 100644 index 00000000000..fbf7f228495 --- /dev/null +++ b/kernel/delayacct.c | |||
@@ -0,0 +1,87 @@ | |||
1 | /* delayacct.c - per-task delay accounting | ||
2 | * | ||
3 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, but | ||
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
13 | * the GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/sched.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include <linux/time.h> | ||
19 | #include <linux/sysctl.h> | ||
20 | #include <linux/delayacct.h> | ||
21 | |||
22 | int delayacct_on __read_mostly; /* Delay accounting turned on/off */ | ||
23 | kmem_cache_t *delayacct_cache; | ||
24 | |||
25 | static int __init delayacct_setup_enable(char *str) | ||
26 | { | ||
27 | delayacct_on = 1; | ||
28 | return 1; | ||
29 | } | ||
30 | __setup("delayacct", delayacct_setup_enable); | ||
31 | |||
32 | void delayacct_init(void) | ||
33 | { | ||
34 | delayacct_cache = kmem_cache_create("delayacct_cache", | ||
35 | sizeof(struct task_delay_info), | ||
36 | 0, | ||
37 | SLAB_PANIC, | ||
38 | NULL, NULL); | ||
39 | delayacct_tsk_init(&init_task); | ||
40 | } | ||
41 | |||
42 | void __delayacct_tsk_init(struct task_struct *tsk) | ||
43 | { | ||
44 | tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL); | ||
45 | if (tsk->delays) | ||
46 | spin_lock_init(&tsk->delays->lock); | ||
47 | } | ||
48 | |||
49 | void __delayacct_tsk_exit(struct task_struct *tsk) | ||
50 | { | ||
51 | kmem_cache_free(delayacct_cache, tsk->delays); | ||
52 | tsk->delays = NULL; | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * Start accounting for a delay statistic using | ||
57 | * its starting timestamp (@start) | ||
58 | */ | ||
59 | |||
60 | static inline void delayacct_start(struct timespec *start) | ||
61 | { | ||
62 | do_posix_clock_monotonic_gettime(start); | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * Finish delay accounting for a statistic using | ||
67 | * its timestamps (@start, @end), accumulator (@total) and @count | ||
68 | */ | ||
69 | |||
70 | static void delayacct_end(struct timespec *start, struct timespec *end, | ||
71 | u64 *total, u32 *count) | ||
72 | { | ||
73 | struct timespec ts; | ||
74 | s64 ns; | ||
75 | |||
76 | do_posix_clock_monotonic_gettime(end); | ||
77 | ts = timespec_sub(*end, *start); | ||
78 | ns = timespec_to_ns(&ts); | ||
79 | if (ns < 0) | ||
80 | return; | ||
81 | |||
82 | spin_lock(&current->delays->lock); | ||
83 | *total += ns; | ||
84 | (*count)++; | ||
85 | spin_unlock(&current->delays->lock); | ||
86 | } | ||
87 | |||
diff --git a/kernel/exit.c b/kernel/exit.c index 6664c084783..3c2cf91defa 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/mount.h> | 25 | #include <linux/mount.h> |
26 | #include <linux/proc_fs.h> | 26 | #include <linux/proc_fs.h> |
27 | #include <linux/mempolicy.h> | 27 | #include <linux/mempolicy.h> |
28 | #include <linux/delayacct.h> | ||
28 | #include <linux/cpuset.h> | 29 | #include <linux/cpuset.h> |
29 | #include <linux/syscalls.h> | 30 | #include <linux/syscalls.h> |
30 | #include <linux/signal.h> | 31 | #include <linux/signal.h> |
@@ -900,6 +901,7 @@ fastcall NORET_TYPE void do_exit(long code) | |||
900 | #endif | 901 | #endif |
901 | if (unlikely(tsk->audit_context)) | 902 | if (unlikely(tsk->audit_context)) |
902 | audit_free(tsk); | 903 | audit_free(tsk); |
904 | delayacct_tsk_exit(tsk); | ||
903 | exit_mm(tsk); | 905 | exit_mm(tsk); |
904 | 906 | ||
905 | if (group_dead) | 907 | if (group_dead) |
diff --git a/kernel/fork.c b/kernel/fork.c index 926e5a68ea9..451cfd35bf2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/rmap.h> | 43 | #include <linux/rmap.h> |
44 | #include <linux/acct.h> | 44 | #include <linux/acct.h> |
45 | #include <linux/cn_proc.h> | 45 | #include <linux/cn_proc.h> |
46 | #include <linux/delayacct.h> | ||
46 | 47 | ||
47 | #include <asm/pgtable.h> | 48 | #include <asm/pgtable.h> |
48 | #include <asm/pgalloc.h> | 49 | #include <asm/pgalloc.h> |
@@ -1000,6 +1001,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1000 | goto bad_fork_cleanup_put_domain; | 1001 | goto bad_fork_cleanup_put_domain; |
1001 | 1002 | ||
1002 | p->did_exec = 0; | 1003 | p->did_exec = 0; |
1004 | delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ | ||
1003 | copy_flags(clone_flags, p); | 1005 | copy_flags(clone_flags, p); |
1004 | p->pid = pid; | 1006 | p->pid = pid; |
1005 | retval = -EFAULT; | 1007 | retval = -EFAULT; |