aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/kernel-parameters.txt2
-rw-r--r--include/linux/delayacct.h69
-rw-r--r--include/linux/sched.h20
-rw-r--r--include/linux/time.h12
-rw-r--r--init/Kconfig10
-rw-r--r--init/main.c2
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/delayacct.c87
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c2
10 files changed, 207 insertions, 0 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 149f62ba14a5..e11f7728ec6f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -448,6 +448,8 @@ running once the system is up.
448 Format: <area>[,<node>] 448 Format: <area>[,<node>]
449 See also Documentation/networking/decnet.txt. 449 See also Documentation/networking/decnet.txt.
450 450
451 delayacct [KNL] Enable per-task delay accounting
452
451 dhash_entries= [KNL] 453 dhash_entries= [KNL]
452 Set number of hash buckets for dentry cache. 454 Set number of hash buckets for dentry cache.
453 455
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
new file mode 100644
index 000000000000..9572cfa1f129
--- /dev/null
+++ b/include/linux/delayacct.h
@@ -0,0 +1,69 @@
1/* delayacct.h - per-task delay accounting
2 *
3 * Copyright (C) Shailabh Nagar, IBM Corp. 2006
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
14 *
15 */
16
17#ifndef _LINUX_DELAYACCT_H
18#define _LINUX_DELAYACCT_H
19
20#include <linux/sched.h>
21
22#ifdef CONFIG_TASK_DELAY_ACCT
23
24extern int delayacct_on; /* Delay accounting turned on/off */
25extern kmem_cache_t *delayacct_cache;
26extern void delayacct_init(void);
27extern void __delayacct_tsk_init(struct task_struct *);
28extern void __delayacct_tsk_exit(struct task_struct *);
29
30static inline void delayacct_set_flag(int flag)
31{
32 if (current->delays) /* nothing to do if current has no delay-accounting data */
33 current->delays->flags |= flag; /* OR @flag into current's private flags */
34}
35
36static inline void delayacct_clear_flag(int flag)
37{
38 if (current->delays) /* nothing to do if current has no delay-accounting data */
39 current->delays->flags &= ~flag; /* clear the bits of @flag in current's private flags */
40}
41
42static inline void delayacct_tsk_init(struct task_struct *tsk)
43{
44 /* Reset first: @tsk may hold a dup'ed copy of its parent's non-NULL pointer */
45 tsk->delays = NULL;
46 if (unlikely(delayacct_on)) /* allocate only when delay accounting is turned on */
47 __delayacct_tsk_init(tsk);
48}
49
50static inline void delayacct_tsk_exit(struct task_struct *tsk)
51{
52 if (tsk->delays) /* only tasks that were given delay-accounting data need cleanup */
53 __delayacct_tsk_exit(tsk);
54}
55
56#else
57static inline void delayacct_set_flag(int flag) /* this and the stubs below are empty: CONFIG_TASK_DELAY_ACCT is not defined in this branch */
58{}
59static inline void delayacct_clear_flag(int flag)
60{}
61static inline void delayacct_init(void)
62{}
63static inline void delayacct_tsk_init(struct task_struct *tsk)
64{}
65static inline void delayacct_tsk_exit(struct task_struct *tsk)
66{}
67#endif /* CONFIG_TASK_DELAY_ACCT */
68
69#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c876e27ff93..7a54e62763c5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -552,6 +552,23 @@ struct sched_info {
552extern struct file_operations proc_schedstat_operations; 552extern struct file_operations proc_schedstat_operations;
553#endif 553#endif
554 554
555#ifdef CONFIG_TASK_DELAY_ACCT
556struct task_delay_info {
557 spinlock_t lock; /* guards updates to the per-stat delay/count fields described below */
558 unsigned int flags; /* Private per-task flags */
559
560 /* For each statistic XXX, add the following fields, aligned appropriately:
561 *
562 * struct timespec XXX_start, XXX_end;
563 * u64 XXX_delay;
564 * u32 XXX_count;
565 *
566 * Atomicity of updates to XXX_delay and XXX_count is provided by the
567 * single lock above (split into a per-stat XXX_lock if contention is an issue).
568 */
569};
570#endif
571
555enum idle_type 572enum idle_type
556{ 573{
557 SCHED_IDLE, 574 SCHED_IDLE,
@@ -945,6 +962,9 @@ struct task_struct {
945 * cache last used pipe for splice 962 * cache last used pipe for splice
946 */ 963 */
947 struct pipe_inode_info *splice_pipe; 964 struct pipe_inode_info *splice_pipe;
965#ifdef CONFIG_TASK_DELAY_ACCT
966 struct task_delay_info *delays;
967#endif
948}; 968};
949 969
950static inline pid_t process_group(struct task_struct *tsk) 970static inline pid_t process_group(struct task_struct *tsk)
diff --git a/include/linux/time.h b/include/linux/time.h
index c05f8bb9a323..a5b739967b74 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -71,6 +71,18 @@ extern unsigned long mktime(const unsigned int year, const unsigned int mon,
71extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec); 71extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
72 72
73/* 73/*
74 * sub = lhs - rhs, in normalized form
75 */
76static inline struct timespec timespec_sub(struct timespec lhs,
77 struct timespec rhs)
78{
79 struct timespec ts_delta; /* receives lhs - rhs */
80 set_normalized_timespec(&ts_delta, lhs.tv_sec - rhs.tv_sec,
81 lhs.tv_nsec - rhs.tv_nsec); /* field-wise differences, put into normalized form by the helper */
82 return ts_delta;
83}
84
85/*
74 * Returns true if the timespec is norm, false if denorm: 86 * Returns true if the timespec is norm, false if denorm:
75 */ 87 */
76#define timespec_valid(ts) \ 88#define timespec_valid(ts) \
diff --git a/init/Kconfig b/init/Kconfig
index a5b073a103e7..90498a3e53da 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -158,6 +158,16 @@ config BSD_PROCESS_ACCT_V3
158 for processing it. A preliminary version of these tools is available 158 for processing it. A preliminary version of these tools is available
159 at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>. 159 at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>.
160 160
161config TASK_DELAY_ACCT
162 bool "Enable per-task delay accounting (EXPERIMENTAL)"
163 help
164 Collect information on time spent by a task waiting for system
165 resources like cpu, synchronous block I/O completion and swapping
166 in pages. Such statistics can help in setting a task's priorities
167 relative to other tasks for cpu, io, rss limits etc.
168
169 Say N if unsure.
170
161config SYSCTL 171config SYSCTL
162 bool "Sysctl support" if EMBEDDED 172 bool "Sysctl support" if EMBEDDED
163 default y 173 default y
diff --git a/init/main.c b/init/main.c
index 628b8e9e841a..9e8e8c152142 100644
--- a/init/main.c
+++ b/init/main.c
@@ -41,6 +41,7 @@
41#include <linux/cpu.h> 41#include <linux/cpu.h>
42#include <linux/cpuset.h> 42#include <linux/cpuset.h>
43#include <linux/efi.h> 43#include <linux/efi.h>
44#include <linux/delayacct.h>
44#include <linux/unistd.h> 45#include <linux/unistd.h>
45#include <linux/rmap.h> 46#include <linux/rmap.h>
46#include <linux/mempolicy.h> 47#include <linux/mempolicy.h>
@@ -574,6 +575,7 @@ asmlinkage void __init start_kernel(void)
574 proc_root_init(); 575 proc_root_init();
575#endif 576#endif
576 cpuset_init(); 577 cpuset_init();
578 delayacct_init();
577 579
578 check_bugs(); 580 check_bugs();
579 581
diff --git a/kernel/Makefile b/kernel/Makefile
index 47dbcd570cd8..87bb34cc8938 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
48obj-$(CONFIG_SECCOMP) += seccomp.o 48obj-$(CONFIG_SECCOMP) += seccomp.o
49obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 49obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
50obj-$(CONFIG_RELAY) += relay.o 50obj-$(CONFIG_RELAY) += relay.o
51obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
51 52
52ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) 53ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
53# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 54# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
new file mode 100644
index 000000000000..fbf7f2284952
--- /dev/null
+++ b/kernel/delayacct.c
@@ -0,0 +1,87 @@
1/* delayacct.c - per-task delay accounting
2 *
3 * Copyright (C) Shailabh Nagar, IBM Corp. 2006
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
14 */
15
16#include <linux/sched.h>
17#include <linux/slab.h>
18#include <linux/time.h>
19#include <linux/sysctl.h>
20#include <linux/delayacct.h>
21
22int delayacct_on __read_mostly; /* Delay accounting turned on/off */
23kmem_cache_t *delayacct_cache;
24
25static int __init delayacct_setup_enable(char *str)
26{
27 delayacct_on = 1; /* turn delay accounting on; @str is not examined */
28 return 1;
29}
30__setup("delayacct", delayacct_setup_enable); /* run the handler for the "delayacct" kernel parameter */
31
32void delayacct_init(void)
33{
34 delayacct_cache = kmem_cache_create("delayacct_cache", /* cache of struct task_delay_info objects */
35 sizeof(struct task_delay_info),
36 0,
37 SLAB_PANIC, /* NOTE(review): presumably panics on failure, hence no NULL check of the result - confirm */
38 NULL, NULL);
39 delayacct_tsk_init(&init_task); /* set up delay accounting for init_task explicitly */
40}
41
42void __delayacct_tsk_init(struct task_struct *tsk)
43{
44 tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL); /* zeroed struct from delayacct_cache */
45 if (tsk->delays) /* on allocation failure delays stays NULL; the inline helpers test for that */
46 spin_lock_init(&tsk->delays->lock);
47}
48
49void __delayacct_tsk_exit(struct task_struct *tsk)
50{
51 kmem_cache_free(delayacct_cache, tsk->delays); /* hand the struct back to delayacct_cache */
52 tsk->delays = NULL; /* later tsk->delays checks then see no accounting data */
53}
54
55/*
56 * Start accounting for a delay statistic by storing the
57 * current monotonic time in its starting timestamp (@start)
58 */
59
60static inline void delayacct_start(struct timespec *start)
61{
62 do_posix_clock_monotonic_gettime(start);
63}
64
65/*
66 * Finish delay accounting for a statistic using
67 * its timestamps (@start, @end), accumulator (@total) and @count
68 */
69
70static void delayacct_end(struct timespec *start, struct timespec *end,
71 u64 *total, u32 *count)
72{
73 struct timespec ts;
74 s64 ns;
75
76 do_posix_clock_monotonic_gettime(end); /* stamp the end of the delay */
77 ts = timespec_sub(*end, *start); /* elapsed time = end - start */
78 ns = timespec_to_ns(&ts);
79 if (ns < 0) /* a negative interval is discarded: neither @total nor @count changes */
80 return;
81
82 spin_lock(&current->delays->lock); /* NOTE(review): current->delays is dereferenced without a NULL check; presumably callers verify it - confirm */
83 *total += ns; /* ns is non-negative here */
84 (*count)++;
85 spin_unlock(&current->delays->lock);
86}
87
diff --git a/kernel/exit.c b/kernel/exit.c
index 6664c084783d..3c2cf91defa7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -25,6 +25,7 @@
25#include <linux/mount.h> 25#include <linux/mount.h>
26#include <linux/proc_fs.h> 26#include <linux/proc_fs.h>
27#include <linux/mempolicy.h> 27#include <linux/mempolicy.h>
28#include <linux/delayacct.h>
28#include <linux/cpuset.h> 29#include <linux/cpuset.h>
29#include <linux/syscalls.h> 30#include <linux/syscalls.h>
30#include <linux/signal.h> 31#include <linux/signal.h>
@@ -900,6 +901,7 @@ fastcall NORET_TYPE void do_exit(long code)
900#endif 901#endif
901 if (unlikely(tsk->audit_context)) 902 if (unlikely(tsk->audit_context))
902 audit_free(tsk); 903 audit_free(tsk);
904 delayacct_tsk_exit(tsk);
903 exit_mm(tsk); 905 exit_mm(tsk);
904 906
905 if (group_dead) 907 if (group_dead)
diff --git a/kernel/fork.c b/kernel/fork.c
index 926e5a68ea9e..451cfd35bf22 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -43,6 +43,7 @@
43#include <linux/rmap.h> 43#include <linux/rmap.h>
44#include <linux/acct.h> 44#include <linux/acct.h>
45#include <linux/cn_proc.h> 45#include <linux/cn_proc.h>
46#include <linux/delayacct.h>
46 47
47#include <asm/pgtable.h> 48#include <asm/pgtable.h>
48#include <asm/pgalloc.h> 49#include <asm/pgalloc.h>
@@ -1000,6 +1001,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1000 goto bad_fork_cleanup_put_domain; 1001 goto bad_fork_cleanup_put_domain;
1001 1002
1002 p->did_exec = 0; 1003 p->did_exec = 0;
1004 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
1003 copy_flags(clone_flags, p); 1005 copy_flags(clone_flags, p);
1004 p->pid = pid; 1006 p->pid = pid;
1005 retval = -EFAULT; 1007 retval = -EFAULT;