aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2007-02-16 04:28:13 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-02-16 11:13:59 -0500
commit82f67cd9fca8c8762c15ba7ed0d5747588c1e221 (patch)
tree1ff7e5cc496580b85bb42fb1d7b19dcbef7b7776
parent8bfd9a7a229b5f3d3eda5d7d45c2eebec5b4ba16 (diff)
[PATCH] Add debugging feature /proc/timer_stat
Add /proc/timer_stats support: debugging feature to profile timer expiration. Both the starting site, process/PID and the expiration function is captured. This allows the quick identification of timer event sources in a system. Sample output: # echo 1 > /proc/timer_stats # cat /proc/timer_stats Timer Stats Version: v0.1 Sample period: 4.010 s 24, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) 11, 0 swapper sk_reset_timer (tcp_delack_timer) 6, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) 2, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) 17, 0 swapper hrtimer_restart_sched_tick (hrtimer_sched_tick) 2, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) 4, 2050 pcscd do_nanosleep (hrtimer_wakeup) 5, 4179 sshd sk_reset_timer (tcp_write_timer) 4, 2248 yum-updatesd schedule_timeout (process_timeout) 18, 0 swapper hrtimer_restart_sched_tick (hrtimer_sched_tick) 3, 0 swapper sk_reset_timer (tcp_delack_timer) 1, 1 swapper neigh_table_init_no_netlink (neigh_periodic_timer) 2, 1 swapper e1000_up (e1000_watchdog) 1, 1 init schedule_timeout (process_timeout) 100 total events, 25.24 events/sec [ cleanups and hrtimers support from Thomas Gleixner <tglx@linutronix.de> ] [bunk@stusta.de: nr_entries can become static] Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: john stultz <johnstul@us.ibm.com> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Andi Kleen <ak@suse.de> Signed-off-by: Adrian Bunk <bunk@stusta.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/hrtimer/timer_stats.txt68
-rw-r--r--include/linux/hrtimer.h45
-rw-r--r--include/linux/timer.h54
-rw-r--r--kernel/hrtimer.c26
-rw-r--r--kernel/time/Makefile1
-rw-r--r--kernel/time/timer_stats.c411
-rw-r--r--kernel/timer.c31
-rw-r--r--kernel/workqueue.c7
-rw-r--r--lib/Kconfig.debug11
9 files changed, 650 insertions, 4 deletions
diff --git a/Documentation/hrtimer/timer_stats.txt b/Documentation/hrtimer/timer_stats.txt
new file mode 100644
index 000000000000..27f782e3593f
--- /dev/null
+++ b/Documentation/hrtimer/timer_stats.txt
@@ -0,0 +1,68 @@
1timer_stats - timer usage statistics
2------------------------------------
3
4timer_stats is a debugging facility to make the timer (ab)usage in a Linux
5system visible to kernel and userspace developers. It is not intended for
6production usage as it adds significant overhead to the (hr)timer code and the
7(hr)timer data structures.
8
9timer_stats should be used by kernel and userspace developers to verify that
10their code does not make unduly use of timers. This helps to avoid unnecessary
11wakeups, which should be avoided to optimize power consumption.
12
13It can be enabled by CONFIG_TIMER_STATS in the "Kernel hacking" configuration
14section.
15
16timer_stats collects information about the timer events which are fired in a
17Linux system over a sample period:
18
19- the pid of the task(process) which initialized the timer
20- the name of the process which initialized the timer
21- the function where the timer was intialized
22- the callback function which is associated to the timer
23- the number of events (callbacks)
24
25timer_stats adds an entry to /proc: /proc/timer_stats
26
27This entry is used to control the statistics functionality and to read out the
28sampled information.
29
30The timer_stats functionality is inactive on bootup.
31
32To activate a sample period issue:
33# echo 1 >/proc/timer_stats
34
35To stop a sample period issue:
36# echo 0 >/proc/timer_stats
37
38The statistics can be retrieved by:
39# cat /proc/timer_stats
40
41The readout of /proc/timer_stats automatically disables sampling. The sampled
42information is kept until a new sample period is started. This allows multiple
43readouts.
44
45Sample output of /proc/timer_stats:
46
47Timerstats sample period: 3.888770 s
48 12, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick)
49 15, 1 swapper hcd_submit_urb (rh_timer_func)
50 4, 959 kedac schedule_timeout (process_timeout)
51 1, 0 swapper page_writeback_init (wb_timer_fn)
52 28, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick)
53 22, 2948 IRQ 4 tty_flip_buffer_push (delayed_work_timer_fn)
54 3, 3100 bash schedule_timeout (process_timeout)
55 1, 1 swapper queue_delayed_work_on (delayed_work_timer_fn)
56 1, 1 swapper queue_delayed_work_on (delayed_work_timer_fn)
57 1, 1 swapper neigh_table_init_no_netlink (neigh_periodic_timer)
58 1, 2292 ip __netdev_watchdog_up (dev_watchdog)
59 1, 23 events/1 do_cache_clean (delayed_work_timer_fn)
6090 total events, 30.0 events/sec
61
62The first column is the number of events, the second column the pid, the third
63column is the name of the process. The forth column shows the function which
64initialized the timer and in parantheses the callback function which was
65executed on expiry.
66
67 Thomas, Ingo
68
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 4ecd991431b4..93b8757c1ea2 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -119,6 +119,11 @@ struct hrtimer {
119 enum hrtimer_cb_mode cb_mode; 119 enum hrtimer_cb_mode cb_mode;
120 struct list_head cb_entry; 120 struct list_head cb_entry;
121#endif 121#endif
122#ifdef CONFIG_TIMER_STATS
123 void *start_site;
124 char start_comm[16];
125 int start_pid;
126#endif
122}; 127};
123 128
124/** 129/**
@@ -311,4 +316,44 @@ extern unsigned long ktime_divns(const ktime_t kt, s64 div);
311# define ktime_divns(kt, div) (unsigned long)((kt).tv64 / (div)) 316# define ktime_divns(kt, div) (unsigned long)((kt).tv64 / (div))
312#endif 317#endif
313 318
319/*
320 * Timer-statistics info:
321 */
322#ifdef CONFIG_TIMER_STATS
323
324extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
325 void *timerf, char * comm);
326
327static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
328{
329 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
330 timer->function, timer->start_comm);
331}
332
333extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer,
334 void *addr);
335
336static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
337{
338 __timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0));
339}
340
341static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
342{
343 timer->start_site = NULL;
344}
345#else
346static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
347{
348}
349
350static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
351{
352}
353
354static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
355{
356}
357#endif
358
314#endif 359#endif
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 44d41e9d7818..719113b652dd 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -2,6 +2,7 @@
2#define _LINUX_TIMER_H 2#define _LINUX_TIMER_H
3 3
4#include <linux/list.h> 4#include <linux/list.h>
5#include <linux/ktime.h>
5#include <linux/spinlock.h> 6#include <linux/spinlock.h>
6#include <linux/stddef.h> 7#include <linux/stddef.h>
7 8
@@ -15,6 +16,11 @@ struct timer_list {
15 unsigned long data; 16 unsigned long data;
16 17
17 struct tvec_t_base_s *base; 18 struct tvec_t_base_s *base;
19#ifdef CONFIG_TIMER_STATS
20 void *start_site;
21 char start_comm[16];
22 int start_pid;
23#endif
18}; 24};
19 25
20extern struct tvec_t_base_s boot_tvec_bases; 26extern struct tvec_t_base_s boot_tvec_bases;
@@ -73,6 +79,54 @@ extern unsigned long next_timer_interrupt(void);
73 */ 79 */
74extern unsigned long get_next_timer_interrupt(unsigned long now); 80extern unsigned long get_next_timer_interrupt(unsigned long now);
75 81
82/*
83 * Timer-statistics info:
84 */
85#ifdef CONFIG_TIMER_STATS
86
87extern void init_timer_stats(void);
88
89extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
90 void *timerf, char * comm);
91
92static inline void timer_stats_account_timer(struct timer_list *timer)
93{
94 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
95 timer->function, timer->start_comm);
96}
97
98extern void __timer_stats_timer_set_start_info(struct timer_list *timer,
99 void *addr);
100
101static inline void timer_stats_timer_set_start_info(struct timer_list *timer)
102{
103 __timer_stats_timer_set_start_info(timer, __builtin_return_address(0));
104}
105
106static inline void timer_stats_timer_clear_start_info(struct timer_list *timer)
107{
108 timer->start_site = NULL;
109}
110#else
111static inline void init_timer_stats(void)
112{
113}
114
115static inline void timer_stats_account_timer(struct timer_list *timer)
116{
117}
118
119static inline void timer_stats_timer_set_start_info(struct timer_list *timer)
120{
121}
122
123static inline void timer_stats_timer_clear_start_info(struct timer_list *timer)
124{
125}
126#endif
127
128extern void delayed_work_timer_fn(unsigned long __data);
129
76/** 130/**
77 * add_timer - start a timer 131 * add_timer - start a timer
78 * @timer: the timer to be added 132 * @timer: the timer to be added
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 62aad8e1a383..476cb0c0b4a4 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -585,6 +585,18 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
585 585
586#endif /* CONFIG_HIGH_RES_TIMERS */ 586#endif /* CONFIG_HIGH_RES_TIMERS */
587 587
588#ifdef CONFIG_TIMER_STATS
589void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
590{
591 if (timer->start_site)
592 return;
593
594 timer->start_site = addr;
595 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
596 timer->start_pid = current->pid;
597}
598#endif
599
588/* 600/*
589 * Counterpart to lock_timer_base above: 601 * Counterpart to lock_timer_base above:
590 */ 602 */
@@ -743,6 +755,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
743 * reprogramming happens in the interrupt handler. This is a 755 * reprogramming happens in the interrupt handler. This is a
744 * rare case and less expensive than a smp call. 756 * rare case and less expensive than a smp call.
745 */ 757 */
758 timer_stats_hrtimer_clear_start_info(timer);
746 reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); 759 reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
747 __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 760 __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE,
748 reprogram); 761 reprogram);
@@ -791,6 +804,8 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
791 } 804 }
792 timer->expires = tim; 805 timer->expires = tim;
793 806
807 timer_stats_hrtimer_set_start_info(timer);
808
794 enqueue_hrtimer(timer, new_base, base == new_base); 809 enqueue_hrtimer(timer, new_base, base == new_base);
795 810
796 unlock_hrtimer_base(timer, &flags); 811 unlock_hrtimer_base(timer, &flags);
@@ -925,6 +940,12 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
925 940
926 timer->base = &cpu_base->clock_base[clock_id]; 941 timer->base = &cpu_base->clock_base[clock_id];
927 hrtimer_init_timer_hres(timer); 942 hrtimer_init_timer_hres(timer);
943
944#ifdef CONFIG_TIMER_STATS
945 timer->start_site = NULL;
946 timer->start_pid = -1;
947 memset(timer->start_comm, 0, TASK_COMM_LEN);
948#endif
928} 949}
929EXPORT_SYMBOL_GPL(hrtimer_init); 950EXPORT_SYMBOL_GPL(hrtimer_init);
930 951
@@ -1006,6 +1027,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1006 1027
1007 __remove_hrtimer(timer, base, 1028 __remove_hrtimer(timer, base,
1008 HRTIMER_STATE_CALLBACK, 0); 1029 HRTIMER_STATE_CALLBACK, 0);
1030 timer_stats_account_hrtimer(timer);
1009 1031
1010 /* 1032 /*
1011 * Note: We clear the CALLBACK bit after 1033 * Note: We clear the CALLBACK bit after
@@ -1050,6 +1072,8 @@ static void run_hrtimer_softirq(struct softirq_action *h)
1050 timer = list_entry(cpu_base->cb_pending.next, 1072 timer = list_entry(cpu_base->cb_pending.next,
1051 struct hrtimer, cb_entry); 1073 struct hrtimer, cb_entry);
1052 1074
1075 timer_stats_account_hrtimer(timer);
1076
1053 fn = timer->function; 1077 fn = timer->function;
1054 __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); 1078 __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
1055 spin_unlock_irq(&cpu_base->lock); 1079 spin_unlock_irq(&cpu_base->lock);
@@ -1106,6 +1130,8 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
1106 if (base->softirq_time.tv64 <= timer->expires.tv64) 1130 if (base->softirq_time.tv64 <= timer->expires.tv64)
1107 break; 1131 break;
1108 1132
1133 timer_stats_account_hrtimer(timer);
1134
1109 fn = timer->function; 1135 fn = timer->function;
1110 __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); 1136 __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
1111 spin_unlock_irq(&cpu_base->lock); 1137 spin_unlock_irq(&cpu_base->lock);
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index f246bc836b9a..252b4b5d9dd0 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
5obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o 5obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o
6obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o 6obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
7obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o 7obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o
8obj-$(CONFIG_TIMER_STATS) += timer_stats.o
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
new file mode 100644
index 000000000000..1bc4882e28e0
--- /dev/null
+++ b/kernel/time/timer_stats.c
@@ -0,0 +1,411 @@
1/*
2 * kernel/time/timer_stats.c
3 *
4 * Collect timer usage statistics.
5 *
6 * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
7 * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
8 *
9 * timer_stats is based on timer_top, a similar functionality which was part of
10 * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the
11 * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
12 * on dynamic allocation of the statistics entries and linear search based
13 * lookup combined with a global lock, rather than the static array, hash
14 * and per-CPU locking which is used by timer_stats. It was written for the
15 * pre hrtimer kernel code and therefore did not take hrtimers into account.
16 * Nevertheless it provided the base for the timer_stats implementation and
17 * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
18 * for this effort.
19 *
20 * timer_top.c is
21 * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
22 * Written by Daniel Petrini <d.pensator@gmail.com>
23 * timer_top.c was released under the GNU General Public License version 2
24 *
25 * We export the addresses and counting of timer functions being called,
26 * the pid and cmdline from the owner process if applicable.
27 *
28 * Start/stop data collection:
29 * # echo 1[0] >/proc/timer_stats
30 *
31 * Display the information collected so far:
32 * # cat /proc/timer_stats
33 *
34 * This program is free software; you can redistribute it and/or modify
35 * it under the terms of the GNU General Public License version 2 as
36 * published by the Free Software Foundation.
37 */
38
39#include <linux/proc_fs.h>
40#include <linux/module.h>
41#include <linux/spinlock.h>
42#include <linux/sched.h>
43#include <linux/seq_file.h>
44#include <linux/kallsyms.h>
45
46#include <asm/uaccess.h>
47
48/*
49 * This is our basic unit of interest: a timer expiry event identified
50 * by the timer, its start/expire functions and the PID of the task that
51 * started the timer. We count the number of times an event happens:
52 */
53struct entry {
54 /*
55 * Hash list:
56 */
57 struct entry *next;
58
59 /*
60 * Hash keys:
61 */
62 void *timer;
63 void *start_func;
64 void *expire_func;
65 pid_t pid;
66
67 /*
68 * Number of timeout events:
69 */
70 unsigned long count;
71
72 /*
73 * We save the command-line string to preserve
74 * this information past task exit:
75 */
76 char comm[TASK_COMM_LEN + 1];
77
78} ____cacheline_aligned_in_smp;
79
80/*
81 * Spinlock protecting the tables - not taken during lookup:
82 */
83static DEFINE_SPINLOCK(table_lock);
84
85/*
86 * Per-CPU lookup locks for fast hash lookup:
87 */
88static DEFINE_PER_CPU(spinlock_t, lookup_lock);
89
90/*
91 * Mutex to serialize state changes with show-stats activities:
92 */
93static DEFINE_MUTEX(show_mutex);
94
95/*
96 * Collection status, active/inactive:
97 */
98static int __read_mostly active;
99
100/*
101 * Beginning/end timestamps of measurement:
102 */
103static ktime_t time_start, time_stop;
104
105/*
106 * tstat entry structs only get allocated while collection is
107 * active and never freed during that time - this simplifies
108 * things quite a bit.
109 *
110 * They get freed when a new collection period is started.
111 */
112#define MAX_ENTRIES_BITS 10
113#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS)
114
115static unsigned long nr_entries;
116static struct entry entries[MAX_ENTRIES];
117
118static atomic_t overflow_count;
119
120static void reset_entries(void)
121{
122 nr_entries = 0;
123 memset(entries, 0, sizeof(entries));
124 atomic_set(&overflow_count, 0);
125}
126
127static struct entry *alloc_entry(void)
128{
129 if (nr_entries >= MAX_ENTRIES)
130 return NULL;
131
132 return entries + nr_entries++;
133}
134
135/*
136 * The entries are in a hash-table, for fast lookup:
137 */
138#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1)
139#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS)
140#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1)
141
142#define __tstat_hashfn(entry) \
143 (((unsigned long)(entry)->timer ^ \
144 (unsigned long)(entry)->start_func ^ \
145 (unsigned long)(entry)->expire_func ^ \
146 (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK)
147
148#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry))
149
150static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
151
152static int match_entries(struct entry *entry1, struct entry *entry2)
153{
154 return entry1->timer == entry2->timer &&
155 entry1->start_func == entry2->start_func &&
156 entry1->expire_func == entry2->expire_func &&
157 entry1->pid == entry2->pid;
158}
159
160/*
161 * Look up whether an entry matching this item is present
162 * in the hash already. Must be called with irqs off and the
163 * lookup lock held:
164 */
165static struct entry *tstat_lookup(struct entry *entry, char *comm)
166{
167 struct entry **head, *curr, *prev;
168
169 head = tstat_hashentry(entry);
170 curr = *head;
171
172 /*
173 * The fastpath is when the entry is already hashed,
174 * we do this with the lookup lock held, but with the
175 * table lock not held:
176 */
177 while (curr) {
178 if (match_entries(curr, entry))
179 return curr;
180
181 curr = curr->next;
182 }
183 /*
184 * Slowpath: allocate, set up and link a new hash entry:
185 */
186 prev = NULL;
187 curr = *head;
188
189 spin_lock(&table_lock);
190 /*
191 * Make sure we have not raced with another CPU:
192 */
193 while (curr) {
194 if (match_entries(curr, entry))
195 goto out_unlock;
196
197 prev = curr;
198 curr = curr->next;
199 }
200
201 curr = alloc_entry();
202 if (curr) {
203 *curr = *entry;
204 curr->count = 0;
205 memcpy(curr->comm, comm, TASK_COMM_LEN);
206 if (prev)
207 prev->next = curr;
208 else
209 *head = curr;
210 curr->next = NULL;
211 }
212 out_unlock:
213 spin_unlock(&table_lock);
214
215 return curr;
216}
217
218/**
219 * timer_stats_update_stats - Update the statistics for a timer.
220 * @timer: pointer to either a timer_list or a hrtimer
221 * @pid: the pid of the task which set up the timer
222 * @startf: pointer to the function which did the timer setup
223 * @timerf: pointer to the timer callback function of the timer
224 * @comm: name of the process which set up the timer
225 *
226 * When the timer is already registered, then the event counter is
227 * incremented. Otherwise the timer is registered in a free slot.
228 */
229void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
230 void *timerf, char * comm)
231{
232 /*
233 * It doesnt matter which lock we take:
234 */
235 spinlock_t *lock = &per_cpu(lookup_lock, raw_smp_processor_id());
236 struct entry *entry, input;
237 unsigned long flags;
238
239 input.timer = timer;
240 input.start_func = startf;
241 input.expire_func = timerf;
242 input.pid = pid;
243
244 spin_lock_irqsave(lock, flags);
245 if (!active)
246 goto out_unlock;
247
248 entry = tstat_lookup(&input, comm);
249 if (likely(entry))
250 entry->count++;
251 else
252 atomic_inc(&overflow_count);
253
254 out_unlock:
255 spin_unlock_irqrestore(lock, flags);
256}
257
258static void print_name_offset(struct seq_file *m, unsigned long addr)
259{
260 char namebuf[KSYM_NAME_LEN+1];
261 unsigned long size, offset;
262 const char *sym_name;
263 char *modname;
264
265 sym_name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf);
266 if (sym_name)
267 seq_printf(m, "%s", sym_name);
268 else
269 seq_printf(m, "<%p>", (void *)addr);
270}
271
272static int tstats_show(struct seq_file *m, void *v)
273{
274 struct timespec period;
275 struct entry *entry;
276 unsigned long ms;
277 long events = 0;
278 ktime_t time;
279 int i;
280
281 mutex_lock(&show_mutex);
282 /*
283 * If still active then calculate up to now:
284 */
285 if (active)
286 time_stop = ktime_get();
287
288 time = ktime_sub(time_stop, time_start);
289
290 period = ktime_to_timespec(time);
291 ms = period.tv_nsec / 1000000;
292
293 seq_puts(m, "Timer Stats Version: v0.1\n");
294 seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
295 if (atomic_read(&overflow_count))
296 seq_printf(m, "Overflow: %d entries\n",
297 atomic_read(&overflow_count));
298
299 for (i = 0; i < nr_entries; i++) {
300 entry = entries + i;
301 seq_printf(m, "%4lu, %5d %-16s ",
302 entry->count, entry->pid, entry->comm);
303
304 print_name_offset(m, (unsigned long)entry->start_func);
305 seq_puts(m, " (");
306 print_name_offset(m, (unsigned long)entry->expire_func);
307 seq_puts(m, ")\n");
308
309 events += entry->count;
310 }
311
312 ms += period.tv_sec * 1000;
313 if (!ms)
314 ms = 1;
315
316 if (events && period.tv_sec)
317 seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events,
318 events / period.tv_sec, events * 1000 / ms);
319 else
320 seq_printf(m, "%ld total events\n", events);
321
322 mutex_unlock(&show_mutex);
323
324 return 0;
325}
326
327/*
328 * After a state change, make sure all concurrent lookup/update
329 * activities have stopped:
330 */
331static void sync_access(void)
332{
333 unsigned long flags;
334 int cpu;
335
336 for_each_online_cpu(cpu) {
337 spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags);
338 /* nothing */
339 spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags);
340 }
341}
342
343static ssize_t tstats_write(struct file *file, const char __user *buf,
344 size_t count, loff_t *offs)
345{
346 char ctl[2];
347
348 if (count != 2 || *offs)
349 return -EINVAL;
350
351 if (copy_from_user(ctl, buf, count))
352 return -EFAULT;
353
354 mutex_lock(&show_mutex);
355 switch (ctl[0]) {
356 case '0':
357 if (active) {
358 active = 0;
359 time_stop = ktime_get();
360 sync_access();
361 }
362 break;
363 case '1':
364 if (!active) {
365 reset_entries();
366 time_start = ktime_get();
367 active = 1;
368 }
369 break;
370 default:
371 count = -EINVAL;
372 }
373 mutex_unlock(&show_mutex);
374
375 return count;
376}
377
378static int tstats_open(struct inode *inode, struct file *filp)
379{
380 return single_open(filp, tstats_show, NULL);
381}
382
383static struct file_operations tstats_fops = {
384 .open = tstats_open,
385 .read = seq_read,
386 .write = tstats_write,
387 .llseek = seq_lseek,
388 .release = seq_release,
389};
390
391void __init init_timer_stats(void)
392{
393 int cpu;
394
395 for_each_possible_cpu(cpu)
396 spin_lock_init(&per_cpu(lookup_lock, cpu));
397}
398
399static int __init init_tstats_procfs(void)
400{
401 struct proc_dir_entry *pe;
402
403 pe = create_proc_entry("timer_stats", 0644, NULL);
404 if (!pe)
405 return -ENOMEM;
406
407 pe->proc_fops = &tstats_fops;
408
409 return 0;
410}
411__initcall(init_tstats_procfs);
diff --git a/kernel/timer.c b/kernel/timer.c
index f058e6cfd50c..c3c67f3c0190 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -35,6 +35,7 @@
35#include <linux/syscalls.h> 35#include <linux/syscalls.h>
36#include <linux/delay.h> 36#include <linux/delay.h>
37#include <linux/tick.h> 37#include <linux/tick.h>
38#include <linux/kallsyms.h>
38 39
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
40#include <asm/unistd.h> 41#include <asm/unistd.h>
@@ -263,6 +264,18 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
263 list_add_tail(&timer->entry, vec); 264 list_add_tail(&timer->entry, vec);
264} 265}
265 266
267#ifdef CONFIG_TIMER_STATS
268void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
269{
270 if (timer->start_site)
271 return;
272
273 timer->start_site = addr;
274 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
275 timer->start_pid = current->pid;
276}
277#endif
278
266/** 279/**
267 * init_timer - initialize a timer. 280 * init_timer - initialize a timer.
268 * @timer: the timer to be initialized 281 * @timer: the timer to be initialized
@@ -274,11 +287,16 @@ void fastcall init_timer(struct timer_list *timer)
274{ 287{
275 timer->entry.next = NULL; 288 timer->entry.next = NULL;
276 timer->base = __raw_get_cpu_var(tvec_bases); 289 timer->base = __raw_get_cpu_var(tvec_bases);
290#ifdef CONFIG_TIMER_STATS
291 timer->start_site = NULL;
292 timer->start_pid = -1;
293 memset(timer->start_comm, 0, TASK_COMM_LEN);
294#endif
277} 295}
278EXPORT_SYMBOL(init_timer); 296EXPORT_SYMBOL(init_timer);
279 297
280static inline void detach_timer(struct timer_list *timer, 298static inline void detach_timer(struct timer_list *timer,
281 int clear_pending) 299 int clear_pending)
282{ 300{
283 struct list_head *entry = &timer->entry; 301 struct list_head *entry = &timer->entry;
284 302
@@ -325,6 +343,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
325 unsigned long flags; 343 unsigned long flags;
326 int ret = 0; 344 int ret = 0;
327 345
346 timer_stats_timer_set_start_info(timer);
328 BUG_ON(!timer->function); 347 BUG_ON(!timer->function);
329 348
330 base = lock_timer_base(timer, &flags); 349 base = lock_timer_base(timer, &flags);
@@ -375,6 +394,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
375 tvec_base_t *base = per_cpu(tvec_bases, cpu); 394 tvec_base_t *base = per_cpu(tvec_bases, cpu);
376 unsigned long flags; 395 unsigned long flags;
377 396
397 timer_stats_timer_set_start_info(timer);
378 BUG_ON(timer_pending(timer) || !timer->function); 398 BUG_ON(timer_pending(timer) || !timer->function);
379 spin_lock_irqsave(&base->lock, flags); 399 spin_lock_irqsave(&base->lock, flags);
380 timer->base = base; 400 timer->base = base;
@@ -407,6 +427,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
407{ 427{
408 BUG_ON(!timer->function); 428 BUG_ON(!timer->function);
409 429
430 timer_stats_timer_set_start_info(timer);
410 /* 431 /*
411 * This is a common optimization triggered by the 432 * This is a common optimization triggered by the
412 * networking code - if the timer is re-modified 433 * networking code - if the timer is re-modified
@@ -437,6 +458,7 @@ int del_timer(struct timer_list *timer)
437 unsigned long flags; 458 unsigned long flags;
438 int ret = 0; 459 int ret = 0;
439 460
461 timer_stats_timer_clear_start_info(timer);
440 if (timer_pending(timer)) { 462 if (timer_pending(timer)) {
441 base = lock_timer_base(timer, &flags); 463 base = lock_timer_base(timer, &flags);
442 if (timer_pending(timer)) { 464 if (timer_pending(timer)) {
@@ -570,6 +592,8 @@ static inline void __run_timers(tvec_base_t *base)
570 fn = timer->function; 592 fn = timer->function;
571 data = timer->data; 593 data = timer->data;
572 594
595 timer_stats_account_timer(timer);
596
573 set_running_timer(base, timer); 597 set_running_timer(base, timer);
574 detach_timer(timer, 1); 598 detach_timer(timer, 1);
575 spin_unlock_irq(&base->lock); 599 spin_unlock_irq(&base->lock);
@@ -1229,7 +1253,8 @@ static void run_timer_softirq(struct softirq_action *h)
1229{ 1253{
1230 tvec_base_t *base = __get_cpu_var(tvec_bases); 1254 tvec_base_t *base = __get_cpu_var(tvec_bases);
1231 1255
1232 hrtimer_run_queues(); 1256 hrtimer_run_queues();
1257
1233 if (time_after_eq(jiffies, base->timer_jiffies)) 1258 if (time_after_eq(jiffies, base->timer_jiffies))
1234 __run_timers(base); 1259 __run_timers(base);
1235} 1260}
@@ -1675,6 +1700,8 @@ void __init init_timers(void)
1675 int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, 1700 int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
1676 (void *)(long)smp_processor_id()); 1701 (void *)(long)smp_processor_id());
1677 1702
1703 init_timer_stats();
1704
1678 BUG_ON(err == NOTIFY_BAD); 1705 BUG_ON(err == NOTIFY_BAD);
1679 register_cpu_notifier(&timers_nb); 1706 register_cpu_notifier(&timers_nb);
1680 open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); 1707 open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 020d1fff57dc..b6fa5e63085d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -218,7 +218,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
218} 218}
219EXPORT_SYMBOL_GPL(queue_work); 219EXPORT_SYMBOL_GPL(queue_work);
220 220
221static void delayed_work_timer_fn(unsigned long __data) 221void delayed_work_timer_fn(unsigned long __data)
222{ 222{
223 struct delayed_work *dwork = (struct delayed_work *)__data; 223 struct delayed_work *dwork = (struct delayed_work *)__data;
224 struct workqueue_struct *wq = get_wq_data(&dwork->work); 224 struct workqueue_struct *wq = get_wq_data(&dwork->work);
@@ -245,6 +245,7 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
245 struct timer_list *timer = &dwork->timer; 245 struct timer_list *timer = &dwork->timer;
246 struct work_struct *work = &dwork->work; 246 struct work_struct *work = &dwork->work;
247 247
248 timer_stats_timer_set_start_info(timer);
248 if (delay == 0) 249 if (delay == 0)
249 return queue_work(wq, work); 250 return queue_work(wq, work);
250 251
@@ -593,8 +594,10 @@ EXPORT_SYMBOL(schedule_work);
593 * After waiting for a given time this puts a job in the kernel-global 594 * After waiting for a given time this puts a job in the kernel-global
594 * workqueue. 595 * workqueue.
595 */ 596 */
596int fastcall schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) 597int fastcall schedule_delayed_work(struct delayed_work *dwork,
598 unsigned long delay)
597{ 599{
600 timer_stats_timer_set_start_info(&dwork->timer);
598 return queue_delayed_work(keventd_wq, dwork, delay); 601 return queue_delayed_work(keventd_wq, dwork, delay);
599} 602}
600EXPORT_SYMBOL(schedule_delayed_work); 603EXPORT_SYMBOL(schedule_delayed_work);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 63f04c15e6f5..4448f91b865c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -134,6 +134,17 @@ config SCHEDSTATS
134 application, you can say N to avoid the very slight overhead 134 application, you can say N to avoid the very slight overhead
135 this adds. 135 this adds.
136 136
137config TIMER_STATS
138 bool "Collect kernel timers statistics"
139 depends on DEBUG_KERNEL && PROC_FS
140 help
141 If you say Y here, additional code will be inserted into the
142 timer routines to collect statistics about kernel timers being
143 reprogrammed. The statistics can be read from /proc/timer_stats.
144 The statistics collection is started by writing 1 to /proc/timer_stats,
145 writing 0 stops it. This feature is useful to collect information
146 about timer usage patterns in kernel and userspace.
147
137config DEBUG_SLAB 148config DEBUG_SLAB
138 bool "Debug slab memory allocations" 149 bool "Debug slab memory allocations"
139 depends on DEBUG_KERNEL && SLAB 150 depends on DEBUG_KERNEL && SLAB