aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/acct.c4
-rw-r--r--kernel/delayacct.c178
-rw-r--r--kernel/exit.c10
-rw-r--r--kernel/fork.c6
-rw-r--r--kernel/kallsyms.c4
-rw-r--r--kernel/kthread.c24
-rw-r--r--kernel/module.c11
-rw-r--r--kernel/rtmutex-tester.c1
-rw-r--r--kernel/sched.c84
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/taskstats.c568
-rw-r--r--kernel/timer.c20
13 files changed, 856 insertions, 58 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 47dbcd570c..d62ec66c1a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
48obj-$(CONFIG_SECCOMP) += seccomp.o 48obj-$(CONFIG_SECCOMP) += seccomp.o
49obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 49obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
50obj-$(CONFIG_RELAY) += relay.o 50obj-$(CONFIG_RELAY) += relay.o
51obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
52obj-$(CONFIG_TASKSTATS) += taskstats.o
51 53
52ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) 54ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
53# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 55# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/acct.c b/kernel/acct.c
index f18e0b8df3..2a7c933651 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -488,7 +488,7 @@ static void do_acct_process(struct file *file)
488 old_encode_dev(tty_devnum(current->signal->tty)) : 0; 488 old_encode_dev(tty_devnum(current->signal->tty)) : 0;
489 read_unlock(&tasklist_lock); 489 read_unlock(&tasklist_lock);
490 490
491 spin_lock(&current->sighand->siglock); 491 spin_lock_irq(&current->sighand->siglock);
492 ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); 492 ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
493 ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); 493 ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
494 ac.ac_flag = pacct->ac_flag; 494 ac.ac_flag = pacct->ac_flag;
@@ -496,7 +496,7 @@ static void do_acct_process(struct file *file)
496 ac.ac_minflt = encode_comp_t(pacct->ac_minflt); 496 ac.ac_minflt = encode_comp_t(pacct->ac_minflt);
497 ac.ac_majflt = encode_comp_t(pacct->ac_majflt); 497 ac.ac_majflt = encode_comp_t(pacct->ac_majflt);
498 ac.ac_exitcode = pacct->ac_exitcode; 498 ac.ac_exitcode = pacct->ac_exitcode;
499 spin_unlock(&current->sighand->siglock); 499 spin_unlock_irq(&current->sighand->siglock);
500 ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ 500 ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */
501 ac.ac_rw = encode_comp_t(ac.ac_io / 1024); 501 ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
502 ac.ac_swaps = encode_comp_t(0); 502 ac.ac_swaps = encode_comp_t(0);
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
new file mode 100644
index 0000000000..f05392d642
--- /dev/null
+++ b/kernel/delayacct.c
@@ -0,0 +1,178 @@
1/* delayacct.c - per-task delay accounting
2 *
3 * Copyright (C) Shailabh Nagar, IBM Corp. 2006
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it would be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
14 */
15
16#include <linux/sched.h>
17#include <linux/slab.h>
18#include <linux/time.h>
19#include <linux/sysctl.h>
20#include <linux/delayacct.h>
21
22int delayacct_on __read_mostly; /* Delay accounting turned on/off */
23kmem_cache_t *delayacct_cache;
24
25static int __init delayacct_setup_enable(char *str)
26{
27 delayacct_on = 1;
28 return 1;
29}
30__setup("delayacct", delayacct_setup_enable);
31
32void delayacct_init(void)
33{
34 delayacct_cache = kmem_cache_create("delayacct_cache",
35 sizeof(struct task_delay_info),
36 0,
37 SLAB_PANIC,
38 NULL, NULL);
39 delayacct_tsk_init(&init_task);
40}
41
42void __delayacct_tsk_init(struct task_struct *tsk)
43{
44 spin_lock_init(&tsk->delays_lock);
45 /* No need to acquire tsk->delays_lock for allocation here unless
46 __delayacct_tsk_init called after tsk is attached to tasklist
47 */
48 tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
49 if (tsk->delays)
50 spin_lock_init(&tsk->delays->lock);
51}
52
53void __delayacct_tsk_exit(struct task_struct *tsk)
54{
55 struct task_delay_info *delays = tsk->delays;
56 spin_lock(&tsk->delays_lock);
57 tsk->delays = NULL;
58 spin_unlock(&tsk->delays_lock);
59 kmem_cache_free(delayacct_cache, delays);
60}
61
62/*
63 * Start accounting for a delay statistic using
64 * its starting timestamp (@start)
65 */
66
67static inline void delayacct_start(struct timespec *start)
68{
69 do_posix_clock_monotonic_gettime(start);
70}
71
72/*
73 * Finish delay accounting for a statistic using
74 * its timestamps (@start, @end), accumalator (@total) and @count
75 */
76
77static void delayacct_end(struct timespec *start, struct timespec *end,
78 u64 *total, u32 *count)
79{
80 struct timespec ts;
81 s64 ns;
82
83 do_posix_clock_monotonic_gettime(end);
84 ts = timespec_sub(*end, *start);
85 ns = timespec_to_ns(&ts);
86 if (ns < 0)
87 return;
88
89 spin_lock(&current->delays->lock);
90 *total += ns;
91 (*count)++;
92 spin_unlock(&current->delays->lock);
93}
94
95void __delayacct_blkio_start(void)
96{
97 delayacct_start(&current->delays->blkio_start);
98}
99
100void __delayacct_blkio_end(void)
101{
102 if (current->delays->flags & DELAYACCT_PF_SWAPIN)
103 /* Swapin block I/O */
104 delayacct_end(&current->delays->blkio_start,
105 &current->delays->blkio_end,
106 &current->delays->swapin_delay,
107 &current->delays->swapin_count);
108 else /* Other block I/O */
109 delayacct_end(&current->delays->blkio_start,
110 &current->delays->blkio_end,
111 &current->delays->blkio_delay,
112 &current->delays->blkio_count);
113}
114
115int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
116{
117 s64 tmp;
118 struct timespec ts;
119 unsigned long t1,t2,t3;
120
121 spin_lock(&tsk->delays_lock);
122
123 /* Though tsk->delays accessed later, early exit avoids
124 * unnecessary returning of other data
125 */
126 if (!tsk->delays)
127 goto done;
128
129 tmp = (s64)d->cpu_run_real_total;
130 cputime_to_timespec(tsk->utime + tsk->stime, &ts);
131 tmp += timespec_to_ns(&ts);
132 d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
133
134 /*
135 * No locking available for sched_info (and too expensive to add one)
136 * Mitigate by taking snapshot of values
137 */
138 t1 = tsk->sched_info.pcnt;
139 t2 = tsk->sched_info.run_delay;
140 t3 = tsk->sched_info.cpu_time;
141
142 d->cpu_count += t1;
143
144 jiffies_to_timespec(t2, &ts);
145 tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
146 d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
147
148 tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
149 d->cpu_run_virtual_total =
150 (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
151
152 /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
153
154 spin_lock(&tsk->delays->lock);
155 tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
156 d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
157 tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
158 d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
159 d->blkio_count += tsk->delays->blkio_count;
160 d->swapin_count += tsk->delays->swapin_count;
161 spin_unlock(&tsk->delays->lock);
162
163done:
164 spin_unlock(&tsk->delays_lock);
165 return 0;
166}
167
168__u64 __delayacct_blkio_ticks(struct task_struct *tsk)
169{
170 __u64 ret;
171
172 spin_lock(&tsk->delays->lock);
173 ret = nsec_to_clock_t(tsk->delays->blkio_delay +
174 tsk->delays->swapin_delay);
175 spin_unlock(&tsk->delays->lock);
176 return ret;
177}
178
diff --git a/kernel/exit.c b/kernel/exit.c
index 6664c08478..dba194a8d4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -25,6 +25,8 @@
25#include <linux/mount.h> 25#include <linux/mount.h>
26#include <linux/proc_fs.h> 26#include <linux/proc_fs.h>
27#include <linux/mempolicy.h> 27#include <linux/mempolicy.h>
28#include <linux/taskstats_kern.h>
29#include <linux/delayacct.h>
28#include <linux/cpuset.h> 30#include <linux/cpuset.h>
29#include <linux/syscalls.h> 31#include <linux/syscalls.h>
30#include <linux/signal.h> 32#include <linux/signal.h>
@@ -843,7 +845,9 @@ static void exit_notify(struct task_struct *tsk)
843fastcall NORET_TYPE void do_exit(long code) 845fastcall NORET_TYPE void do_exit(long code)
844{ 846{
845 struct task_struct *tsk = current; 847 struct task_struct *tsk = current;
848 struct taskstats *tidstats;
846 int group_dead; 849 int group_dead;
850 unsigned int mycpu;
847 851
848 profile_task_exit(tsk); 852 profile_task_exit(tsk);
849 853
@@ -881,6 +885,8 @@ fastcall NORET_TYPE void do_exit(long code)
881 current->comm, current->pid, 885 current->comm, current->pid,
882 preempt_count()); 886 preempt_count());
883 887
888 taskstats_exit_alloc(&tidstats, &mycpu);
889
884 acct_update_integrals(tsk); 890 acct_update_integrals(tsk);
885 if (tsk->mm) { 891 if (tsk->mm) {
886 update_hiwater_rss(tsk->mm); 892 update_hiwater_rss(tsk->mm);
@@ -900,6 +906,10 @@ fastcall NORET_TYPE void do_exit(long code)
900#endif 906#endif
901 if (unlikely(tsk->audit_context)) 907 if (unlikely(tsk->audit_context))
902 audit_free(tsk); 908 audit_free(tsk);
909 taskstats_exit_send(tsk, tidstats, group_dead, mycpu);
910 taskstats_exit_free(tidstats);
911 delayacct_tsk_exit(tsk);
912
903 exit_mm(tsk); 913 exit_mm(tsk);
904 914
905 if (group_dead) 915 if (group_dead)
diff --git a/kernel/fork.c b/kernel/fork.c
index 926e5a68ea..1b0f7b1e08 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -43,6 +43,8 @@
43#include <linux/rmap.h> 43#include <linux/rmap.h>
44#include <linux/acct.h> 44#include <linux/acct.h>
45#include <linux/cn_proc.h> 45#include <linux/cn_proc.h>
46#include <linux/delayacct.h>
47#include <linux/taskstats_kern.h>
46 48
47#include <asm/pgtable.h> 49#include <asm/pgtable.h>
48#include <asm/pgalloc.h> 50#include <asm/pgalloc.h>
@@ -818,6 +820,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
818 if (clone_flags & CLONE_THREAD) { 820 if (clone_flags & CLONE_THREAD) {
819 atomic_inc(&current->signal->count); 821 atomic_inc(&current->signal->count);
820 atomic_inc(&current->signal->live); 822 atomic_inc(&current->signal->live);
823 taskstats_tgid_alloc(current->signal);
821 return 0; 824 return 0;
822 } 825 }
823 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); 826 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
@@ -862,6 +865,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
862 INIT_LIST_HEAD(&sig->cpu_timers[0]); 865 INIT_LIST_HEAD(&sig->cpu_timers[0]);
863 INIT_LIST_HEAD(&sig->cpu_timers[1]); 866 INIT_LIST_HEAD(&sig->cpu_timers[1]);
864 INIT_LIST_HEAD(&sig->cpu_timers[2]); 867 INIT_LIST_HEAD(&sig->cpu_timers[2]);
868 taskstats_tgid_init(sig);
865 869
866 task_lock(current->group_leader); 870 task_lock(current->group_leader);
867 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); 871 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
@@ -883,6 +887,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
883void __cleanup_signal(struct signal_struct *sig) 887void __cleanup_signal(struct signal_struct *sig)
884{ 888{
885 exit_thread_group_keys(sig); 889 exit_thread_group_keys(sig);
890 taskstats_tgid_free(sig);
886 kmem_cache_free(signal_cachep, sig); 891 kmem_cache_free(signal_cachep, sig);
887} 892}
888 893
@@ -1000,6 +1005,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1000 goto bad_fork_cleanup_put_domain; 1005 goto bad_fork_cleanup_put_domain;
1001 1006
1002 p->did_exec = 0; 1007 p->did_exec = 0;
1008 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
1003 copy_flags(clone_flags, p); 1009 copy_flags(clone_flags, p);
1004 p->pid = pid; 1010 p->pid = pid;
1005 retval = -EFAULT; 1011 retval = -EFAULT;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 39277dd6bf..ab16a5a4cf 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -275,8 +275,8 @@ static void upcase_if_global(struct kallsym_iter *iter)
275static int get_ksymbol_mod(struct kallsym_iter *iter) 275static int get_ksymbol_mod(struct kallsym_iter *iter)
276{ 276{
277 iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms, 277 iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms,
278 &iter->value, 278 &iter->value, &iter->type,
279 &iter->type, iter->name); 279 iter->name, sizeof(iter->name));
280 if (iter->owner == NULL) 280 if (iter->owner == NULL)
281 return 0; 281 return 0;
282 282
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 24be714b04..4f9c60ef95 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -216,23 +216,6 @@ EXPORT_SYMBOL(kthread_bind);
216 */ 216 */
217int kthread_stop(struct task_struct *k) 217int kthread_stop(struct task_struct *k)
218{ 218{
219 return kthread_stop_sem(k, NULL);
220}
221EXPORT_SYMBOL(kthread_stop);
222
223/**
224 * kthread_stop_sem - stop a thread created by kthread_create().
225 * @k: thread created by kthread_create().
226 * @s: semaphore that @k waits on while idle.
227 *
228 * Does essentially the same thing as kthread_stop() above, but wakes
229 * @k by calling up(@s).
230 *
231 * Returns the result of threadfn(), or %-EINTR if wake_up_process()
232 * was never called.
233 */
234int kthread_stop_sem(struct task_struct *k, struct semaphore *s)
235{
236 int ret; 219 int ret;
237 220
238 mutex_lock(&kthread_stop_lock); 221 mutex_lock(&kthread_stop_lock);
@@ -246,10 +229,7 @@ int kthread_stop_sem(struct task_struct *k, struct semaphore *s)
246 229
247 /* Now set kthread_should_stop() to true, and wake it up. */ 230 /* Now set kthread_should_stop() to true, and wake it up. */
248 kthread_stop_info.k = k; 231 kthread_stop_info.k = k;
249 if (s) 232 wake_up_process(k);
250 up(s);
251 else
252 wake_up_process(k);
253 put_task_struct(k); 233 put_task_struct(k);
254 234
255 /* Once it dies, reset stop ptr, gather result and we're done. */ 235 /* Once it dies, reset stop ptr, gather result and we're done. */
@@ -260,7 +240,7 @@ int kthread_stop_sem(struct task_struct *k, struct semaphore *s)
260 240
261 return ret; 241 return ret;
262} 242}
263EXPORT_SYMBOL(kthread_stop_sem); 243EXPORT_SYMBOL(kthread_stop);
264 244
265static __init int helper_init(void) 245static __init int helper_init(void)
266{ 246{
diff --git a/kernel/module.c b/kernel/module.c
index 35e1b1f859..2a19cd47c0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2019,10 +2019,8 @@ const char *module_address_lookup(unsigned long addr,
2019 return NULL; 2019 return NULL;
2020} 2020}
2021 2021
2022struct module *module_get_kallsym(unsigned int symnum, 2022struct module *module_get_kallsym(unsigned int symnum, unsigned long *value,
2023 unsigned long *value, 2023 char *type, char *name, size_t namelen)
2024 char *type,
2025 char namebuf[128])
2026{ 2024{
2027 struct module *mod; 2025 struct module *mod;
2028 2026
@@ -2031,9 +2029,8 @@ struct module *module_get_kallsym(unsigned int symnum,
2031 if (symnum < mod->num_symtab) { 2029 if (symnum < mod->num_symtab) {
2032 *value = mod->symtab[symnum].st_value; 2030 *value = mod->symtab[symnum].st_value;
2033 *type = mod->symtab[symnum].st_info; 2031 *type = mod->symtab[symnum].st_info;
2034 strncpy(namebuf, 2032 strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
2035 mod->strtab + mod->symtab[symnum].st_name, 2033 namelen);
2036 127);
2037 mutex_unlock(&module_mutex); 2034 mutex_unlock(&module_mutex);
2038 return mod; 2035 return mod;
2039 } 2036 }
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 494dac872a..948bd8f643 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -275,6 +275,7 @@ static int test_func(void *data)
275 275
276 /* Wait for the next command to be executed */ 276 /* Wait for the next command to be executed */
277 schedule(); 277 schedule();
278 try_to_freeze();
278 279
279 if (signal_pending(current)) 280 if (signal_pending(current))
280 flush_signals(current); 281 flush_signals(current);
diff --git a/kernel/sched.c b/kernel/sched.c
index d714611f16..b44b9a43b0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -51,6 +51,7 @@
51#include <linux/times.h> 51#include <linux/times.h>
52#include <linux/acct.h> 52#include <linux/acct.h>
53#include <linux/kprobes.h> 53#include <linux/kprobes.h>
54#include <linux/delayacct.h>
54#include <asm/tlb.h> 55#include <asm/tlb.h>
55 56
56#include <asm/unistd.h> 57#include <asm/unistd.h>
@@ -501,9 +502,36 @@ struct file_operations proc_schedstat_operations = {
501 .release = single_release, 502 .release = single_release,
502}; 503};
503 504
505/*
506 * Expects runqueue lock to be held for atomicity of update
507 */
508static inline void
509rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
510{
511 if (rq) {
512 rq->rq_sched_info.run_delay += delta_jiffies;
513 rq->rq_sched_info.pcnt++;
514 }
515}
516
517/*
518 * Expects runqueue lock to be held for atomicity of update
519 */
520static inline void
521rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
522{
523 if (rq)
524 rq->rq_sched_info.cpu_time += delta_jiffies;
525}
504# define schedstat_inc(rq, field) do { (rq)->field++; } while (0) 526# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
505# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) 527# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
506#else /* !CONFIG_SCHEDSTATS */ 528#else /* !CONFIG_SCHEDSTATS */
529static inline void
530rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
531{}
532static inline void
533rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
534{}
507# define schedstat_inc(rq, field) do { } while (0) 535# define schedstat_inc(rq, field) do { } while (0)
508# define schedstat_add(rq, field, amt) do { } while (0) 536# define schedstat_add(rq, field, amt) do { } while (0)
509#endif 537#endif
@@ -523,7 +551,7 @@ static inline struct rq *this_rq_lock(void)
523 return rq; 551 return rq;
524} 552}
525 553
526#ifdef CONFIG_SCHEDSTATS 554#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
527/* 555/*
528 * Called when a process is dequeued from the active array and given 556 * Called when a process is dequeued from the active array and given
529 * the cpu. We should note that with the exception of interactive 557 * the cpu. We should note that with the exception of interactive
@@ -551,21 +579,16 @@ static inline void sched_info_dequeued(struct task_struct *t)
551 */ 579 */
552static void sched_info_arrive(struct task_struct *t) 580static void sched_info_arrive(struct task_struct *t)
553{ 581{
554 unsigned long now = jiffies, diff = 0; 582 unsigned long now = jiffies, delta_jiffies = 0;
555 struct rq *rq = task_rq(t);
556 583
557 if (t->sched_info.last_queued) 584 if (t->sched_info.last_queued)
558 diff = now - t->sched_info.last_queued; 585 delta_jiffies = now - t->sched_info.last_queued;
559 sched_info_dequeued(t); 586 sched_info_dequeued(t);
560 t->sched_info.run_delay += diff; 587 t->sched_info.run_delay += delta_jiffies;
561 t->sched_info.last_arrival = now; 588 t->sched_info.last_arrival = now;
562 t->sched_info.pcnt++; 589 t->sched_info.pcnt++;
563 590
564 if (!rq) 591 rq_sched_info_arrive(task_rq(t), delta_jiffies);
565 return;
566
567 rq->rq_sched_info.run_delay += diff;
568 rq->rq_sched_info.pcnt++;
569} 592}
570 593
571/* 594/*
@@ -585,8 +608,9 @@ static void sched_info_arrive(struct task_struct *t)
585 */ 608 */
586static inline void sched_info_queued(struct task_struct *t) 609static inline void sched_info_queued(struct task_struct *t)
587{ 610{
588 if (!t->sched_info.last_queued) 611 if (unlikely(sched_info_on()))
589 t->sched_info.last_queued = jiffies; 612 if (!t->sched_info.last_queued)
613 t->sched_info.last_queued = jiffies;
590} 614}
591 615
592/* 616/*
@@ -595,13 +619,10 @@ static inline void sched_info_queued(struct task_struct *t)
595 */ 619 */
596static inline void sched_info_depart(struct task_struct *t) 620static inline void sched_info_depart(struct task_struct *t)
597{ 621{
598 struct rq *rq = task_rq(t); 622 unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival;
599 unsigned long diff = jiffies - t->sched_info.last_arrival;
600
601 t->sched_info.cpu_time += diff;
602 623
603 if (rq) 624 t->sched_info.cpu_time += delta_jiffies;
604 rq->rq_sched_info.cpu_time += diff; 625 rq_sched_info_depart(task_rq(t), delta_jiffies);
605} 626}
606 627
607/* 628/*
@@ -610,7 +631,7 @@ static inline void sched_info_depart(struct task_struct *t)
610 * the idle task.) We are only called when prev != next. 631 * the idle task.) We are only called when prev != next.
611 */ 632 */
612static inline void 633static inline void
613sched_info_switch(struct task_struct *prev, struct task_struct *next) 634__sched_info_switch(struct task_struct *prev, struct task_struct *next)
614{ 635{
615 struct rq *rq = task_rq(prev); 636 struct rq *rq = task_rq(prev);
616 637
@@ -625,10 +646,16 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
625 if (next != rq->idle) 646 if (next != rq->idle)
626 sched_info_arrive(next); 647 sched_info_arrive(next);
627} 648}
649static inline void
650sched_info_switch(struct task_struct *prev, struct task_struct *next)
651{
652 if (unlikely(sched_info_on()))
653 __sched_info_switch(prev, next);
654}
628#else 655#else
629#define sched_info_queued(t) do { } while (0) 656#define sched_info_queued(t) do { } while (0)
630#define sched_info_switch(t, next) do { } while (0) 657#define sched_info_switch(t, next) do { } while (0)
631#endif /* CONFIG_SCHEDSTATS */ 658#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
632 659
633/* 660/*
634 * Adding/removing a task to/from a priority array: 661 * Adding/removing a task to/from a priority array:
@@ -1530,8 +1557,9 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
1530 1557
1531 INIT_LIST_HEAD(&p->run_list); 1558 INIT_LIST_HEAD(&p->run_list);
1532 p->array = NULL; 1559 p->array = NULL;
1533#ifdef CONFIG_SCHEDSTATS 1560#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
1534 memset(&p->sched_info, 0, sizeof(p->sched_info)); 1561 if (unlikely(sched_info_on()))
1562 memset(&p->sched_info, 0, sizeof(p->sched_info));
1535#endif 1563#endif
1536#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) 1564#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
1537 p->oncpu = 0; 1565 p->oncpu = 0;
@@ -1788,7 +1816,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
1788 WARN_ON(rq->prev_mm); 1816 WARN_ON(rq->prev_mm);
1789 rq->prev_mm = oldmm; 1817 rq->prev_mm = oldmm;
1790 } 1818 }
1819 /*
1820 * Since the runqueue lock will be released by the next
1821 * task (which is an invalid locking op but in the case
1822 * of the scheduler it's an obvious special-case), so we
1823 * do an early lockdep release here:
1824 */
1825#ifndef __ARCH_WANT_UNLOCKED_CTXSW
1791 spin_release(&rq->lock.dep_map, 1, _THIS_IP_); 1826 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
1827#endif
1792 1828
1793 /* Here we just switch the register state and the stack. */ 1829 /* Here we just switch the register state and the stack. */
1794 switch_to(prev, next, prev); 1830 switch_to(prev, next, prev);
@@ -4526,9 +4562,11 @@ void __sched io_schedule(void)
4526{ 4562{
4527 struct rq *rq = &__raw_get_cpu_var(runqueues); 4563 struct rq *rq = &__raw_get_cpu_var(runqueues);
4528 4564
4565 delayacct_blkio_start();
4529 atomic_inc(&rq->nr_iowait); 4566 atomic_inc(&rq->nr_iowait);
4530 schedule(); 4567 schedule();
4531 atomic_dec(&rq->nr_iowait); 4568 atomic_dec(&rq->nr_iowait);
4569 delayacct_blkio_end();
4532} 4570}
4533EXPORT_SYMBOL(io_schedule); 4571EXPORT_SYMBOL(io_schedule);
4534 4572
@@ -4537,9 +4575,11 @@ long __sched io_schedule_timeout(long timeout)
4537 struct rq *rq = &__raw_get_cpu_var(runqueues); 4575 struct rq *rq = &__raw_get_cpu_var(runqueues);
4538 long ret; 4576 long ret;
4539 4577
4578 delayacct_blkio_start();
4540 atomic_inc(&rq->nr_iowait); 4579 atomic_inc(&rq->nr_iowait);
4541 ret = schedule_timeout(timeout); 4580 ret = schedule_timeout(timeout);
4542 atomic_dec(&rq->nr_iowait); 4581 atomic_dec(&rq->nr_iowait);
4582 delayacct_blkio_end();
4543 return ret; 4583 return ret;
4544} 4584}
4545 4585
diff --git a/kernel/softirq.c b/kernel/softirq.c
index fd12f2556f..0f08a84ae3 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -311,8 +311,6 @@ void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
311 softirq_vec[nr].action = action; 311 softirq_vec[nr].action = action;
312} 312}
313 313
314EXPORT_UNUSED_SYMBOL(open_softirq); /* June 2006 */
315
316/* Tasklets */ 314/* Tasklets */
317struct tasklet_head 315struct tasklet_head
318{ 316{
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
new file mode 100644
index 0000000000..f45179ce02
--- /dev/null
+++ b/kernel/taskstats.c
@@ -0,0 +1,568 @@
1/*
2 * taskstats.c - Export per-task statistics to userland
3 *
4 * Copyright (C) Shailabh Nagar, IBM Corp. 2006
5 * (C) Balbir Singh, IBM Corp. 2006
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 */
18
19#include <linux/kernel.h>
20#include <linux/taskstats_kern.h>
21#include <linux/delayacct.h>
22#include <linux/cpumask.h>
23#include <linux/percpu.h>
24#include <net/genetlink.h>
25#include <asm/atomic.h>
26
27/*
28 * Maximum length of a cpumask that can be specified in
29 * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute
30 */
31#define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS)
32
33static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
34static int family_registered;
35kmem_cache_t *taskstats_cache;
36
37static struct genl_family family = {
38 .id = GENL_ID_GENERATE,
39 .name = TASKSTATS_GENL_NAME,
40 .version = TASKSTATS_GENL_VERSION,
41 .maxattr = TASKSTATS_CMD_ATTR_MAX,
42};
43
44static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1]
45__read_mostly = {
46 [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 },
47 [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
48 [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
49 [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
50
51struct listener {
52 struct list_head list;
53 pid_t pid;
54 char valid;
55};
56
57struct listener_list {
58 struct rw_semaphore sem;
59 struct list_head list;
60};
61static DEFINE_PER_CPU(struct listener_list, listener_array);
62
63enum actions {
64 REGISTER,
65 DEREGISTER,
66 CPU_DONT_CARE
67};
68
69static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
70 void **replyp, size_t size)
71{
72 struct sk_buff *skb;
73 void *reply;
74
75 /*
76 * If new attributes are added, please revisit this allocation
77 */
78 skb = nlmsg_new(size);
79 if (!skb)
80 return -ENOMEM;
81
82 if (!info) {
83 int seq = get_cpu_var(taskstats_seqnum)++;
84 put_cpu_var(taskstats_seqnum);
85
86 reply = genlmsg_put(skb, 0, seq,
87 family.id, 0, 0,
88 cmd, family.version);
89 } else
90 reply = genlmsg_put(skb, info->snd_pid, info->snd_seq,
91 family.id, 0, 0,
92 cmd, family.version);
93 if (reply == NULL) {
94 nlmsg_free(skb);
95 return -EINVAL;
96 }
97
98 *skbp = skb;
99 *replyp = reply;
100 return 0;
101}
102
103/*
104 * Send taskstats data in @skb to listener with nl_pid @pid
105 */
106static int send_reply(struct sk_buff *skb, pid_t pid)
107{
108 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
109 void *reply = genlmsg_data(genlhdr);
110 int rc;
111
112 rc = genlmsg_end(skb, reply);
113 if (rc < 0) {
114 nlmsg_free(skb);
115 return rc;
116 }
117
118 return genlmsg_unicast(skb, pid);
119}
120
121/*
122 * Send taskstats data in @skb to listeners registered for @cpu's exit data
123 */
124static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
125{
126 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
127 struct listener_list *listeners;
128 struct listener *s, *tmp;
129 struct sk_buff *skb_next, *skb_cur = skb;
130 void *reply = genlmsg_data(genlhdr);
131 int rc, ret, delcount = 0;
132
133 rc = genlmsg_end(skb, reply);
134 if (rc < 0) {
135 nlmsg_free(skb);
136 return rc;
137 }
138
139 rc = 0;
140 listeners = &per_cpu(listener_array, cpu);
141 down_read(&listeners->sem);
142 list_for_each_entry_safe(s, tmp, &listeners->list, list) {
143 skb_next = NULL;
144 if (!list_is_last(&s->list, &listeners->list)) {
145 skb_next = skb_clone(skb_cur, GFP_KERNEL);
146 if (!skb_next) {
147 nlmsg_free(skb_cur);
148 rc = -ENOMEM;
149 break;
150 }
151 }
152 ret = genlmsg_unicast(skb_cur, s->pid);
153 if (ret == -ECONNREFUSED) {
154 s->valid = 0;
155 delcount++;
156 rc = ret;
157 }
158 skb_cur = skb_next;
159 }
160 up_read(&listeners->sem);
161
162 if (!delcount)
163 return rc;
164
165 /* Delete invalidated entries */
166 down_write(&listeners->sem);
167 list_for_each_entry_safe(s, tmp, &listeners->list, list) {
168 if (!s->valid) {
169 list_del(&s->list);
170 kfree(s);
171 }
172 }
173 up_write(&listeners->sem);
174 return rc;
175}
176
177static int fill_pid(pid_t pid, struct task_struct *pidtsk,
178 struct taskstats *stats)
179{
180 int rc;
181 struct task_struct *tsk = pidtsk;
182
183 if (!pidtsk) {
184 read_lock(&tasklist_lock);
185 tsk = find_task_by_pid(pid);
186 if (!tsk) {
187 read_unlock(&tasklist_lock);
188 return -ESRCH;
189 }
190 get_task_struct(tsk);
191 read_unlock(&tasklist_lock);
192 } else
193 get_task_struct(tsk);
194
195 /*
196 * Each accounting subsystem adds calls to its functions to
197 * fill in relevant parts of struct taskstsats as follows
198 *
199 * rc = per-task-foo(stats, tsk);
200 * if (rc)
201 * goto err;
202 */
203
204 rc = delayacct_add_tsk(stats, tsk);
205 stats->version = TASKSTATS_VERSION;
206
207 /* Define err: label here if needed */
208 put_task_struct(tsk);
209 return rc;
210
211}
212
213static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
214 struct taskstats *stats)
215{
216 struct task_struct *tsk, *first;
217 unsigned long flags;
218
219 /*
220 * Add additional stats from live tasks except zombie thread group
221 * leaders who are already counted with the dead tasks
222 */
223 first = tgidtsk;
224 if (!first) {
225 read_lock(&tasklist_lock);
226 first = find_task_by_pid(tgid);
227 if (!first) {
228 read_unlock(&tasklist_lock);
229 return -ESRCH;
230 }
231 get_task_struct(first);
232 read_unlock(&tasklist_lock);
233 } else
234 get_task_struct(first);
235
236 /* Start with stats from dead tasks */
237 spin_lock_irqsave(&first->signal->stats_lock, flags);
238 if (first->signal->stats)
239 memcpy(stats, first->signal->stats, sizeof(*stats));
240 spin_unlock_irqrestore(&first->signal->stats_lock, flags);
241
242 tsk = first;
243 read_lock(&tasklist_lock);
244 do {
245 if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk))
246 continue;
247 /*
248 * Accounting subsystem can call its functions here to
249 * fill in relevant parts of struct taskstsats as follows
250 *
251 * per-task-foo(stats, tsk);
252 */
253 delayacct_add_tsk(stats, tsk);
254
255 } while_each_thread(first, tsk);
256 read_unlock(&tasklist_lock);
257 stats->version = TASKSTATS_VERSION;
258
259 /*
260 * Accounting subsytems can also add calls here to modify
261 * fields of taskstats.
262 */
263
264 return 0;
265}
266
267
268static void fill_tgid_exit(struct task_struct *tsk)
269{
270 unsigned long flags;
271
272 spin_lock_irqsave(&tsk->signal->stats_lock, flags);
273 if (!tsk->signal->stats)
274 goto ret;
275
276 /*
277 * Each accounting subsystem calls its functions here to
278 * accumalate its per-task stats for tsk, into the per-tgid structure
279 *
280 * per-task-foo(tsk->signal->stats, tsk);
281 */
282 delayacct_add_tsk(tsk->signal->stats, tsk);
283ret:
284 spin_unlock_irqrestore(&tsk->signal->stats_lock, flags);
285 return;
286}
287
288static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
289{
290 struct listener_list *listeners;
291 struct listener *s, *tmp;
292 unsigned int cpu;
293 cpumask_t mask = *maskp;
294
295 if (!cpus_subset(mask, cpu_possible_map))
296 return -EINVAL;
297
298 if (isadd == REGISTER) {
299 for_each_cpu_mask(cpu, mask) {
300 s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
301 cpu_to_node(cpu));
302 if (!s)
303 goto cleanup;
304 s->pid = pid;
305 INIT_LIST_HEAD(&s->list);
306 s->valid = 1;
307
308 listeners = &per_cpu(listener_array, cpu);
309 down_write(&listeners->sem);
310 list_add(&s->list, &listeners->list);
311 up_write(&listeners->sem);
312 }
313 return 0;
314 }
315
316 /* Deregister or cleanup */
317cleanup:
318 for_each_cpu_mask(cpu, mask) {
319 listeners = &per_cpu(listener_array, cpu);
320 down_write(&listeners->sem);
321 list_for_each_entry_safe(s, tmp, &listeners->list, list) {
322 if (s->pid == pid) {
323 list_del(&s->list);
324 kfree(s);
325 break;
326 }
327 }
328 up_write(&listeners->sem);
329 }
330 return 0;
331}
332
333static int parse(struct nlattr *na, cpumask_t *mask)
334{
335 char *data;
336 int len;
337 int ret;
338
339 if (na == NULL)
340 return 1;
341 len = nla_len(na);
342 if (len > TASKSTATS_CPUMASK_MAXLEN)
343 return -E2BIG;
344 if (len < 1)
345 return -EINVAL;
346 data = kmalloc(len, GFP_KERNEL);
347 if (!data)
348 return -ENOMEM;
349 nla_strlcpy(data, na, len);
350 ret = cpulist_parse(data, *mask);
351 kfree(data);
352 return ret;
353}
354
355static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
356{
357 int rc = 0;
358 struct sk_buff *rep_skb;
359 struct taskstats stats;
360 void *reply;
361 size_t size;
362 struct nlattr *na;
363 cpumask_t mask;
364
365 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
366 if (rc < 0)
367 return rc;
368 if (rc == 0)
369 return add_del_listener(info->snd_pid, &mask, REGISTER);
370
371 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask);
372 if (rc < 0)
373 return rc;
374 if (rc == 0)
375 return add_del_listener(info->snd_pid, &mask, DEREGISTER);
376
377 /*
378 * Size includes space for nested attributes
379 */
380 size = nla_total_size(sizeof(u32)) +
381 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
382
383 memset(&stats, 0, sizeof(stats));
384 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
385 if (rc < 0)
386 return rc;
387
388 if (info->attrs[TASKSTATS_CMD_ATTR_PID]) {
389 u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
390 rc = fill_pid(pid, NULL, &stats);
391 if (rc < 0)
392 goto err;
393
394 na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
395 NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid);
396 NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
397 stats);
398 } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) {
399 u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
400 rc = fill_tgid(tgid, NULL, &stats);
401 if (rc < 0)
402 goto err;
403
404 na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
405 NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid);
406 NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
407 stats);
408 } else {
409 rc = -EINVAL;
410 goto err;
411 }
412
413 nla_nest_end(rep_skb, na);
414
415 return send_reply(rep_skb, info->snd_pid);
416
417nla_put_failure:
418 return genlmsg_cancel(rep_skb, reply);
419err:
420 nlmsg_free(rep_skb);
421 return rc;
422}
423
424void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
425{
426 struct listener_list *listeners;
427 struct taskstats *tmp;
428 /*
429 * This is the cpu on which the task is exiting currently and will
430 * be the one for which the exit event is sent, even if the cpu
431 * on which this function is running changes later.
432 */
433 *mycpu = raw_smp_processor_id();
434
435 *ptidstats = NULL;
436 tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
437 if (!tmp)
438 return;
439
440 listeners = &per_cpu(listener_array, *mycpu);
441 down_read(&listeners->sem);
442 if (!list_empty(&listeners->list)) {
443 *ptidstats = tmp;
444 tmp = NULL;
445 }
446 up_read(&listeners->sem);
447 kfree(tmp);
448}
449
450/* Send pid data out on exit */
451void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
452 int group_dead, unsigned int mycpu)
453{
454 int rc;
455 struct sk_buff *rep_skb;
456 void *reply;
457 size_t size;
458 int is_thread_group;
459 struct nlattr *na;
460 unsigned long flags;
461
462 if (!family_registered || !tidstats)
463 return;
464
465 spin_lock_irqsave(&tsk->signal->stats_lock, flags);
466 is_thread_group = tsk->signal->stats ? 1 : 0;
467 spin_unlock_irqrestore(&tsk->signal->stats_lock, flags);
468
469 rc = 0;
470 /*
471 * Size includes space for nested attributes
472 */
473 size = nla_total_size(sizeof(u32)) +
474 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
475
476 if (is_thread_group)
477 size = 2 * size; /* PID + STATS + TGID + STATS */
478
479 rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
480 if (rc < 0)
481 goto ret;
482
483 rc = fill_pid(tsk->pid, tsk, tidstats);
484 if (rc < 0)
485 goto err_skb;
486
487 na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
488 NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid);
489 NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
490 *tidstats);
491 nla_nest_end(rep_skb, na);
492
493 if (!is_thread_group)
494 goto send;
495
496 /*
497 * tsk has/had a thread group so fill the tsk->signal->stats structure
498 * Doesn't matter if tsk is the leader or the last group member leaving
499 */
500
501 fill_tgid_exit(tsk);
502 if (!group_dead)
503 goto send;
504
505 na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
506 NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid);
507 /* No locking needed for tsk->signal->stats since group is dead */
508 NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
509 *tsk->signal->stats);
510 nla_nest_end(rep_skb, na);
511
512send:
513 send_cpu_listeners(rep_skb, mycpu);
514 return;
515
516nla_put_failure:
517 genlmsg_cancel(rep_skb, reply);
518 goto ret;
519err_skb:
520 nlmsg_free(rep_skb);
521ret:
522 return;
523}
524
525static struct genl_ops taskstats_ops = {
526 .cmd = TASKSTATS_CMD_GET,
527 .doit = taskstats_user_cmd,
528 .policy = taskstats_cmd_get_policy,
529};
530
531/* Needed early in initialization */
532void __init taskstats_init_early(void)
533{
534 unsigned int i;
535
536 taskstats_cache = kmem_cache_create("taskstats_cache",
537 sizeof(struct taskstats),
538 0, SLAB_PANIC, NULL, NULL);
539 for_each_possible_cpu(i) {
540 INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
541 init_rwsem(&(per_cpu(listener_array, i).sem));
542 }
543}
544
545static int __init taskstats_init(void)
546{
547 int rc;
548
549 rc = genl_register_family(&family);
550 if (rc)
551 return rc;
552
553 rc = genl_register_ops(&family, &taskstats_ops);
554 if (rc < 0)
555 goto err;
556
557 family_registered = 1;
558 return 0;
559err:
560 genl_unregister_family(&family);
561 return rc;
562}
563
564/*
565 * late initcall ensures initialization of statistics collection
566 * mechanisms precedes initialization of the taskstats interface
567 */
568late_initcall(taskstats_init);
diff --git a/kernel/timer.c b/kernel/timer.c
index 2a87430a58..05809c2e2f 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -374,6 +374,7 @@ int del_timer_sync(struct timer_list *timer)
374 int ret = try_to_del_timer_sync(timer); 374 int ret = try_to_del_timer_sync(timer);
375 if (ret >= 0) 375 if (ret >= 0)
376 return ret; 376 return ret;
377 cpu_relax();
377 } 378 }
378} 379}
379 380
@@ -968,6 +969,7 @@ void __init timekeeping_init(void)
968} 969}
969 970
970 971
972static int timekeeping_suspended;
971/* 973/*
972 * timekeeping_resume - Resumes the generic timekeeping subsystem. 974 * timekeeping_resume - Resumes the generic timekeeping subsystem.
973 * @dev: unused 975 * @dev: unused
@@ -983,6 +985,18 @@ static int timekeeping_resume(struct sys_device *dev)
983 write_seqlock_irqsave(&xtime_lock, flags); 985 write_seqlock_irqsave(&xtime_lock, flags);
984 /* restart the last cycle value */ 986 /* restart the last cycle value */
985 clock->cycle_last = clocksource_read(clock); 987 clock->cycle_last = clocksource_read(clock);
988 clock->error = 0;
989 timekeeping_suspended = 0;
990 write_sequnlock_irqrestore(&xtime_lock, flags);
991 return 0;
992}
993
994static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
995{
996 unsigned long flags;
997
998 write_seqlock_irqsave(&xtime_lock, flags);
999 timekeeping_suspended = 1;
986 write_sequnlock_irqrestore(&xtime_lock, flags); 1000 write_sequnlock_irqrestore(&xtime_lock, flags);
987 return 0; 1001 return 0;
988} 1002}
@@ -990,6 +1004,7 @@ static int timekeeping_resume(struct sys_device *dev)
990/* sysfs resume/suspend bits for timekeeping */ 1004/* sysfs resume/suspend bits for timekeeping */
991static struct sysdev_class timekeeping_sysclass = { 1005static struct sysdev_class timekeeping_sysclass = {
992 .resume = timekeeping_resume, 1006 .resume = timekeeping_resume,
1007 .suspend = timekeeping_suspend,
993 set_kset_name("timekeeping"), 1008 set_kset_name("timekeeping"),
994}; 1009};
995 1010
@@ -1100,13 +1115,16 @@ static void update_wall_time(void)
1100{ 1115{
1101 cycle_t offset; 1116 cycle_t offset;
1102 1117
1103 clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift; 1118 /* Make sure we're fully resumed: */
1119 if (unlikely(timekeeping_suspended))
1120 return;
1104 1121
1105#ifdef CONFIG_GENERIC_TIME 1122#ifdef CONFIG_GENERIC_TIME
1106 offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; 1123 offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask;
1107#else 1124#else
1108 offset = clock->cycle_interval; 1125 offset = clock->cycle_interval;
1109#endif 1126#endif
1127 clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
1110 1128
1111 /* normally this loop will run just once, however in the 1129 /* normally this loop will run just once, however in the
1112 * case of lost or late ticks, it will accumulate correctly. 1130 * case of lost or late ticks, it will accumulate correctly.