Diffstat (limited to 'kernel')
 -rw-r--r--   kernel/Makefile          |   2
 -rw-r--r--   kernel/acct.c            |   4
 -rw-r--r--   kernel/delayacct.c       | 178
 -rw-r--r--   kernel/exit.c            |  10
 -rw-r--r--   kernel/fork.c            |   6
 -rw-r--r--   kernel/kallsyms.c        |   4
 -rw-r--r--   kernel/kthread.c         |  24
 -rw-r--r--   kernel/module.c          |  11
 -rw-r--r--   kernel/rtmutex-tester.c  |   1
 -rw-r--r--   kernel/sched.c           |  84
 -rw-r--r--   kernel/softirq.c         |   2
 -rw-r--r--   kernel/taskstats.c       | 568
 -rw-r--r--   kernel/timer.c           |  20
 13 files changed, 856 insertions, 58 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 47dbcd570c..d62ec66c1a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
+obj-$(CONFIG_TASKSTATS) += taskstats.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/acct.c b/kernel/acct.c
index f18e0b8df3..2a7c933651 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -488,7 +488,7 @@ static void do_acct_process(struct file *file)
 		old_encode_dev(tty_devnum(current->signal->tty)) : 0;
 	read_unlock(&tasklist_lock);
 
-	spin_lock(&current->sighand->siglock);
+	spin_lock_irq(&current->sighand->siglock);
 	ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
 	ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
 	ac.ac_flag = pacct->ac_flag;
@@ -496,7 +496,7 @@ static void do_acct_process(struct file *file)
 	ac.ac_minflt = encode_comp_t(pacct->ac_minflt);
 	ac.ac_majflt = encode_comp_t(pacct->ac_majflt);
 	ac.ac_exitcode = pacct->ac_exitcode;
-	spin_unlock(&current->sighand->siglock);
+	spin_unlock_irq(&current->sighand->siglock);
 	ac.ac_io = encode_comp_t(0 /* current->io_usage */);	/* %% */
 	ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
 	ac.ac_swaps = encode_comp_t(0);
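
The switch to the _irq lock variants above matters because siglock can also be taken from timer/interrupt context; holding it with interrupts enabled opens a deadlock window. A rough userspace analogy of spin_lock() versus spin_lock_irq(), using signal blocking the way the kernel disables local interrupts (all names illustrative, not from the patch; build with cc -pthread):

#include <pthread.h>
#include <signal.h>
#include <stdio.h>

static pthread_mutex_t acct_lock = PTHREAD_MUTEX_INITIALIZER;
static long ac_utime_total;	/* imagine this is also touched by a SIGALRM handler */

static void lock_irq_style(sigset_t *saved)
{
	sigset_t block;

	sigemptyset(&block);
	sigaddset(&block, SIGALRM);
	/* block the "interrupt" first, as spin_lock_irq() disables IRQs */
	pthread_sigmask(SIG_BLOCK, &block, saved);
	pthread_mutex_lock(&acct_lock);
}

static void unlock_irq_style(const sigset_t *saved)
{
	pthread_mutex_unlock(&acct_lock);
	pthread_sigmask(SIG_SETMASK, saved, NULL);	/* "re-enable IRQs" */
}

int main(void)
{
	sigset_t saved;

	lock_irq_style(&saved);
	ac_utime_total += 42;	/* read/update the shared accounting fields */
	unlock_irq_style(&saved);
	printf("%ld\n", ac_utime_total);
	return 0;
}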
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
new file mode 100644
index 0000000000..f05392d642
--- /dev/null
+++ b/kernel/delayacct.c
@@ -0,0 +1,178 @@
+/* delayacct.c - per-task delay accounting
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/sysctl.h>
+#include <linux/delayacct.h>
+
+int delayacct_on __read_mostly;	/* Delay accounting turned on/off */
+kmem_cache_t *delayacct_cache;
+
+static int __init delayacct_setup_enable(char *str)
+{
+	delayacct_on = 1;
+	return 1;
+}
+__setup("delayacct", delayacct_setup_enable);
+
+void delayacct_init(void)
+{
+	delayacct_cache = kmem_cache_create("delayacct_cache",
+					sizeof(struct task_delay_info),
+					0,
+					SLAB_PANIC,
+					NULL, NULL);
+	delayacct_tsk_init(&init_task);
+}
+
+void __delayacct_tsk_init(struct task_struct *tsk)
+{
+	spin_lock_init(&tsk->delays_lock);
+	/* No need to acquire tsk->delays_lock for allocation here unless
+	   __delayacct_tsk_init is called after tsk is attached to the tasklist
+	*/
+	tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
+	if (tsk->delays)
+		spin_lock_init(&tsk->delays->lock);
+}
+
+void __delayacct_tsk_exit(struct task_struct *tsk)
+{
+	struct task_delay_info *delays = tsk->delays;
+	spin_lock(&tsk->delays_lock);
+	tsk->delays = NULL;
+	spin_unlock(&tsk->delays_lock);
+	kmem_cache_free(delayacct_cache, delays);
+}
+
+/*
+ * Start accounting for a delay statistic using
+ * its starting timestamp (@start)
+ */
+
+static inline void delayacct_start(struct timespec *start)
+{
+	do_posix_clock_monotonic_gettime(start);
+}
+
+/*
+ * Finish delay accounting for a statistic using
+ * its timestamps (@start, @end), accumulator (@total) and @count
+ */
+
+static void delayacct_end(struct timespec *start, struct timespec *end,
+				u64 *total, u32 *count)
+{
+	struct timespec ts;
+	s64 ns;
+
+	do_posix_clock_monotonic_gettime(end);
+	ts = timespec_sub(*end, *start);
+	ns = timespec_to_ns(&ts);
+	if (ns < 0)
+		return;
+
+	spin_lock(&current->delays->lock);
+	*total += ns;
+	(*count)++;
+	spin_unlock(&current->delays->lock);
+}
+
+void __delayacct_blkio_start(void)
+{
+	delayacct_start(&current->delays->blkio_start);
+}
+
+void __delayacct_blkio_end(void)
+{
+	if (current->delays->flags & DELAYACCT_PF_SWAPIN)
+		/* Swapin block I/O */
+		delayacct_end(&current->delays->blkio_start,
+			&current->delays->blkio_end,
+			&current->delays->swapin_delay,
+			&current->delays->swapin_count);
+	else	/* Other block I/O */
+		delayacct_end(&current->delays->blkio_start,
+			&current->delays->blkio_end,
+			&current->delays->blkio_delay,
+			&current->delays->blkio_count);
+}
+
+int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
+{
+	s64 tmp;
+	struct timespec ts;
+	unsigned long t1, t2, t3;
+
+	spin_lock(&tsk->delays_lock);
+
+	/* Though tsk->delays is accessed later, the early exit avoids
+	 * unnecessary returning of other data
+	 */
+	if (!tsk->delays)
+		goto done;
+
+	tmp = (s64)d->cpu_run_real_total;
+	cputime_to_timespec(tsk->utime + tsk->stime, &ts);
+	tmp += timespec_to_ns(&ts);
+	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
+
+	/*
+	 * No locking available for sched_info (and too expensive to add one)
+	 * Mitigate by taking snapshot of values
+	 */
+	t1 = tsk->sched_info.pcnt;
+	t2 = tsk->sched_info.run_delay;
+	t3 = tsk->sched_info.cpu_time;
+
+	d->cpu_count += t1;
+
+	jiffies_to_timespec(t2, &ts);
+	tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
+	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
+
+	tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
+	d->cpu_run_virtual_total =
+		(tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
+
+	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
+
+	spin_lock(&tsk->delays->lock);
+	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
+	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
+	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
+	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+	d->blkio_count += tsk->delays->blkio_count;
+	d->swapin_count += tsk->delays->swapin_count;
+	spin_unlock(&tsk->delays->lock);
+
+done:
+	spin_unlock(&tsk->delays_lock);
+	return 0;
+}
+
+__u64 __delayacct_blkio_ticks(struct task_struct *tsk)
+{
+	__u64 ret;
+
+	spin_lock(&tsk->delays->lock);
+	ret = nsec_to_clock_t(tsk->delays->blkio_delay +
+				tsk->delays->swapin_delay);
+	spin_unlock(&tsk->delays->lock);
+	return ret;
+}
+
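
The core pattern in delayacct.c is small: record a monotonic timestamp when the task starts waiting, and on completion add the delta (guarding against clock anomalies) to a running total plus a count. A self-contained userspace sketch of the same pattern, using CLOCK_MONOTONIC in place of do_posix_clock_monotonic_gettime() (all names illustrative):

#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

struct delay_stats {
	uint64_t total_ns;	/* cumulative delay */
	uint32_t count;		/* number of intervals measured */
};

static struct timespec delay_start(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts;
}

static void delay_end(struct delay_stats *s, struct timespec start)
{
	struct timespec end;
	int64_t ns;

	clock_gettime(CLOCK_MONOTONIC, &end);
	ns = (end.tv_sec - start.tv_sec) * 1000000000LL +
	     (end.tv_nsec - start.tv_nsec);
	if (ns < 0)	/* mirror the kernel's guard against negative deltas */
		return;
	s->total_ns += ns;
	s->count++;
}

int main(void)
{
	struct delay_stats io = { 0, 0 };
	struct timespec t = delay_start();

	usleep(10000);	/* stand-in for a blocking block-I/O operation */
	delay_end(&io, t);
	printf("%u delays, %llu ns total\n", (unsigned)io.count,
	       (unsigned long long)io.total_ns);
	return 0;
}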
diff --git a/kernel/exit.c b/kernel/exit.c
index 6664c08478..dba194a8d4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -25,6 +25,8 @@
 #include <linux/mount.h>
 #include <linux/proc_fs.h>
 #include <linux/mempolicy.h>
+#include <linux/taskstats_kern.h>
+#include <linux/delayacct.h>
 #include <linux/cpuset.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
@@ -843,7 +845,9 @@ static void exit_notify(struct task_struct *tsk)
 fastcall NORET_TYPE void do_exit(long code)
 {
 	struct task_struct *tsk = current;
+	struct taskstats *tidstats;
 	int group_dead;
+	unsigned int mycpu;
 
 	profile_task_exit(tsk);
 
@@ -881,6 +885,8 @@ fastcall NORET_TYPE void do_exit(long code)
 				current->comm, current->pid,
 				preempt_count());
 
+	taskstats_exit_alloc(&tidstats, &mycpu);
+
 	acct_update_integrals(tsk);
 	if (tsk->mm) {
 		update_hiwater_rss(tsk->mm);
@@ -900,6 +906,10 @@ fastcall NORET_TYPE void do_exit(long code)
 #endif
 	if (unlikely(tsk->audit_context))
 		audit_free(tsk);
+	taskstats_exit_send(tsk, tidstats, group_dead, mycpu);
+	taskstats_exit_free(tidstats);
+	delayacct_tsk_exit(tsk);
+
 	exit_mm(tsk);
 
 	if (group_dead)
diff --git a/kernel/fork.c b/kernel/fork.c
index 926e5a68ea..1b0f7b1e08 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -43,6 +43,8 @@
 #include <linux/rmap.h>
 #include <linux/acct.h>
 #include <linux/cn_proc.h>
+#include <linux/delayacct.h>
+#include <linux/taskstats_kern.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -818,6 +820,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	if (clone_flags & CLONE_THREAD) {
 		atomic_inc(&current->signal->count);
 		atomic_inc(&current->signal->live);
+		taskstats_tgid_alloc(current->signal);
 		return 0;
 	}
 	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
@@ -862,6 +865,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
 	INIT_LIST_HEAD(&sig->cpu_timers[2]);
+	taskstats_tgid_init(sig);
 
 	task_lock(current->group_leader);
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
@@ -883,6 +887,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 void __cleanup_signal(struct signal_struct *sig)
 {
 	exit_thread_group_keys(sig);
+	taskstats_tgid_free(sig);
 	kmem_cache_free(signal_cachep, sig);
 }
 
@@ -1000,6 +1005,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto bad_fork_cleanup_put_domain;
 
 	p->did_exec = 0;
+	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
 	copy_flags(clone_flags, p);
 	p->pid = pid;
 	retval = -EFAULT;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 39277dd6bf..ab16a5a4cf 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -275,8 +275,8 @@ static void upcase_if_global(struct kallsym_iter *iter)
 static int get_ksymbol_mod(struct kallsym_iter *iter)
 {
 	iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms,
-					 &iter->value,
-					 &iter->type, iter->name);
+					 &iter->value, &iter->type,
+					 iter->name, sizeof(iter->name));
 	if (iter->owner == NULL)
 		return 0;
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 24be714b04..4f9c60ef95 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -216,23 +216,6 @@ EXPORT_SYMBOL(kthread_bind);
  */
 int kthread_stop(struct task_struct *k)
 {
-	return kthread_stop_sem(k, NULL);
-}
-EXPORT_SYMBOL(kthread_stop);
-
-/**
- * kthread_stop_sem - stop a thread created by kthread_create().
- * @k: thread created by kthread_create().
- * @s: semaphore that @k waits on while idle.
- *
- * Does essentially the same thing as kthread_stop() above, but wakes
- * @k by calling up(@s).
- *
- * Returns the result of threadfn(), or %-EINTR if wake_up_process()
- * was never called.
- */
-int kthread_stop_sem(struct task_struct *k, struct semaphore *s)
-{
 	int ret;
 
 	mutex_lock(&kthread_stop_lock);
@@ -246,10 +229,7 @@ int kthread_stop_sem(struct task_struct *k, struct semaphore *s)
 
 	/* Now set kthread_should_stop() to true, and wake it up. */
 	kthread_stop_info.k = k;
-	if (s)
-		up(s);
-	else
-		wake_up_process(k);
+	wake_up_process(k);
 	put_task_struct(k);
 
 	/* Once it dies, reset stop ptr, gather result and we're done. */
@@ -260,7 +240,7 @@ int kthread_stop_sem(struct task_struct *k, struct semaphore *s)
 
 	return ret;
 }
-EXPORT_SYMBOL(kthread_stop_sem);
+EXPORT_SYMBOL(kthread_stop);
 
 static __init int helper_init(void)
 {
diff --git a/kernel/module.c b/kernel/module.c
index 35e1b1f859..2a19cd47c0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2019,10 +2019,8 @@ const char *module_address_lookup(unsigned long addr,
 	return NULL;
 }
 
-struct module *module_get_kallsym(unsigned int symnum,
-				  unsigned long *value,
-				  char *type,
-				  char namebuf[128])
+struct module *module_get_kallsym(unsigned int symnum, unsigned long *value,
+				  char *type, char *name, size_t namelen)
 {
 	struct module *mod;
 
@@ -2031,9 +2029,8 @@ struct module *module_get_kallsym(unsigned int symnum,
 	if (symnum < mod->num_symtab) {
 		*value = mod->symtab[symnum].st_value;
 		*type = mod->symtab[symnum].st_info;
-		strncpy(namebuf,
-			mod->strtab + mod->symtab[symnum].st_name,
-			127);
+		strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
+			namelen);
 		mutex_unlock(&module_mutex);
 		return mod;
 	}
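
The kallsyms.c and module.c hunks are two halves of one interface fix: module_get_kallsym() now takes the destination buffer's size and uses strlcpy(), which always NUL-terminates and never writes past the stated length, instead of strncpy() with a hard-coded 127. A minimal demonstration of the difference (strlcpy is a BSD/kernel API, so a local copy is sketched here; names are illustrative):

#include <stdio.h>
#include <string.h>

/* Minimal strlcpy for portability; the kernel provides its own. */
static size_t my_strlcpy(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (size) {
		size_t n = (len >= size) ? size - 1 : len;
		memcpy(dst, src, n);
		dst[n] = '\0';
	}
	return len;	/* length it tried to create, so truncation is detectable */
}

int main(void)
{
	char a[8], b[8];

	strncpy(a, "delayacct_cache", sizeof(a));	/* a is NOT NUL-terminated */
	my_strlcpy(b, "delayacct_cache", sizeof(b));	/* b == "delayac", terminated */

	printf("strncpy: %.8s / strlcpy: %s\n", a, b);
	return 0;
}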
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 494dac872a..948bd8f643 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -275,6 +275,7 @@ static int test_func(void *data)
 
 		/* Wait for the next command to be executed */
 		schedule();
+		try_to_freeze();
 
 		if (signal_pending(current))
 			flush_signals(current);
diff --git a/kernel/sched.c b/kernel/sched.c
index d714611f16..b44b9a43b0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -51,6 +51,7 @@
 #include <linux/times.h>
 #include <linux/acct.h>
 #include <linux/kprobes.h>
+#include <linux/delayacct.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -501,9 +502,36 @@ struct file_operations proc_schedstat_operations = {
 	.release = single_release,
 };
 
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{
+	if (rq) {
+		rq->rq_sched_info.run_delay += delta_jiffies;
+		rq->rq_sched_info.pcnt++;
+	}
+}
+
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{
+	if (rq)
+		rq->rq_sched_info.cpu_time += delta_jiffies;
+}
 # define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
 # define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
 #else /* !CONFIG_SCHEDSTATS */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{}
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{}
 # define schedstat_inc(rq, field)	do { } while (0)
 # define schedstat_add(rq, field, amt)	do { } while (0)
 #endif
@@ -523,7 +551,7 @@ static inline struct rq *this_rq_lock(void)
 	return rq;
 }
 
-#ifdef CONFIG_SCHEDSTATS
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 /*
  * Called when a process is dequeued from the active array and given
  * the cpu.  We should note that with the exception of interactive
@@ -551,21 +579,16 @@ static inline void sched_info_dequeued(struct task_struct *t)
  */
 static void sched_info_arrive(struct task_struct *t)
 {
-	unsigned long now = jiffies, diff = 0;
-	struct rq *rq = task_rq(t);
+	unsigned long now = jiffies, delta_jiffies = 0;
 
 	if (t->sched_info.last_queued)
-		diff = now - t->sched_info.last_queued;
+		delta_jiffies = now - t->sched_info.last_queued;
 	sched_info_dequeued(t);
-	t->sched_info.run_delay += diff;
+	t->sched_info.run_delay += delta_jiffies;
 	t->sched_info.last_arrival = now;
 	t->sched_info.pcnt++;
 
-	if (!rq)
-		return;
-
-	rq->rq_sched_info.run_delay += diff;
-	rq->rq_sched_info.pcnt++;
+	rq_sched_info_arrive(task_rq(t), delta_jiffies);
 }
 
 /*
@@ -585,8 +608,9 @@ static void sched_info_arrive(struct task_struct *t)
  */
 static inline void sched_info_queued(struct task_struct *t)
 {
-	if (!t->sched_info.last_queued)
-		t->sched_info.last_queued = jiffies;
+	if (unlikely(sched_info_on()))
+		if (!t->sched_info.last_queued)
+			t->sched_info.last_queued = jiffies;
 }
 
 /*
@@ -595,13 +619,10 @@ static inline void sched_info_queued(struct task_struct *t)
 
 static inline void sched_info_depart(struct task_struct *t)
 {
-	struct rq *rq = task_rq(t);
-	unsigned long diff = jiffies - t->sched_info.last_arrival;
-
-	t->sched_info.cpu_time += diff;
+	unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival;
 
-	if (rq)
-		rq->rq_sched_info.cpu_time += diff;
+	t->sched_info.cpu_time += delta_jiffies;
+	rq_sched_info_depart(task_rq(t), delta_jiffies);
 }
 
 /*
@@ -610,7 +631,7 @@ static inline void sched_info_depart(struct task_struct *t)
  * the idle task.)  We are only called when prev != next.
  */
 static inline void
-sched_info_switch(struct task_struct *prev, struct task_struct *next)
+__sched_info_switch(struct task_struct *prev, struct task_struct *next)
 {
 	struct rq *rq = task_rq(prev);
 
@@ -625,10 +646,16 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 	if (next != rq->idle)
 		sched_info_arrive(next);
 }
+static inline void
+sched_info_switch(struct task_struct *prev, struct task_struct *next)
+{
+	if (unlikely(sched_info_on()))
+		__sched_info_switch(prev, next);
+}
 #else
 #define sched_info_queued(t)		do { } while (0)
 #define sched_info_switch(t, next)	do { } while (0)
-#endif /* CONFIG_SCHEDSTATS */
+#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
 /*
  * Adding/removing a task to/from a priority array:
@@ -1530,8 +1557,9 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
 
 	INIT_LIST_HEAD(&p->run_list);
 	p->array = NULL;
-#ifdef CONFIG_SCHEDSTATS
-	memset(&p->sched_info, 0, sizeof(p->sched_info));
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+	if (unlikely(sched_info_on()))
+		memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	p->oncpu = 0;
@@ -1788,7 +1816,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
 		WARN_ON(rq->prev_mm);
 		rq->prev_mm = oldmm;
 	}
+	/*
+	 * Since the runqueue lock will be released by the next
+	 * task (which is an invalid locking op but in the case
+	 * of the scheduler it's an obvious special-case), so we
+	 * do an early lockdep release here:
+	 */
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
 	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
+#endif
 
 	/* Here we just switch the register state and the stack. */
 	switch_to(prev, next, prev);
@@ -4526,9 +4562,11 @@ void __sched io_schedule(void)
 {
 	struct rq *rq = &__raw_get_cpu_var(runqueues);
 
+	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
 	schedule();
 	atomic_dec(&rq->nr_iowait);
+	delayacct_blkio_end();
 }
 EXPORT_SYMBOL(io_schedule);
 
@@ -4537,9 +4575,11 @@ long __sched io_schedule_timeout(long timeout)
 	struct rq *rq = &__raw_get_cpu_var(runqueues);
 	long ret;
 
+	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
 	ret = schedule_timeout(timeout);
 	atomic_dec(&rq->nr_iowait);
+	delayacct_blkio_end();
 	return ret;
 }
 
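
The recurring idiom in these sched.c hunks is sched_info_on(): the stats code is now compiled in whenever either config option is set, but skipped at runtime unless delay accounting was actually enabled (the "delayacct" boot parameter from delayacct.c). A sketch of that compile-in, gate-at-runtime pattern, with an environment variable standing in for the boot parameter (names illustrative, not from the patch):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define unlikely(x)	__builtin_expect(!!(x), 0)

static bool stats_on;	/* set once at startup, read-mostly thereafter */

static void sched_stats_record(const char *event)
{
	/* compiled in unconditionally; the common (disabled) case pays
	 * only this predicted-not-taken test */
	if (unlikely(stats_on))
		printf("stat: %s\n", event);
}

int main(void)
{
	stats_on = getenv("STATS") != NULL;	/* cf. __setup("delayacct", ...) */
	sched_stats_record("context switch");
	return 0;
}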
diff --git a/kernel/softirq.c b/kernel/softirq.c
index fd12f2556f..0f08a84ae3 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -311,8 +311,6 @@ void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
 	softirq_vec[nr].action = action;
 }
 
-EXPORT_UNUSED_SYMBOL(open_softirq);	/* June 2006 */
-
 /* Tasklets */
 struct tasklet_head
 {
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
new file mode 100644
index 0000000000..f45179ce02
--- /dev/null
+++ b/kernel/taskstats.c
@@ -0,0 +1,568 @@
+/*
+ * taskstats.c - Export per-task statistics to userland
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ *           (C) Balbir Singh, IBM Corp. 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/taskstats_kern.h>
+#include <linux/delayacct.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+#include <net/genetlink.h>
+#include <asm/atomic.h>
+
+/*
+ * Maximum length of a cpumask that can be specified in
+ * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute
+ */
+#define TASKSTATS_CPUMASK_MAXLEN	(100+6*NR_CPUS)
+
+static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
+static int family_registered;
+kmem_cache_t *taskstats_cache;
+
+static struct genl_family family = {
+	.id		= GENL_ID_GENERATE,
+	.name		= TASKSTATS_GENL_NAME,
+	.version	= TASKSTATS_GENL_VERSION,
+	.maxattr	= TASKSTATS_CMD_ATTR_MAX,
+};
+
+static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1]
+__read_mostly = {
+	[TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
+	[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
+	[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
+	[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
+
+struct listener {
+	struct list_head list;
+	pid_t pid;
+	char valid;
+};
+
+struct listener_list {
+	struct rw_semaphore sem;
+	struct list_head list;
+};
+static DEFINE_PER_CPU(struct listener_list, listener_array);
+
+enum actions {
+	REGISTER,
+	DEREGISTER,
+	CPU_DONT_CARE
+};
+
+static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
+			void **replyp, size_t size)
+{
+	struct sk_buff *skb;
+	void *reply;
+
+	/*
+	 * If new attributes are added, please revisit this allocation
+	 */
+	skb = nlmsg_new(size);
+	if (!skb)
+		return -ENOMEM;
+
+	if (!info) {
+		int seq = get_cpu_var(taskstats_seqnum)++;
+		put_cpu_var(taskstats_seqnum);
+
+		reply = genlmsg_put(skb, 0, seq,
+				family.id, 0, 0,
+				cmd, family.version);
+	} else
+		reply = genlmsg_put(skb, info->snd_pid, info->snd_seq,
+				family.id, 0, 0,
+				cmd, family.version);
+	if (reply == NULL) {
+		nlmsg_free(skb);
+		return -EINVAL;
+	}
+
+	*skbp = skb;
+	*replyp = reply;
+	return 0;
+}
+
+/*
+ * Send taskstats data in @skb to listener with nl_pid @pid
+ */
+static int send_reply(struct sk_buff *skb, pid_t pid)
+{
+	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+	void *reply = genlmsg_data(genlhdr);
+	int rc;
+
+	rc = genlmsg_end(skb, reply);
+	if (rc < 0) {
+		nlmsg_free(skb);
+		return rc;
+	}
+
+	return genlmsg_unicast(skb, pid);
+}
+
+/*
+ * Send taskstats data in @skb to listeners registered for @cpu's exit data
+ */
+static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
+{
+	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+	struct listener_list *listeners;
+	struct listener *s, *tmp;
+	struct sk_buff *skb_next, *skb_cur = skb;
+	void *reply = genlmsg_data(genlhdr);
+	int rc, ret, delcount = 0;
+
+	rc = genlmsg_end(skb, reply);
+	if (rc < 0) {
+		nlmsg_free(skb);
+		return rc;
+	}
+
+	rc = 0;
+	listeners = &per_cpu(listener_array, cpu);
+	down_read(&listeners->sem);
+	list_for_each_entry_safe(s, tmp, &listeners->list, list) {
+		skb_next = NULL;
+		if (!list_is_last(&s->list, &listeners->list)) {
+			skb_next = skb_clone(skb_cur, GFP_KERNEL);
+			if (!skb_next) {
+				nlmsg_free(skb_cur);
+				rc = -ENOMEM;
+				break;
+			}
+		}
+		ret = genlmsg_unicast(skb_cur, s->pid);
+		if (ret == -ECONNREFUSED) {
+			s->valid = 0;
+			delcount++;
+			rc = ret;
+		}
+		skb_cur = skb_next;
+	}
+	up_read(&listeners->sem);
+
+	if (!delcount)
+		return rc;
+
+	/* Delete invalidated entries */
+	down_write(&listeners->sem);
+	list_for_each_entry_safe(s, tmp, &listeners->list, list) {
+		if (!s->valid) {
+			list_del(&s->list);
+			kfree(s);
+		}
+	}
+	up_write(&listeners->sem);
+	return rc;
+}
+
+static int fill_pid(pid_t pid, struct task_struct *pidtsk,
+		struct taskstats *stats)
+{
+	int rc;
+	struct task_struct *tsk = pidtsk;
+
+	if (!pidtsk) {
+		read_lock(&tasklist_lock);
+		tsk = find_task_by_pid(pid);
+		if (!tsk) {
+			read_unlock(&tasklist_lock);
+			return -ESRCH;
+		}
+		get_task_struct(tsk);
+		read_unlock(&tasklist_lock);
+	} else
+		get_task_struct(tsk);
+
+	/*
+	 * Each accounting subsystem adds calls to its functions to
+	 * fill in relevant parts of struct taskstats as follows
+	 *
+	 *	rc = per-task-foo(stats, tsk);
+	 *	if (rc)
+	 *		goto err;
+	 */
+
+	rc = delayacct_add_tsk(stats, tsk);
+	stats->version = TASKSTATS_VERSION;
+
+	/* Define err: label here if needed */
+	put_task_struct(tsk);
+	return rc;
+
+}
+
+static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
+		struct taskstats *stats)
+{
+	struct task_struct *tsk, *first;
+	unsigned long flags;
+
+	/*
+	 * Add additional stats from live tasks except zombie thread group
+	 * leaders who are already counted with the dead tasks
+	 */
+	first = tgidtsk;
+	if (!first) {
+		read_lock(&tasklist_lock);
+		first = find_task_by_pid(tgid);
+		if (!first) {
+			read_unlock(&tasklist_lock);
+			return -ESRCH;
+		}
+		get_task_struct(first);
+		read_unlock(&tasklist_lock);
+	} else
+		get_task_struct(first);
+
+	/* Start with stats from dead tasks */
+	spin_lock_irqsave(&first->signal->stats_lock, flags);
+	if (first->signal->stats)
+		memcpy(stats, first->signal->stats, sizeof(*stats));
+	spin_unlock_irqrestore(&first->signal->stats_lock, flags);
+
+	tsk = first;
+	read_lock(&tasklist_lock);
+	do {
+		if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk))
+			continue;
+		/*
+		 * Accounting subsystem can call its functions here to
+		 * fill in relevant parts of struct taskstats as follows
+		 *
+		 *	per-task-foo(stats, tsk);
+		 */
+		delayacct_add_tsk(stats, tsk);
+
+	} while_each_thread(first, tsk);
+	read_unlock(&tasklist_lock);
+	stats->version = TASKSTATS_VERSION;
+
+	/*
+	 * Accounting subsystems can also add calls here to modify
+	 * fields of taskstats.
+	 */
+
+	return 0;
+}
+
+
+static void fill_tgid_exit(struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&tsk->signal->stats_lock, flags);
+	if (!tsk->signal->stats)
+		goto ret;
+
+	/*
+	 * Each accounting subsystem calls its functions here to
+	 * accumulate its per-task stats for tsk, into the per-tgid structure
+	 *
+	 *	per-task-foo(tsk->signal->stats, tsk);
+	 */
+	delayacct_add_tsk(tsk->signal->stats, tsk);
+ret:
+	spin_unlock_irqrestore(&tsk->signal->stats_lock, flags);
+	return;
+}
+
+static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
+{
+	struct listener_list *listeners;
+	struct listener *s, *tmp;
+	unsigned int cpu;
+	cpumask_t mask = *maskp;
+
+	if (!cpus_subset(mask, cpu_possible_map))
+		return -EINVAL;
+
+	if (isadd == REGISTER) {
+		for_each_cpu_mask(cpu, mask) {
+			s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
+					 cpu_to_node(cpu));
+			if (!s)
+				goto cleanup;
+			s->pid = pid;
+			INIT_LIST_HEAD(&s->list);
+			s->valid = 1;
+
+			listeners = &per_cpu(listener_array, cpu);
+			down_write(&listeners->sem);
+			list_add(&s->list, &listeners->list);
+			up_write(&listeners->sem);
+		}
+		return 0;
+	}
+
+	/* Deregister or cleanup */
+cleanup:
+	for_each_cpu_mask(cpu, mask) {
+		listeners = &per_cpu(listener_array, cpu);
+		down_write(&listeners->sem);
+		list_for_each_entry_safe(s, tmp, &listeners->list, list) {
+			if (s->pid == pid) {
+				list_del(&s->list);
+				kfree(s);
+				break;
+			}
+		}
+		up_write(&listeners->sem);
+	}
+	return 0;
+}
+
+static int parse(struct nlattr *na, cpumask_t *mask)
+{
+	char *data;
+	int len;
+	int ret;
+
+	if (na == NULL)
+		return 1;
+	len = nla_len(na);
+	if (len > TASKSTATS_CPUMASK_MAXLEN)
+		return -E2BIG;
+	if (len < 1)
+		return -EINVAL;
+	data = kmalloc(len, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	nla_strlcpy(data, na, len);
+	ret = cpulist_parse(data, *mask);
+	kfree(data);
+	return ret;
+}
+
+static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	int rc = 0;
+	struct sk_buff *rep_skb;
+	struct taskstats stats;
+	void *reply;
+	size_t size;
+	struct nlattr *na;
+	cpumask_t mask;
+
+	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
+	if (rc < 0)
+		return rc;
+	if (rc == 0)
+		return add_del_listener(info->snd_pid, &mask, REGISTER);
+
+	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask);
+	if (rc < 0)
+		return rc;
+	if (rc == 0)
+		return add_del_listener(info->snd_pid, &mask, DEREGISTER);
+
+	/*
+	 * Size includes space for nested attributes
+	 */
+	size = nla_total_size(sizeof(u32)) +
+		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+
+	memset(&stats, 0, sizeof(stats));
+	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
+	if (rc < 0)
+		return rc;
+
+	if (info->attrs[TASKSTATS_CMD_ATTR_PID]) {
+		u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
+		rc = fill_pid(pid, NULL, &stats);
+		if (rc < 0)
+			goto err;
+
+		na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
+		NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid);
+		NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
+				stats);
+	} else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) {
+		u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
+		rc = fill_tgid(tgid, NULL, &stats);
+		if (rc < 0)
+			goto err;
+
+		na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
+		NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid);
+		NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
+				stats);
+	} else {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	nla_nest_end(rep_skb, na);
+
+	return send_reply(rep_skb, info->snd_pid);
+
+nla_put_failure:
+	return genlmsg_cancel(rep_skb, reply);
+err:
+	nlmsg_free(rep_skb);
+	return rc;
+}
+
+void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
+{
+	struct listener_list *listeners;
+	struct taskstats *tmp;
+	/*
+	 * This is the cpu on which the task is exiting currently and will
+	 * be the one for which the exit event is sent, even if the cpu
+	 * on which this function is running changes later.
+	 */
+	*mycpu = raw_smp_processor_id();
+
+	*ptidstats = NULL;
+	tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
+	if (!tmp)
+		return;
+
+	listeners = &per_cpu(listener_array, *mycpu);
+	down_read(&listeners->sem);
+	if (!list_empty(&listeners->list)) {
+		*ptidstats = tmp;
+		tmp = NULL;
+	}
+	up_read(&listeners->sem);
+	kfree(tmp);
+}
+
+/* Send pid data out on exit */
+void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
+			int group_dead, unsigned int mycpu)
+{
+	int rc;
+	struct sk_buff *rep_skb;
+	void *reply;
+	size_t size;
+	int is_thread_group;
+	struct nlattr *na;
+	unsigned long flags;
+
+	if (!family_registered || !tidstats)
+		return;
+
+	spin_lock_irqsave(&tsk->signal->stats_lock, flags);
+	is_thread_group = tsk->signal->stats ? 1 : 0;
+	spin_unlock_irqrestore(&tsk->signal->stats_lock, flags);
+
+	rc = 0;
+	/*
+	 * Size includes space for nested attributes
+	 */
+	size = nla_total_size(sizeof(u32)) +
+		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+
+	if (is_thread_group)
+		size = 2 * size;	/* PID + STATS + TGID + STATS */
+
+	rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
+	if (rc < 0)
+		goto ret;
+
+	rc = fill_pid(tsk->pid, tsk, tidstats);
+	if (rc < 0)
+		goto err_skb;
+
+	na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
+	NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid);
+	NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
+			*tidstats);
+	nla_nest_end(rep_skb, na);
+
+	if (!is_thread_group)
+		goto send;
+
+	/*
+	 * tsk has/had a thread group so fill the tsk->signal->stats structure
+	 * Doesn't matter if tsk is the leader or the last group member leaving
+	 */
+
+	fill_tgid_exit(tsk);
+	if (!group_dead)
+		goto send;
+
+	na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
+	NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid);
+	/* No locking needed for tsk->signal->stats since group is dead */
+	NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
+			*tsk->signal->stats);
+	nla_nest_end(rep_skb, na);
+
+send:
+	send_cpu_listeners(rep_skb, mycpu);
+	return;
+
+nla_put_failure:
+	genlmsg_cancel(rep_skb, reply);
+	goto ret;
+err_skb:
+	nlmsg_free(rep_skb);
+ret:
+	return;
+}
+
+static struct genl_ops taskstats_ops = {
+	.cmd		= TASKSTATS_CMD_GET,
+	.doit		= taskstats_user_cmd,
+	.policy		= taskstats_cmd_get_policy,
+};
+
+/* Needed early in initialization */
+void __init taskstats_init_early(void)
+{
+	unsigned int i;
+
+	taskstats_cache = kmem_cache_create("taskstats_cache",
+						sizeof(struct taskstats),
+						0, SLAB_PANIC, NULL, NULL);
+	for_each_possible_cpu(i) {
+		INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
+		init_rwsem(&(per_cpu(listener_array, i).sem));
+	}
+}
+
+static int __init taskstats_init(void)
+{
+	int rc;
+
+	rc = genl_register_family(&family);
+	if (rc)
+		return rc;
+
+	rc = genl_register_ops(&family, &taskstats_ops);
+	if (rc < 0)
+		goto err;
+
+	family_registered = 1;
+	return 0;
+err:
+	genl_unregister_family(&family);
+	return rc;
+}
+
+/*
+ * late initcall ensures initialization of statistics collection
+ * mechanisms precedes initialization of the taskstats interface
+ */
+late_initcall(taskstats_init);
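
On the userspace side, a client resolves the TASKSTATS_GENL_NAME generic-netlink family, sends TASKSTATS_CMD_GET with a TASKSTATS_CMD_ATTR_PID attribute, and parses the nested TASKSTATS_TYPE_AGGR_PID reply built by taskstats_user_cmd() above. A hypothetical query using libnl-3 (which postdates this patch; error handling trimmed; build with cc q.c $(pkg-config --cflags --libs libnl-genl-3.0)):

#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/taskstats.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int show_stats(struct nl_msg *msg, void *arg)
{
	struct nlattr *attrs[TASKSTATS_TYPE_MAX + 1];
	struct nlattr *nested[TASKSTATS_TYPE_MAX + 1];

	genlmsg_parse(nlmsg_hdr(msg), 0, attrs, TASKSTATS_TYPE_MAX, NULL);
	if (!attrs[TASKSTATS_TYPE_AGGR_PID])
		return NL_SKIP;
	nla_parse_nested(nested, TASKSTATS_TYPE_MAX,
			 attrs[TASKSTATS_TYPE_AGGR_PID], NULL);
	if (nested[TASKSTATS_TYPE_STATS]) {
		struct taskstats *ts = nla_data(nested[TASKSTATS_TYPE_STATS]);

		printf("version %u, blkio delay %llu ns\n", (unsigned)ts->version,
		       (unsigned long long)ts->blkio_delay_total);
	}
	return NL_OK;
}

int main(int argc, char **argv)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg = nlmsg_alloc();
	int family;

	if (argc < 2)
		return 1;	/* usage: q <pid> */
	genl_connect(sk);
	family = genl_ctrl_resolve(sk, TASKSTATS_GENL_NAME);

	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    TASKSTATS_CMD_GET, TASKSTATS_GENL_VERSION);
	nla_put_u32(msg, TASKSTATS_CMD_ATTR_PID, (uint32_t)atoi(argv[1]));

	nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, show_stats, NULL);
	nl_send_auto(sk, msg);
	nl_recvmsgs_default(sk);

	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}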
diff --git a/kernel/timer.c b/kernel/timer.c
index 2a87430a58..05809c2e2f 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -374,6 +374,7 @@ int del_timer_sync(struct timer_list *timer)
 		int ret = try_to_del_timer_sync(timer);
 		if (ret >= 0)
 			return ret;
+		cpu_relax();
 	}
 }
 
@@ -968,6 +969,7 @@ void __init timekeeping_init(void)
 }
 
 
+static int timekeeping_suspended;
 /*
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
  * @dev:	unused
@@ -983,6 +985,18 @@ static int timekeeping_resume(struct sys_device *dev)
 	write_seqlock_irqsave(&xtime_lock, flags);
 	/* restart the last cycle value */
 	clock->cycle_last = clocksource_read(clock);
+	clock->error = 0;
+	timekeeping_suspended = 0;
+	write_sequnlock_irqrestore(&xtime_lock, flags);
+	return 0;
+}
+
+static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
+{
+	unsigned long flags;
+
+	write_seqlock_irqsave(&xtime_lock, flags);
+	timekeeping_suspended = 1;
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 	return 0;
 }
@@ -990,6 +1004,7 @@ static int timekeeping_resume(struct sys_device *dev)
 /* sysfs resume/suspend bits for timekeeping */
 static struct sysdev_class timekeeping_sysclass = {
 	.resume		= timekeeping_resume,
+	.suspend	= timekeeping_suspend,
 	set_kset_name("timekeeping"),
 };
 
@@ -1100,13 +1115,16 @@ static void update_wall_time(void)
 {
 	cycle_t offset;
 
-	clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
+	/* Make sure we're fully resumed: */
+	if (unlikely(timekeeping_suspended))
+		return;
 
 #ifdef CONFIG_GENERIC_TIME
 	offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask;
 #else
 	offset = clock->cycle_interval;
 #endif
+	clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
 
 	/* normally this loop will run just once, however in the
 	 * case of lost or late ticks, it will accumulate correctly.
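
The cpu_relax() added to del_timer_sync()'s retry loop above is the standard busy-wait courtesy hint (PAUSE on x86): it tells the CPU the loop is a spin-wait, easing pipeline and SMT-sibling pressure. The same idea in a userspace spin-wait, assuming x86 for _mm_pause() (illustrative only; build with cc -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>
#include <immintrin.h>

static atomic_int done;

static void *worker(void *arg)
{
	usleep(1000);	/* pretend to work */
	atomic_store_explicit(&done, 1, memory_order_release);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	while (!atomic_load_explicit(&done, memory_order_acquire))
		_mm_pause();	/* like cpu_relax(): signal a spin-wait to the CPU */
	pthread_join(t, NULL);
	puts("done");
	return 0;
}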