Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 2
-rw-r--r-- | kernel/acct.c | 4
-rw-r--r-- | kernel/delayacct.c | 178
-rw-r--r-- | kernel/exit.c | 10
-rw-r--r-- | kernel/fork.c | 10
-rw-r--r-- | kernel/futex.c | 6
-rw-r--r-- | kernel/kallsyms.c | 4
-rw-r--r-- | kernel/kthread.c | 24
-rw-r--r-- | kernel/lockdep.c | 136
-rw-r--r-- | kernel/module.c | 11
-rw-r--r-- | kernel/panic.c | 2
-rw-r--r-- | kernel/power/pm.c | 37
-rw-r--r-- | kernel/power/snapshot.c | 10
-rw-r--r-- | kernel/power/swap.c | 26
-rw-r--r-- | kernel/printk.c | 4
-rw-r--r-- | kernel/resource.c | 2
-rw-r--r-- | kernel/rtmutex-tester.c | 1
-rw-r--r-- | kernel/sched.c | 103
-rw-r--r-- | kernel/softirq.c | 2
-rw-r--r-- | kernel/sys.c | 2
-rw-r--r-- | kernel/taskstats.c | 568
-rw-r--r-- | kernel/timer.c | 105
-rw-r--r-- | kernel/wait.c | 8
23 files changed, 1020 insertions, 235 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 47dbcd570cd8..d62ec66c1af2 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ | |||
48 | obj-$(CONFIG_SECCOMP) += seccomp.o | 48 | obj-$(CONFIG_SECCOMP) += seccomp.o |
49 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | 49 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
50 | obj-$(CONFIG_RELAY) += relay.o | 50 | obj-$(CONFIG_RELAY) += relay.o |
51 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | ||
52 | obj-$(CONFIG_TASKSTATS) += taskstats.o | ||
51 | 53 | ||
52 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) | 54 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) |
53 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 55 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/acct.c b/kernel/acct.c
index f18e0b8df3e1..2a7c933651c7 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -488,7 +488,7 @@ static void do_acct_process(struct file *file) | |||
488 | old_encode_dev(tty_devnum(current->signal->tty)) : 0; | 488 | old_encode_dev(tty_devnum(current->signal->tty)) : 0; |
489 | read_unlock(&tasklist_lock); | 489 | read_unlock(&tasklist_lock); |
490 | 490 | ||
491 | spin_lock(¤t->sighand->siglock); | 491 | spin_lock_irq(¤t->sighand->siglock); |
492 | ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); | 492 | ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); |
493 | ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); | 493 | ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); |
494 | ac.ac_flag = pacct->ac_flag; | 494 | ac.ac_flag = pacct->ac_flag; |
@@ -496,7 +496,7 @@ static void do_acct_process(struct file *file) | |||
496 | ac.ac_minflt = encode_comp_t(pacct->ac_minflt); | 496 | ac.ac_minflt = encode_comp_t(pacct->ac_minflt); |
497 | ac.ac_majflt = encode_comp_t(pacct->ac_majflt); | 497 | ac.ac_majflt = encode_comp_t(pacct->ac_majflt); |
498 | ac.ac_exitcode = pacct->ac_exitcode; | 498 | ac.ac_exitcode = pacct->ac_exitcode; |
499 | spin_unlock(¤t->sighand->siglock); | 499 | spin_unlock_irq(¤t->sighand->siglock); |
500 | ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ | 500 | ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ |
501 | ac.ac_rw = encode_comp_t(ac.ac_io / 1024); | 501 | ac.ac_rw = encode_comp_t(ac.ac_io / 1024); |
502 | ac.ac_swaps = encode_comp_t(0); | 502 | ac.ac_swaps = encode_comp_t(0); |
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
new file mode 100644
index 000000000000..f05392d64267
--- /dev/null
+++ b/kernel/delayacct.c
@@ -0,0 +1,178 @@ | |||
1 | /* delayacct.c - per-task delay accounting | ||
2 | * | ||
3 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, but | ||
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
13 | * the GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/sched.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include <linux/time.h> | ||
19 | #include <linux/sysctl.h> | ||
20 | #include <linux/delayacct.h> | ||
21 | |||
22 | int delayacct_on __read_mostly; /* Delay accounting turned on/off */ | ||
23 | kmem_cache_t *delayacct_cache; | ||
24 | |||
25 | static int __init delayacct_setup_enable(char *str) | ||
26 | { | ||
27 | delayacct_on = 1; | ||
28 | return 1; | ||
29 | } | ||
30 | __setup("delayacct", delayacct_setup_enable); | ||
31 | |||
32 | void delayacct_init(void) | ||
33 | { | ||
34 | delayacct_cache = kmem_cache_create("delayacct_cache", | ||
35 | sizeof(struct task_delay_info), | ||
36 | 0, | ||
37 | SLAB_PANIC, | ||
38 | NULL, NULL); | ||
39 | delayacct_tsk_init(&init_task); | ||
40 | } | ||
41 | |||
42 | void __delayacct_tsk_init(struct task_struct *tsk) | ||
43 | { | ||
44 | spin_lock_init(&tsk->delays_lock); | ||
45 | /* No need to acquire tsk->delays_lock for allocation here unless | ||
46 | __delayacct_tsk_init called after tsk is attached to tasklist | ||
47 | */ | ||
48 | tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL); | ||
49 | if (tsk->delays) | ||
50 | spin_lock_init(&tsk->delays->lock); | ||
51 | } | ||
52 | |||
53 | void __delayacct_tsk_exit(struct task_struct *tsk) | ||
54 | { | ||
55 | struct task_delay_info *delays = tsk->delays; | ||
56 | spin_lock(&tsk->delays_lock); | ||
57 | tsk->delays = NULL; | ||
58 | spin_unlock(&tsk->delays_lock); | ||
59 | kmem_cache_free(delayacct_cache, delays); | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * Start accounting for a delay statistic using | ||
64 | * its starting timestamp (@start) | ||
65 | */ | ||
66 | |||
67 | static inline void delayacct_start(struct timespec *start) | ||
68 | { | ||
69 | do_posix_clock_monotonic_gettime(start); | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * Finish delay accounting for a statistic using | ||
74 | * its timestamps (@start, @end), accumulator (@total) and @count | ||
75 | */ | ||
76 | |||
77 | static void delayacct_end(struct timespec *start, struct timespec *end, | ||
78 | u64 *total, u32 *count) | ||
79 | { | ||
80 | struct timespec ts; | ||
81 | s64 ns; | ||
82 | |||
83 | do_posix_clock_monotonic_gettime(end); | ||
84 | ts = timespec_sub(*end, *start); | ||
85 | ns = timespec_to_ns(&ts); | ||
86 | if (ns < 0) | ||
87 | return; | ||
88 | |||
89 | spin_lock(¤t->delays->lock); | ||
90 | *total += ns; | ||
91 | (*count)++; | ||
92 | spin_unlock(¤t->delays->lock); | ||
93 | } | ||
94 | |||
95 | void __delayacct_blkio_start(void) | ||
96 | { | ||
97 | delayacct_start(¤t->delays->blkio_start); | ||
98 | } | ||
99 | |||
100 | void __delayacct_blkio_end(void) | ||
101 | { | ||
102 | if (current->delays->flags & DELAYACCT_PF_SWAPIN) | ||
103 | /* Swapin block I/O */ | ||
104 | delayacct_end(¤t->delays->blkio_start, | ||
105 | ¤t->delays->blkio_end, | ||
106 | ¤t->delays->swapin_delay, | ||
107 | ¤t->delays->swapin_count); | ||
108 | else /* Other block I/O */ | ||
109 | delayacct_end(¤t->delays->blkio_start, | ||
110 | ¤t->delays->blkio_end, | ||
111 | ¤t->delays->blkio_delay, | ||
112 | ¤t->delays->blkio_count); | ||
113 | } | ||
114 | |||
115 | int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) | ||
116 | { | ||
117 | s64 tmp; | ||
118 | struct timespec ts; | ||
119 | unsigned long t1,t2,t3; | ||
120 | |||
121 | spin_lock(&tsk->delays_lock); | ||
122 | |||
123 | /* Though tsk->delays accessed later, early exit avoids | ||
124 | * unnecessary returning of other data | ||
125 | */ | ||
126 | if (!tsk->delays) | ||
127 | goto done; | ||
128 | |||
129 | tmp = (s64)d->cpu_run_real_total; | ||
130 | cputime_to_timespec(tsk->utime + tsk->stime, &ts); | ||
131 | tmp += timespec_to_ns(&ts); | ||
132 | d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; | ||
133 | |||
134 | /* | ||
135 | * No locking available for sched_info (and too expensive to add one) | ||
136 | * Mitigate by taking snapshot of values | ||
137 | */ | ||
138 | t1 = tsk->sched_info.pcnt; | ||
139 | t2 = tsk->sched_info.run_delay; | ||
140 | t3 = tsk->sched_info.cpu_time; | ||
141 | |||
142 | d->cpu_count += t1; | ||
143 | |||
144 | jiffies_to_timespec(t2, &ts); | ||
145 | tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts); | ||
146 | d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; | ||
147 | |||
148 | tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000; | ||
149 | d->cpu_run_virtual_total = | ||
150 | (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; | ||
151 | |||
152 | /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ | ||
153 | |||
154 | spin_lock(&tsk->delays->lock); | ||
155 | tmp = d->blkio_delay_total + tsk->delays->blkio_delay; | ||
156 | d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; | ||
157 | tmp = d->swapin_delay_total + tsk->delays->swapin_delay; | ||
158 | d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; | ||
159 | d->blkio_count += tsk->delays->blkio_count; | ||
160 | d->swapin_count += tsk->delays->swapin_count; | ||
161 | spin_unlock(&tsk->delays->lock); | ||
162 | |||
163 | done: | ||
164 | spin_unlock(&tsk->delays_lock); | ||
165 | return 0; | ||
166 | } | ||
167 | |||
168 | __u64 __delayacct_blkio_ticks(struct task_struct *tsk) | ||
169 | { | ||
170 | __u64 ret; | ||
171 | |||
172 | spin_lock(&tsk->delays->lock); | ||
173 | ret = nsec_to_clock_t(tsk->delays->blkio_delay + | ||
174 | tsk->delays->swapin_delay); | ||
175 | spin_unlock(&tsk->delays->lock); | ||
176 | return ret; | ||
177 | } | ||
178 | |||
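An illustrative aside on the pattern used by delayacct_start()/delayacct_end() above: take a monotonic timestamp when the delay begins, subtract it when it ends, and fold the difference into a per-statistic total and count under the per-task lock. Below is a minimal userspace sketch of the same pattern, assuming clock_gettime(CLOCK_MONOTONIC) in place of do_posix_clock_monotonic_gettime(); delay_start(), delay_end() and struct delay_stat are hypothetical names, not kernel API.

/* Userspace sketch of the start/end delay-accounting pattern above. */
#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <unistd.h>

struct delay_stat {
	uint64_t total_ns;	/* accumulated delay, like blkio_delay/swapin_delay */
	uint32_t count;		/* completed intervals, like blkio_count/swapin_count */
};

static void delay_start(struct timespec *start)
{
	clock_gettime(CLOCK_MONOTONIC, start);
}

static void delay_end(const struct timespec *start, struct delay_stat *stat)
{
	struct timespec end;
	int64_t ns;

	clock_gettime(CLOCK_MONOTONIC, &end);
	ns = (int64_t)(end.tv_sec - start->tv_sec) * 1000000000LL
	     + (end.tv_nsec - start->tv_nsec);
	if (ns < 0)			/* mirror the kernel's guard above */
		return;
	stat->total_ns += ns;
	stat->count++;
}

int main(void)
{
	struct delay_stat io = { 0, 0 };
	struct timespec t;

	delay_start(&t);
	usleep(10000);			/* stand-in for a blocking operation */
	delay_end(&t, &io);
	printf("delayed %llu ns over %u intervals\n",
	       (unsigned long long)io.total_ns, io.count);
	return 0;
}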
diff --git a/kernel/exit.c b/kernel/exit.c
index 6664c084783d..dba194a8d416 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -25,6 +25,8 @@ | |||
25 | #include <linux/mount.h> | 25 | #include <linux/mount.h> |
26 | #include <linux/proc_fs.h> | 26 | #include <linux/proc_fs.h> |
27 | #include <linux/mempolicy.h> | 27 | #include <linux/mempolicy.h> |
28 | #include <linux/taskstats_kern.h> | ||
29 | #include <linux/delayacct.h> | ||
28 | #include <linux/cpuset.h> | 30 | #include <linux/cpuset.h> |
29 | #include <linux/syscalls.h> | 31 | #include <linux/syscalls.h> |
30 | #include <linux/signal.h> | 32 | #include <linux/signal.h> |
@@ -843,7 +845,9 @@ static void exit_notify(struct task_struct *tsk) | |||
843 | fastcall NORET_TYPE void do_exit(long code) | 845 | fastcall NORET_TYPE void do_exit(long code) |
844 | { | 846 | { |
845 | struct task_struct *tsk = current; | 847 | struct task_struct *tsk = current; |
848 | struct taskstats *tidstats; | ||
846 | int group_dead; | 849 | int group_dead; |
850 | unsigned int mycpu; | ||
847 | 851 | ||
848 | profile_task_exit(tsk); | 852 | profile_task_exit(tsk); |
849 | 853 | ||
@@ -881,6 +885,8 @@ fastcall NORET_TYPE void do_exit(long code) | |||
881 | current->comm, current->pid, | 885 | current->comm, current->pid, |
882 | preempt_count()); | 886 | preempt_count()); |
883 | 887 | ||
888 | taskstats_exit_alloc(&tidstats, &mycpu); | ||
889 | |||
884 | acct_update_integrals(tsk); | 890 | acct_update_integrals(tsk); |
885 | if (tsk->mm) { | 891 | if (tsk->mm) { |
886 | update_hiwater_rss(tsk->mm); | 892 | update_hiwater_rss(tsk->mm); |
@@ -900,6 +906,10 @@ fastcall NORET_TYPE void do_exit(long code) | |||
900 | #endif | 906 | #endif |
901 | if (unlikely(tsk->audit_context)) | 907 | if (unlikely(tsk->audit_context)) |
902 | audit_free(tsk); | 908 | audit_free(tsk); |
909 | taskstats_exit_send(tsk, tidstats, group_dead, mycpu); | ||
910 | taskstats_exit_free(tidstats); | ||
911 | delayacct_tsk_exit(tsk); | ||
912 | |||
903 | exit_mm(tsk); | 913 | exit_mm(tsk); |
904 | 914 | ||
905 | if (group_dead) | 915 | if (group_dead) |
diff --git a/kernel/fork.c b/kernel/fork.c
index 56e4e07e45f7..1b0f7b1e0881 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -43,6 +43,8 @@ | |||
43 | #include <linux/rmap.h> | 43 | #include <linux/rmap.h> |
44 | #include <linux/acct.h> | 44 | #include <linux/acct.h> |
45 | #include <linux/cn_proc.h> | 45 | #include <linux/cn_proc.h> |
46 | #include <linux/delayacct.h> | ||
47 | #include <linux/taskstats_kern.h> | ||
46 | 48 | ||
47 | #include <asm/pgtable.h> | 49 | #include <asm/pgtable.h> |
48 | #include <asm/pgalloc.h> | 50 | #include <asm/pgalloc.h> |
@@ -61,9 +63,7 @@ int max_threads; /* tunable limit on nr_threads */ | |||
61 | 63 | ||
62 | DEFINE_PER_CPU(unsigned long, process_counts) = 0; | 64 | DEFINE_PER_CPU(unsigned long, process_counts) = 0; |
63 | 65 | ||
64 | __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ | 66 | __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ |
65 | |||
66 | EXPORT_SYMBOL(tasklist_lock); | ||
67 | 67 | ||
68 | int nr_processes(void) | 68 | int nr_processes(void) |
69 | { | 69 | { |
@@ -820,6 +820,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts | |||
820 | if (clone_flags & CLONE_THREAD) { | 820 | if (clone_flags & CLONE_THREAD) { |
821 | atomic_inc(¤t->signal->count); | 821 | atomic_inc(¤t->signal->count); |
822 | atomic_inc(¤t->signal->live); | 822 | atomic_inc(¤t->signal->live); |
823 | taskstats_tgid_alloc(current->signal); | ||
823 | return 0; | 824 | return 0; |
824 | } | 825 | } |
825 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); | 826 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); |
@@ -864,6 +865,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts | |||
864 | INIT_LIST_HEAD(&sig->cpu_timers[0]); | 865 | INIT_LIST_HEAD(&sig->cpu_timers[0]); |
865 | INIT_LIST_HEAD(&sig->cpu_timers[1]); | 866 | INIT_LIST_HEAD(&sig->cpu_timers[1]); |
866 | INIT_LIST_HEAD(&sig->cpu_timers[2]); | 867 | INIT_LIST_HEAD(&sig->cpu_timers[2]); |
868 | taskstats_tgid_init(sig); | ||
867 | 869 | ||
868 | task_lock(current->group_leader); | 870 | task_lock(current->group_leader); |
869 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); | 871 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); |
@@ -885,6 +887,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts | |||
885 | void __cleanup_signal(struct signal_struct *sig) | 887 | void __cleanup_signal(struct signal_struct *sig) |
886 | { | 888 | { |
887 | exit_thread_group_keys(sig); | 889 | exit_thread_group_keys(sig); |
890 | taskstats_tgid_free(sig); | ||
888 | kmem_cache_free(signal_cachep, sig); | 891 | kmem_cache_free(signal_cachep, sig); |
889 | } | 892 | } |
890 | 893 | ||
@@ -1002,6 +1005,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1002 | goto bad_fork_cleanup_put_domain; | 1005 | goto bad_fork_cleanup_put_domain; |
1003 | 1006 | ||
1004 | p->did_exec = 0; | 1007 | p->did_exec = 0; |
1008 | delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ | ||
1005 | copy_flags(clone_flags, p); | 1009 | copy_flags(clone_flags, p); |
1006 | p->pid = pid; | 1010 | p->pid = pid; |
1007 | retval = -EFAULT; | 1011 | retval = -EFAULT; |
diff --git a/kernel/futex.c b/kernel/futex.c
index 1dc98e4dd287..cf0c8e21d1ab 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -476,6 +476,12 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) | |||
476 | * the refcount and return its pi_state: | 476 | * the refcount and return its pi_state: |
477 | */ | 477 | */ |
478 | pi_state = this->pi_state; | 478 | pi_state = this->pi_state; |
479 | /* | ||
480 | * Userspace might have messed up non PI and PI futexes | ||
481 | */ | ||
482 | if (unlikely(!pi_state)) | ||
483 | return -EINVAL; | ||
484 | |||
479 | atomic_inc(&pi_state->refcount); | 485 | atomic_inc(&pi_state->refcount); |
480 | me->pi_state = pi_state; | 486 | me->pi_state = pi_state; |
481 | 487 | ||
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 39277dd6bf90..ab16a5a4cfe9 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -275,8 +275,8 @@ static void upcase_if_global(struct kallsym_iter *iter) | |||
275 | static int get_ksymbol_mod(struct kallsym_iter *iter) | 275 | static int get_ksymbol_mod(struct kallsym_iter *iter) |
276 | { | 276 | { |
277 | iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms, | 277 | iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms, |
278 | &iter->value, | 278 | &iter->value, &iter->type, |
279 | &iter->type, iter->name); | 279 | iter->name, sizeof(iter->name)); |
280 | if (iter->owner == NULL) | 280 | if (iter->owner == NULL) |
281 | return 0; | 281 | return 0; |
282 | 282 | ||
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 24be714b04c7..4f9c60ef95e8 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -216,23 +216,6 @@ EXPORT_SYMBOL(kthread_bind); | |||
216 | */ | 216 | */ |
217 | int kthread_stop(struct task_struct *k) | 217 | int kthread_stop(struct task_struct *k) |
218 | { | 218 | { |
219 | return kthread_stop_sem(k, NULL); | ||
220 | } | ||
221 | EXPORT_SYMBOL(kthread_stop); | ||
222 | |||
223 | /** | ||
224 | * kthread_stop_sem - stop a thread created by kthread_create(). | ||
225 | * @k: thread created by kthread_create(). | ||
226 | * @s: semaphore that @k waits on while idle. | ||
227 | * | ||
228 | * Does essentially the same thing as kthread_stop() above, but wakes | ||
229 | * @k by calling up(@s). | ||
230 | * | ||
231 | * Returns the result of threadfn(), or %-EINTR if wake_up_process() | ||
232 | * was never called. | ||
233 | */ | ||
234 | int kthread_stop_sem(struct task_struct *k, struct semaphore *s) | ||
235 | { | ||
236 | int ret; | 219 | int ret; |
237 | 220 | ||
238 | mutex_lock(&kthread_stop_lock); | 221 | mutex_lock(&kthread_stop_lock); |
@@ -246,10 +229,7 @@ int kthread_stop_sem(struct task_struct *k, struct semaphore *s) | |||
246 | 229 | ||
247 | /* Now set kthread_should_stop() to true, and wake it up. */ | 230 | /* Now set kthread_should_stop() to true, and wake it up. */ |
248 | kthread_stop_info.k = k; | 231 | kthread_stop_info.k = k; |
249 | if (s) | 232 | wake_up_process(k); |
250 | up(s); | ||
251 | else | ||
252 | wake_up_process(k); | ||
253 | put_task_struct(k); | 233 | put_task_struct(k); |
254 | 234 | ||
255 | /* Once it dies, reset stop ptr, gather result and we're done. */ | 235 | /* Once it dies, reset stop ptr, gather result and we're done. */ |
@@ -260,7 +240,7 @@ int kthread_stop_sem(struct task_struct *k, struct semaphore *s) | |||
260 | 240 | ||
261 | return ret; | 241 | return ret; |
262 | } | 242 | } |
263 | EXPORT_SYMBOL(kthread_stop_sem); | 243 | EXPORT_SYMBOL(kthread_stop); |
264 | 244 | ||
265 | static __init int helper_init(void) | 245 | static __init int helper_init(void) |
266 | { | 246 | { |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index f32ca78c198d..9bad17884513 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -169,22 +169,17 @@ EXPORT_SYMBOL(lockdep_internal); | |||
169 | */ | 169 | */ |
170 | static int class_filter(struct lock_class *class) | 170 | static int class_filter(struct lock_class *class) |
171 | { | 171 | { |
172 | #if 0 | ||
173 | /* Example */ | ||
172 | if (class->name_version == 1 && | 174 | if (class->name_version == 1 && |
173 | !strcmp(class->name, "&rl->lock")) | 175 | !strcmp(class->name, "lockname")) |
174 | return 1; | 176 | return 1; |
175 | if (class->name_version == 1 && | 177 | if (class->name_version == 1 && |
176 | !strcmp(class->name, "&ni->mrec_lock")) | 178 | !strcmp(class->name, "&struct->lockfield")) |
177 | return 1; | 179 | return 1; |
178 | if (class->name_version == 1 && | 180 | #endif |
179 | !strcmp(class->name, "mft_ni_runlist_lock")) | 181 | /* Allow everything else. 0 would be filter everything else */ |
180 | return 1; | 182 | return 1; |
181 | if (class->name_version == 1 && | ||
182 | !strcmp(class->name, "mft_ni_mrec_lock")) | ||
183 | return 1; | ||
184 | if (class->name_version == 1 && | ||
185 | !strcmp(class->name, "&vol->lcnbmp_lock")) | ||
186 | return 1; | ||
187 | return 0; | ||
188 | } | 183 | } |
189 | #endif | 184 | #endif |
190 | 185 | ||
@@ -408,23 +403,12 @@ static void lockdep_print_held_locks(struct task_struct *curr) | |||
408 | print_lock(curr->held_locks + i); | 403 | print_lock(curr->held_locks + i); |
409 | } | 404 | } |
410 | } | 405 | } |
411 | /* | ||
412 | * Helper to print a nice hierarchy of lock dependencies: | ||
413 | */ | ||
414 | static void print_spaces(int nr) | ||
415 | { | ||
416 | int i; | ||
417 | |||
418 | for (i = 0; i < nr; i++) | ||
419 | printk(" "); | ||
420 | } | ||
421 | 406 | ||
422 | static void print_lock_class_header(struct lock_class *class, int depth) | 407 | static void print_lock_class_header(struct lock_class *class, int depth) |
423 | { | 408 | { |
424 | int bit; | 409 | int bit; |
425 | 410 | ||
426 | print_spaces(depth); | 411 | printk("%*s->", depth, ""); |
427 | printk("->"); | ||
428 | print_lock_name(class); | 412 | print_lock_name(class); |
429 | printk(" ops: %lu", class->ops); | 413 | printk(" ops: %lu", class->ops); |
430 | printk(" {\n"); | 414 | printk(" {\n"); |
@@ -433,17 +417,14 @@ static void print_lock_class_header(struct lock_class *class, int depth) | |||
433 | if (class->usage_mask & (1 << bit)) { | 417 | if (class->usage_mask & (1 << bit)) { |
434 | int len = depth; | 418 | int len = depth; |
435 | 419 | ||
436 | print_spaces(depth); | 420 | len += printk("%*s %s", depth, "", usage_str[bit]); |
437 | len += printk(" %s", usage_str[bit]); | ||
438 | len += printk(" at:\n"); | 421 | len += printk(" at:\n"); |
439 | print_stack_trace(class->usage_traces + bit, len); | 422 | print_stack_trace(class->usage_traces + bit, len); |
440 | } | 423 | } |
441 | } | 424 | } |
442 | print_spaces(depth); | 425 | printk("%*s }\n", depth, ""); |
443 | printk(" }\n"); | ||
444 | 426 | ||
445 | print_spaces(depth); | 427 | printk("%*s ... key at: ",depth,""); |
446 | printk(" ... key at: "); | ||
447 | print_ip_sym((unsigned long)class->key); | 428 | print_ip_sym((unsigned long)class->key); |
448 | } | 429 | } |
449 | 430 | ||
@@ -463,8 +444,7 @@ static void print_lock_dependencies(struct lock_class *class, int depth) | |||
463 | DEBUG_LOCKS_WARN_ON(!entry->class); | 444 | DEBUG_LOCKS_WARN_ON(!entry->class); |
464 | print_lock_dependencies(entry->class, depth + 1); | 445 | print_lock_dependencies(entry->class, depth + 1); |
465 | 446 | ||
466 | print_spaces(depth); | 447 | printk("%*s ... acquired at:\n",depth,""); |
467 | printk(" ... acquired at:\n"); | ||
468 | print_stack_trace(&entry->trace, 2); | 448 | print_stack_trace(&entry->trace, 2); |
469 | printk("\n"); | 449 | printk("\n"); |
470 | } | 450 | } |
@@ -1124,7 +1104,7 @@ extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void); | |||
1124 | * itself, so actual lookup of the hash should be once per lock object. | 1104 | * itself, so actual lookup of the hash should be once per lock object. |
1125 | */ | 1105 | */ |
1126 | static inline struct lock_class * | 1106 | static inline struct lock_class * |
1127 | register_lock_class(struct lockdep_map *lock, unsigned int subclass) | 1107 | look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) |
1128 | { | 1108 | { |
1129 | struct lockdep_subclass_key *key; | 1109 | struct lockdep_subclass_key *key; |
1130 | struct list_head *hash_head; | 1110 | struct list_head *hash_head; |
@@ -1168,7 +1148,26 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass) | |||
1168 | */ | 1148 | */ |
1169 | list_for_each_entry(class, hash_head, hash_entry) | 1149 | list_for_each_entry(class, hash_head, hash_entry) |
1170 | if (class->key == key) | 1150 | if (class->key == key) |
1171 | goto out_set; | 1151 | return class; |
1152 | |||
1153 | return NULL; | ||
1154 | } | ||
1155 | |||
1156 | /* | ||
1157 | * Register a lock's class in the hash-table, if the class is not present | ||
1158 | * yet. Otherwise we look it up. We cache the result in the lock object | ||
1159 | * itself, so actual lookup of the hash should be once per lock object. | ||
1160 | */ | ||
1161 | static inline struct lock_class * | ||
1162 | register_lock_class(struct lockdep_map *lock, unsigned int subclass) | ||
1163 | { | ||
1164 | struct lockdep_subclass_key *key; | ||
1165 | struct list_head *hash_head; | ||
1166 | struct lock_class *class; | ||
1167 | |||
1168 | class = look_up_lock_class(lock, subclass); | ||
1169 | if (likely(class)) | ||
1170 | return class; | ||
1172 | 1171 | ||
1173 | /* | 1172 | /* |
1174 | * Debug-check: all keys must be persistent! | 1173 | * Debug-check: all keys must be persistent! |
@@ -1183,6 +1182,9 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass) | |||
1183 | return NULL; | 1182 | return NULL; |
1184 | } | 1183 | } |
1185 | 1184 | ||
1185 | key = lock->key->subkeys + subclass; | ||
1186 | hash_head = classhashentry(key); | ||
1187 | |||
1186 | __raw_spin_lock(&hash_lock); | 1188 | __raw_spin_lock(&hash_lock); |
1187 | /* | 1189 | /* |
1188 | * We have to do the hash-walk again, to avoid races | 1190 | * We have to do the hash-walk again, to avoid races |
@@ -1229,8 +1231,8 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass) | |||
1229 | out_unlock_set: | 1231 | out_unlock_set: |
1230 | __raw_spin_unlock(&hash_lock); | 1232 | __raw_spin_unlock(&hash_lock); |
1231 | 1233 | ||
1232 | out_set: | 1234 | if (!subclass) |
1233 | lock->class[subclass] = class; | 1235 | lock->class_cache = class; |
1234 | 1236 | ||
1235 | DEBUG_LOCKS_WARN_ON(class->subclass != subclass); | 1237 | DEBUG_LOCKS_WARN_ON(class->subclass != subclass); |
1236 | 1238 | ||
@@ -1934,7 +1936,7 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
1934 | } | 1936 | } |
1935 | lock->name = name; | 1937 | lock->name = name; |
1936 | lock->key = key; | 1938 | lock->key = key; |
1937 | memset(lock->class, 0, sizeof(lock->class[0])*MAX_LOCKDEP_SUBCLASSES); | 1939 | lock->class_cache = NULL; |
1938 | } | 1940 | } |
1939 | 1941 | ||
1940 | EXPORT_SYMBOL_GPL(lockdep_init_map); | 1942 | EXPORT_SYMBOL_GPL(lockdep_init_map); |
@@ -1948,8 +1950,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
1948 | unsigned long ip) | 1950 | unsigned long ip) |
1949 | { | 1951 | { |
1950 | struct task_struct *curr = current; | 1952 | struct task_struct *curr = current; |
1953 | struct lock_class *class = NULL; | ||
1951 | struct held_lock *hlock; | 1954 | struct held_lock *hlock; |
1952 | struct lock_class *class; | ||
1953 | unsigned int depth, id; | 1955 | unsigned int depth, id; |
1954 | int chain_head = 0; | 1956 | int chain_head = 0; |
1955 | u64 chain_key; | 1957 | u64 chain_key; |
@@ -1967,8 +1969,11 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
1967 | return 0; | 1969 | return 0; |
1968 | } | 1970 | } |
1969 | 1971 | ||
1970 | class = lock->class[subclass]; | 1972 | if (!subclass) |
1971 | /* not cached yet? */ | 1973 | class = lock->class_cache; |
1974 | /* | ||
1975 | * Not cached yet or subclass? | ||
1976 | */ | ||
1972 | if (unlikely(!class)) { | 1977 | if (unlikely(!class)) { |
1973 | class = register_lock_class(lock, subclass); | 1978 | class = register_lock_class(lock, subclass); |
1974 | if (!class) | 1979 | if (!class) |
@@ -2469,48 +2474,44 @@ void lockdep_free_key_range(void *start, unsigned long size) | |||
2469 | 2474 | ||
2470 | void lockdep_reset_lock(struct lockdep_map *lock) | 2475 | void lockdep_reset_lock(struct lockdep_map *lock) |
2471 | { | 2476 | { |
2472 | struct lock_class *class, *next, *entry; | 2477 | struct lock_class *class, *next; |
2473 | struct list_head *head; | 2478 | struct list_head *head; |
2474 | unsigned long flags; | 2479 | unsigned long flags; |
2475 | int i, j; | 2480 | int i, j; |
2476 | 2481 | ||
2477 | raw_local_irq_save(flags); | 2482 | raw_local_irq_save(flags); |
2478 | __raw_spin_lock(&hash_lock); | ||
2479 | 2483 | ||
2480 | /* | 2484 | /* |
2481 | * Remove all classes this lock has: | 2485 | * Remove all classes this lock might have: |
2486 | */ | ||
2487 | for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) { | ||
2488 | /* | ||
2489 | * If the class exists we look it up and zap it: | ||
2490 | */ | ||
2491 | class = look_up_lock_class(lock, j); | ||
2492 | if (class) | ||
2493 | zap_class(class); | ||
2494 | } | ||
2495 | /* | ||
2496 | * Debug check: in the end all mapped classes should | ||
2497 | * be gone. | ||
2482 | */ | 2498 | */ |
2499 | __raw_spin_lock(&hash_lock); | ||
2483 | for (i = 0; i < CLASSHASH_SIZE; i++) { | 2500 | for (i = 0; i < CLASSHASH_SIZE; i++) { |
2484 | head = classhash_table + i; | 2501 | head = classhash_table + i; |
2485 | if (list_empty(head)) | 2502 | if (list_empty(head)) |
2486 | continue; | 2503 | continue; |
2487 | list_for_each_entry_safe(class, next, head, hash_entry) { | 2504 | list_for_each_entry_safe(class, next, head, hash_entry) { |
2488 | for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) { | 2505 | if (unlikely(class == lock->class_cache)) { |
2489 | entry = lock->class[j]; | 2506 | __raw_spin_unlock(&hash_lock); |
2490 | if (class == entry) { | 2507 | DEBUG_LOCKS_WARN_ON(1); |
2491 | zap_class(class); | 2508 | goto out_restore; |
2492 | lock->class[j] = NULL; | ||
2493 | break; | ||
2494 | } | ||
2495 | } | 2509 | } |
2496 | } | 2510 | } |
2497 | } | 2511 | } |
2498 | |||
2499 | /* | ||
2500 | * Debug check: in the end all mapped classes should | ||
2501 | * be gone. | ||
2502 | */ | ||
2503 | for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) { | ||
2504 | entry = lock->class[j]; | ||
2505 | if (!entry) | ||
2506 | continue; | ||
2507 | __raw_spin_unlock(&hash_lock); | ||
2508 | DEBUG_LOCKS_WARN_ON(1); | ||
2509 | raw_local_irq_restore(flags); | ||
2510 | return; | ||
2511 | } | ||
2512 | |||
2513 | __raw_spin_unlock(&hash_lock); | 2512 | __raw_spin_unlock(&hash_lock); |
2513 | |||
2514 | out_restore: | ||
2514 | raw_local_irq_restore(flags); | 2515 | raw_local_irq_restore(flags); |
2515 | } | 2516 | } |
2516 | 2517 | ||
@@ -2571,7 +2572,7 @@ static inline int in_range(const void *start, const void *addr, const void *end) | |||
2571 | 2572 | ||
2572 | static void | 2573 | static void |
2573 | print_freed_lock_bug(struct task_struct *curr, const void *mem_from, | 2574 | print_freed_lock_bug(struct task_struct *curr, const void *mem_from, |
2574 | const void *mem_to) | 2575 | const void *mem_to, struct held_lock *hlock) |
2575 | { | 2576 | { |
2576 | if (!debug_locks_off()) | 2577 | if (!debug_locks_off()) |
2577 | return; | 2578 | return; |
@@ -2583,6 +2584,7 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, | |||
2583 | printk( "-------------------------\n"); | 2584 | printk( "-------------------------\n"); |
2584 | printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", | 2585 | printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", |
2585 | curr->comm, curr->pid, mem_from, mem_to-1); | 2586 | curr->comm, curr->pid, mem_from, mem_to-1); |
2587 | print_lock(hlock); | ||
2586 | lockdep_print_held_locks(curr); | 2588 | lockdep_print_held_locks(curr); |
2587 | 2589 | ||
2588 | printk("\nstack backtrace:\n"); | 2590 | printk("\nstack backtrace:\n"); |
@@ -2616,7 +2618,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) | |||
2616 | !in_range(mem_from, lock_to, mem_to)) | 2618 | !in_range(mem_from, lock_to, mem_to)) |
2617 | continue; | 2619 | continue; |
2618 | 2620 | ||
2619 | print_freed_lock_bug(curr, mem_from, mem_to); | 2621 | print_freed_lock_bug(curr, mem_from, mem_to, hlock); |
2620 | break; | 2622 | break; |
2621 | } | 2623 | } |
2622 | local_irq_restore(flags); | 2624 | local_irq_restore(flags); |
diff --git a/kernel/module.c b/kernel/module.c
index 35e1b1f859d7..2a19cd47c046 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2019,10 +2019,8 @@ const char *module_address_lookup(unsigned long addr, | |||
2019 | return NULL; | 2019 | return NULL; |
2020 | } | 2020 | } |
2021 | 2021 | ||
2022 | struct module *module_get_kallsym(unsigned int symnum, | 2022 | struct module *module_get_kallsym(unsigned int symnum, unsigned long *value, |
2023 | unsigned long *value, | 2023 | char *type, char *name, size_t namelen) |
2024 | char *type, | ||
2025 | char namebuf[128]) | ||
2026 | { | 2024 | { |
2027 | struct module *mod; | 2025 | struct module *mod; |
2028 | 2026 | ||
@@ -2031,9 +2029,8 @@ struct module *module_get_kallsym(unsigned int symnum, | |||
2031 | if (symnum < mod->num_symtab) { | 2029 | if (symnum < mod->num_symtab) { |
2032 | *value = mod->symtab[symnum].st_value; | 2030 | *value = mod->symtab[symnum].st_value; |
2033 | *type = mod->symtab[symnum].st_info; | 2031 | *type = mod->symtab[symnum].st_info; |
2034 | strncpy(namebuf, | 2032 | strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, |
2035 | mod->strtab + mod->symtab[symnum].st_name, | 2033 | namelen); |
2036 | 127); | ||
2037 | mutex_unlock(&module_mutex); | 2034 | mutex_unlock(&module_mutex); |
2038 | return mod; | 2035 | return mod; |
2039 | } | 2036 | } |
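module_get_kallsym() now copies the symbol name with strlcpy() and a caller-supplied length rather than strncpy() with a hard-coded 127. Unlike strncpy(), strlcpy() always NUL-terminates the destination (for a non-zero size) and returns the full source length so truncation can be detected. A minimal userspace sketch of that contract; my_strlcpy() is a hypothetical stand-in, since strlcpy() is not part of standard C:

#include <stdio.h>
#include <string.h>

/* Stand-in mirroring the strlcpy() contract: copy at most size - 1 bytes,
 * always terminate (if size > 0), return the length of the source. */
static size_t my_strlcpy(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (size) {
		size_t n = len >= size ? size - 1 : len;

		memcpy(dst, src, n);
		dst[n] = '\0';
	}
	return len;
}

int main(void)
{
	char buf[8];
	size_t want = my_strlcpy(buf, "a_rather_long_symbol", sizeof(buf));

	/* strncpy(buf, ..., sizeof(buf)) could have left buf unterminated. */
	printf("copied \"%s\", source length %zu, truncated: %s\n",
	       buf, want, want >= sizeof(buf) ? "yes" : "no");
	return 0;
}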
diff --git a/kernel/panic.c b/kernel/panic.c
index ab13f0f668b5..d8a0bca21233 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -172,6 +172,7 @@ const char *print_tainted(void) | |||
172 | 172 | ||
173 | void add_taint(unsigned flag) | 173 | void add_taint(unsigned flag) |
174 | { | 174 | { |
175 | debug_locks_off(); /* can't trust the integrity of the kernel anymore */ | ||
175 | tainted |= flag; | 176 | tainted |= flag; |
176 | } | 177 | } |
177 | EXPORT_SYMBOL(add_taint); | 178 | EXPORT_SYMBOL(add_taint); |
@@ -256,6 +257,7 @@ int oops_may_print(void) | |||
256 | */ | 257 | */ |
257 | void oops_enter(void) | 258 | void oops_enter(void) |
258 | { | 259 | { |
260 | debug_locks_off(); /* can't trust the integrity of the kernel anymore */ | ||
259 | do_oops_enter_exit(); | 261 | do_oops_enter_exit(); |
260 | } | 262 | } |
261 | 263 | ||
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
index 84063ac8fcfc..c50d15266c10 100644
--- a/kernel/power/pm.c
+++ b/kernel/power/pm.c
@@ -75,42 +75,6 @@ struct pm_dev *pm_register(pm_dev_t type, | |||
75 | return dev; | 75 | return dev; |
76 | } | 76 | } |
77 | 77 | ||
78 | static void __pm_unregister(struct pm_dev *dev) | ||
79 | { | ||
80 | if (dev) { | ||
81 | list_del(&dev->entry); | ||
82 | kfree(dev); | ||
83 | } | ||
84 | } | ||
85 | |||
86 | /** | ||
87 | * pm_unregister_all - unregister all devices with matching callback | ||
88 | * @callback: callback function pointer | ||
89 | * | ||
90 | * Unregister every device that would call the callback passed. This | ||
91 | * is primarily meant as a helper function for loadable modules. It | ||
92 | * enables a module to give up all its managed devices without keeping | ||
93 | * its own private list. | ||
94 | */ | ||
95 | |||
96 | void pm_unregister_all(pm_callback callback) | ||
97 | { | ||
98 | struct list_head *entry; | ||
99 | |||
100 | if (!callback) | ||
101 | return; | ||
102 | |||
103 | mutex_lock(&pm_devs_lock); | ||
104 | entry = pm_devs.next; | ||
105 | while (entry != &pm_devs) { | ||
106 | struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); | ||
107 | entry = entry->next; | ||
108 | if (dev->callback == callback) | ||
109 | __pm_unregister(dev); | ||
110 | } | ||
111 | mutex_unlock(&pm_devs_lock); | ||
112 | } | ||
113 | |||
114 | /** | 78 | /** |
115 | * pm_send - send request to a single device | 79 | * pm_send - send request to a single device |
116 | * @dev: device to send to | 80 | * @dev: device to send to |
@@ -239,7 +203,6 @@ int pm_send_all(pm_request_t rqst, void *data) | |||
239 | } | 203 | } |
240 | 204 | ||
241 | EXPORT_SYMBOL(pm_register); | 205 | EXPORT_SYMBOL(pm_register); |
242 | EXPORT_SYMBOL(pm_unregister_all); | ||
243 | EXPORT_SYMBOL(pm_send_all); | 206 | EXPORT_SYMBOL(pm_send_all); |
244 | EXPORT_SYMBOL(pm_active); | 207 | EXPORT_SYMBOL(pm_active); |
245 | 208 | ||
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 24c96f354231..75d4886e648e 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -227,11 +227,17 @@ static void copy_data_pages(struct pbe *pblist) | |||
227 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { | 227 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { |
228 | if (saveable(zone, &zone_pfn)) { | 228 | if (saveable(zone, &zone_pfn)) { |
229 | struct page *page; | 229 | struct page *page; |
230 | long *src, *dst; | ||
231 | int n; | ||
232 | |||
230 | page = pfn_to_page(zone_pfn + zone->zone_start_pfn); | 233 | page = pfn_to_page(zone_pfn + zone->zone_start_pfn); |
231 | BUG_ON(!pbe); | 234 | BUG_ON(!pbe); |
232 | pbe->orig_address = (unsigned long)page_address(page); | 235 | pbe->orig_address = (unsigned long)page_address(page); |
233 | /* copy_page is not usable for copying task structs. */ | 236 | /* copy_page and memcpy are not usable for copying task structs. */ |
234 | memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE); | 237 | dst = (long *)pbe->address; |
238 | src = (long *)pbe->orig_address; | ||
239 | for (n = PAGE_SIZE / sizeof(long); n; n--) | ||
240 | *dst++ = *src++; | ||
235 | pbe = pbe->next; | 241 | pbe = pbe->next; |
236 | } | 242 | } |
237 | } | 243 | } |
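The hunk above replaces memcpy() with an open-coded long-at-a-time loop because, as the comment notes, copy_page() and memcpy() are not usable for copying pages that hold task structs at this point in suspend. A trivial userspace sketch of the copy loop itself; PAGE_SIZE is assumed to be 4096 here:

#include <string.h>

#define PAGE_SIZE 4096			/* assumption: typical page size */

/* Word-at-a-time copy of one page, mirroring the loop added above. */
static void copy_page_longs(void *dst_page, const void *src_page)
{
	long *dst = dst_page;
	const long *src = src_page;
	int n;

	for (n = PAGE_SIZE / sizeof(long); n; n--)
		*dst++ = *src++;
}

int main(void)
{
	static long src[PAGE_SIZE / sizeof(long)], dst[PAGE_SIZE / sizeof(long)];

	src[0] = 42;
	copy_page_longs(dst, src);
	return memcmp(dst, src, PAGE_SIZE) != 0;
}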
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 044b8e0c1025..f1dd146bd64d 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -263,7 +263,6 @@ int swsusp_write(void) | |||
263 | struct swap_map_handle handle; | 263 | struct swap_map_handle handle; |
264 | struct snapshot_handle snapshot; | 264 | struct snapshot_handle snapshot; |
265 | struct swsusp_info *header; | 265 | struct swsusp_info *header; |
266 | unsigned long start; | ||
267 | int error; | 266 | int error; |
268 | 267 | ||
269 | if ((error = swsusp_swap_check())) { | 268 | if ((error = swsusp_swap_check())) { |
@@ -281,16 +280,17 @@ int swsusp_write(void) | |||
281 | } | 280 | } |
282 | error = get_swap_writer(&handle); | 281 | error = get_swap_writer(&handle); |
283 | if (!error) { | 282 | if (!error) { |
284 | start = handle.cur_swap; | 283 | unsigned long start = handle.cur_swap; |
285 | error = swap_write_page(&handle, header); | 284 | error = swap_write_page(&handle, header); |
286 | } | 285 | if (!error) |
287 | if (!error) | 286 | error = save_image(&handle, &snapshot, |
288 | error = save_image(&handle, &snapshot, header->pages - 1); | 287 | header->pages - 1); |
289 | if (!error) { | 288 | if (!error) { |
290 | flush_swap_writer(&handle); | 289 | flush_swap_writer(&handle); |
291 | printk("S"); | 290 | printk("S"); |
292 | error = mark_swapfiles(swp_entry(root_swap, start)); | 291 | error = mark_swapfiles(swp_entry(root_swap, start)); |
293 | printk("|\n"); | 292 | printk("|\n"); |
293 | } | ||
294 | } | 294 | } |
295 | if (error) | 295 | if (error) |
296 | free_all_swap_pages(root_swap, handle.bitmap); | 296 | free_all_swap_pages(root_swap, handle.bitmap); |
@@ -311,8 +311,10 @@ static atomic_t io_done = ATOMIC_INIT(0); | |||
311 | 311 | ||
312 | static int end_io(struct bio *bio, unsigned int num, int err) | 312 | static int end_io(struct bio *bio, unsigned int num, int err) |
313 | { | 313 | { |
314 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 314 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { |
315 | panic("I/O error reading memory image"); | 315 | printk(KERN_ERR "I/O error reading swsusp image.\n"); |
316 | return -EIO; | ||
317 | } | ||
316 | atomic_set(&io_done, 0); | 318 | atomic_set(&io_done, 0); |
317 | return 0; | 319 | return 0; |
318 | } | 320 | } |
diff --git a/kernel/printk.c b/kernel/printk.c
index bdba5d80496c..65ca0688f86f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -52,7 +52,7 @@ int console_printk[4] = { | |||
52 | DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ | 52 | DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ |
53 | }; | 53 | }; |
54 | 54 | ||
55 | EXPORT_SYMBOL(console_printk); | 55 | EXPORT_UNUSED_SYMBOL(console_printk); /* June 2006 */ |
56 | 56 | ||
57 | /* | 57 | /* |
58 | * Low lever drivers may need that to know if they can schedule in | 58 | * Low lever drivers may need that to know if they can schedule in |
@@ -773,7 +773,7 @@ int is_console_locked(void) | |||
773 | { | 773 | { |
774 | return console_locked; | 774 | return console_locked; |
775 | } | 775 | } |
776 | EXPORT_SYMBOL(is_console_locked); | 776 | EXPORT_UNUSED_SYMBOL(is_console_locked); /* June 2006 */ |
777 | 777 | ||
778 | /** | 778 | /** |
779 | * release_console_sem - unlock the console system | 779 | * release_console_sem - unlock the console system |
diff --git a/kernel/resource.c b/kernel/resource.c
index 129cf046e561..0dd3a857579e 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -404,8 +404,6 @@ int insert_resource(struct resource *parent, struct resource *new) | |||
404 | return result; | 404 | return result; |
405 | } | 405 | } |
406 | 406 | ||
407 | EXPORT_SYMBOL(insert_resource); | ||
408 | |||
409 | /* | 407 | /* |
410 | * Given an existing resource, change its start and size to match the | 408 | * Given an existing resource, change its start and size to match the |
411 | * arguments. Returns -EBUSY if it can't fit. Existing children of | 409 | * arguments. Returns -EBUSY if it can't fit. Existing children of |
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 494dac872a13..948bd8f643e2 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -275,6 +275,7 @@ static int test_func(void *data) | |||
275 | 275 | ||
276 | /* Wait for the next command to be executed */ | 276 | /* Wait for the next command to be executed */ |
277 | schedule(); | 277 | schedule(); |
278 | try_to_freeze(); | ||
278 | 279 | ||
279 | if (signal_pending(current)) | 280 | if (signal_pending(current)) |
280 | flush_signals(current); | 281 | flush_signals(current); |
diff --git a/kernel/sched.c b/kernel/sched.c
index 4ee400f9d56b..b44b9a43b0fc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/times.h> | 51 | #include <linux/times.h> |
52 | #include <linux/acct.h> | 52 | #include <linux/acct.h> |
53 | #include <linux/kprobes.h> | 53 | #include <linux/kprobes.h> |
54 | #include <linux/delayacct.h> | ||
54 | #include <asm/tlb.h> | 55 | #include <asm/tlb.h> |
55 | 56 | ||
56 | #include <asm/unistd.h> | 57 | #include <asm/unistd.h> |
@@ -501,9 +502,36 @@ struct file_operations proc_schedstat_operations = { | |||
501 | .release = single_release, | 502 | .release = single_release, |
502 | }; | 503 | }; |
503 | 504 | ||
505 | /* | ||
506 | * Expects runqueue lock to be held for atomicity of update | ||
507 | */ | ||
508 | static inline void | ||
509 | rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies) | ||
510 | { | ||
511 | if (rq) { | ||
512 | rq->rq_sched_info.run_delay += delta_jiffies; | ||
513 | rq->rq_sched_info.pcnt++; | ||
514 | } | ||
515 | } | ||
516 | |||
517 | /* | ||
518 | * Expects runqueue lock to be held for atomicity of update | ||
519 | */ | ||
520 | static inline void | ||
521 | rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies) | ||
522 | { | ||
523 | if (rq) | ||
524 | rq->rq_sched_info.cpu_time += delta_jiffies; | ||
525 | } | ||
504 | # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) | 526 | # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) |
505 | # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) | 527 | # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) |
506 | #else /* !CONFIG_SCHEDSTATS */ | 528 | #else /* !CONFIG_SCHEDSTATS */ |
529 | static inline void | ||
530 | rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies) | ||
531 | {} | ||
532 | static inline void | ||
533 | rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies) | ||
534 | {} | ||
507 | # define schedstat_inc(rq, field) do { } while (0) | 535 | # define schedstat_inc(rq, field) do { } while (0) |
508 | # define schedstat_add(rq, field, amt) do { } while (0) | 536 | # define schedstat_add(rq, field, amt) do { } while (0) |
509 | #endif | 537 | #endif |
@@ -523,7 +551,7 @@ static inline struct rq *this_rq_lock(void) | |||
523 | return rq; | 551 | return rq; |
524 | } | 552 | } |
525 | 553 | ||
526 | #ifdef CONFIG_SCHEDSTATS | 554 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
527 | /* | 555 | /* |
528 | * Called when a process is dequeued from the active array and given | 556 | * Called when a process is dequeued from the active array and given |
529 | * the cpu. We should note that with the exception of interactive | 557 | * the cpu. We should note that with the exception of interactive |
@@ -551,21 +579,16 @@ static inline void sched_info_dequeued(struct task_struct *t) | |||
551 | */ | 579 | */ |
552 | static void sched_info_arrive(struct task_struct *t) | 580 | static void sched_info_arrive(struct task_struct *t) |
553 | { | 581 | { |
554 | unsigned long now = jiffies, diff = 0; | 582 | unsigned long now = jiffies, delta_jiffies = 0; |
555 | struct rq *rq = task_rq(t); | ||
556 | 583 | ||
557 | if (t->sched_info.last_queued) | 584 | if (t->sched_info.last_queued) |
558 | diff = now - t->sched_info.last_queued; | 585 | delta_jiffies = now - t->sched_info.last_queued; |
559 | sched_info_dequeued(t); | 586 | sched_info_dequeued(t); |
560 | t->sched_info.run_delay += diff; | 587 | t->sched_info.run_delay += delta_jiffies; |
561 | t->sched_info.last_arrival = now; | 588 | t->sched_info.last_arrival = now; |
562 | t->sched_info.pcnt++; | 589 | t->sched_info.pcnt++; |
563 | 590 | ||
564 | if (!rq) | 591 | rq_sched_info_arrive(task_rq(t), delta_jiffies); |
565 | return; | ||
566 | |||
567 | rq->rq_sched_info.run_delay += diff; | ||
568 | rq->rq_sched_info.pcnt++; | ||
569 | } | 592 | } |
570 | 593 | ||
571 | /* | 594 | /* |
@@ -585,8 +608,9 @@ static void sched_info_arrive(struct task_struct *t) | |||
585 | */ | 608 | */ |
586 | static inline void sched_info_queued(struct task_struct *t) | 609 | static inline void sched_info_queued(struct task_struct *t) |
587 | { | 610 | { |
588 | if (!t->sched_info.last_queued) | 611 | if (unlikely(sched_info_on())) |
589 | t->sched_info.last_queued = jiffies; | 612 | if (!t->sched_info.last_queued) |
613 | t->sched_info.last_queued = jiffies; | ||
590 | } | 614 | } |
591 | 615 | ||
592 | /* | 616 | /* |
@@ -595,13 +619,10 @@ static inline void sched_info_queued(struct task_struct *t) | |||
595 | */ | 619 | */ |
596 | static inline void sched_info_depart(struct task_struct *t) | 620 | static inline void sched_info_depart(struct task_struct *t) |
597 | { | 621 | { |
598 | struct rq *rq = task_rq(t); | 622 | unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival; |
599 | unsigned long diff = jiffies - t->sched_info.last_arrival; | ||
600 | 623 | ||
601 | t->sched_info.cpu_time += diff; | 624 | t->sched_info.cpu_time += delta_jiffies; |
602 | 625 | rq_sched_info_depart(task_rq(t), delta_jiffies); | |
603 | if (rq) | ||
604 | rq->rq_sched_info.cpu_time += diff; | ||
605 | } | 626 | } |
606 | 627 | ||
607 | /* | 628 | /* |
@@ -610,7 +631,7 @@ static inline void sched_info_depart(struct task_struct *t) | |||
610 | * the idle task.) We are only called when prev != next. | 631 | * the idle task.) We are only called when prev != next. |
611 | */ | 632 | */ |
612 | static inline void | 633 | static inline void |
613 | sched_info_switch(struct task_struct *prev, struct task_struct *next) | 634 | __sched_info_switch(struct task_struct *prev, struct task_struct *next) |
614 | { | 635 | { |
615 | struct rq *rq = task_rq(prev); | 636 | struct rq *rq = task_rq(prev); |
616 | 637 | ||
@@ -625,10 +646,16 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) | |||
625 | if (next != rq->idle) | 646 | if (next != rq->idle) |
626 | sched_info_arrive(next); | 647 | sched_info_arrive(next); |
627 | } | 648 | } |
649 | static inline void | ||
650 | sched_info_switch(struct task_struct *prev, struct task_struct *next) | ||
651 | { | ||
652 | if (unlikely(sched_info_on())) | ||
653 | __sched_info_switch(prev, next); | ||
654 | } | ||
628 | #else | 655 | #else |
629 | #define sched_info_queued(t) do { } while (0) | 656 | #define sched_info_queued(t) do { } while (0) |
630 | #define sched_info_switch(t, next) do { } while (0) | 657 | #define sched_info_switch(t, next) do { } while (0) |
631 | #endif /* CONFIG_SCHEDSTATS */ | 658 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ |
632 | 659 | ||
633 | /* | 660 | /* |
634 | * Adding/removing a task to/from a priority array: | 661 | * Adding/removing a task to/from a priority array: |
@@ -1530,8 +1557,9 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags) | |||
1530 | 1557 | ||
1531 | INIT_LIST_HEAD(&p->run_list); | 1558 | INIT_LIST_HEAD(&p->run_list); |
1532 | p->array = NULL; | 1559 | p->array = NULL; |
1533 | #ifdef CONFIG_SCHEDSTATS | 1560 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
1534 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 1561 | if (unlikely(sched_info_on())) |
1562 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | ||
1535 | #endif | 1563 | #endif |
1536 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 1564 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) |
1537 | p->oncpu = 0; | 1565 | p->oncpu = 0; |
@@ -1788,7 +1816,15 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
1788 | WARN_ON(rq->prev_mm); | 1816 | WARN_ON(rq->prev_mm); |
1789 | rq->prev_mm = oldmm; | 1817 | rq->prev_mm = oldmm; |
1790 | } | 1818 | } |
1819 | /* | ||
1820 | * Since the runqueue lock will be released by the next | ||
1821 | * task (which is an invalid locking op but in the case | ||
1822 | * of the scheduler it's an obvious special-case), so we | ||
1823 | * do an early lockdep release here: | ||
1824 | */ | ||
1825 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW | ||
1791 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); | 1826 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); |
1827 | #endif | ||
1792 | 1828 | ||
1793 | /* Here we just switch the register state and the stack. */ | 1829 | /* Here we just switch the register state and the stack. */ |
1794 | switch_to(prev, next, prev); | 1830 | switch_to(prev, next, prev); |
@@ -3384,7 +3420,7 @@ EXPORT_SYMBOL(schedule); | |||
3384 | 3420 | ||
3385 | #ifdef CONFIG_PREEMPT | 3421 | #ifdef CONFIG_PREEMPT |
3386 | /* | 3422 | /* |
3387 | * this is is the entry point to schedule() from in-kernel preemption | 3423 | * this is the entry point to schedule() from in-kernel preemption |
3388 | * off of preempt_enable. Kernel preemptions off return from interrupt | 3424 | * off of preempt_enable. Kernel preemptions off return from interrupt |
3389 | * occur there and call schedule directly. | 3425 | * occur there and call schedule directly. |
3390 | */ | 3426 | */ |
@@ -3427,7 +3463,7 @@ need_resched: | |||
3427 | EXPORT_SYMBOL(preempt_schedule); | 3463 | EXPORT_SYMBOL(preempt_schedule); |
3428 | 3464 | ||
3429 | /* | 3465 | /* |
3430 | * this is is the entry point to schedule() from kernel preemption | 3466 | * this is the entry point to schedule() from kernel preemption |
3431 | * off of irq context. | 3467 | * off of irq context. |
3432 | * Note, that this is called and return with irqs disabled. This will | 3468 | * Note, that this is called and return with irqs disabled. This will |
3433 | * protect us against recursive calling from irq. | 3469 | * protect us against recursive calling from irq. |
@@ -3439,7 +3475,7 @@ asmlinkage void __sched preempt_schedule_irq(void) | |||
3439 | struct task_struct *task = current; | 3475 | struct task_struct *task = current; |
3440 | int saved_lock_depth; | 3476 | int saved_lock_depth; |
3441 | #endif | 3477 | #endif |
3442 | /* Catch callers which need to be fixed*/ | 3478 | /* Catch callers which need to be fixed */ |
3443 | BUG_ON(ti->preempt_count || !irqs_disabled()); | 3479 | BUG_ON(ti->preempt_count || !irqs_disabled()); |
3444 | 3480 | ||
3445 | need_resched: | 3481 | need_resched: |
@@ -4526,9 +4562,11 @@ void __sched io_schedule(void) | |||
4526 | { | 4562 | { |
4527 | struct rq *rq = &__raw_get_cpu_var(runqueues); | 4563 | struct rq *rq = &__raw_get_cpu_var(runqueues); |
4528 | 4564 | ||
4565 | delayacct_blkio_start(); | ||
4529 | atomic_inc(&rq->nr_iowait); | 4566 | atomic_inc(&rq->nr_iowait); |
4530 | schedule(); | 4567 | schedule(); |
4531 | atomic_dec(&rq->nr_iowait); | 4568 | atomic_dec(&rq->nr_iowait); |
4569 | delayacct_blkio_end(); | ||
4532 | } | 4570 | } |
4533 | EXPORT_SYMBOL(io_schedule); | 4571 | EXPORT_SYMBOL(io_schedule); |
4534 | 4572 | ||
@@ -4537,9 +4575,11 @@ long __sched io_schedule_timeout(long timeout) | |||
4537 | struct rq *rq = &__raw_get_cpu_var(runqueues); | 4575 | struct rq *rq = &__raw_get_cpu_var(runqueues); |
4538 | long ret; | 4576 | long ret; |
4539 | 4577 | ||
4578 | delayacct_blkio_start(); | ||
4540 | atomic_inc(&rq->nr_iowait); | 4579 | atomic_inc(&rq->nr_iowait); |
4541 | ret = schedule_timeout(timeout); | 4580 | ret = schedule_timeout(timeout); |
4542 | atomic_dec(&rq->nr_iowait); | 4581 | atomic_dec(&rq->nr_iowait); |
4582 | delayacct_blkio_end(); | ||
4543 | return ret; | 4583 | return ret; |
4544 | } | 4584 | } |
4545 | 4585 | ||
@@ -4650,7 +4690,7 @@ static inline struct task_struct *younger_sibling(struct task_struct *p) | |||
4650 | return list_entry(p->sibling.next,struct task_struct,sibling); | 4690 | return list_entry(p->sibling.next,struct task_struct,sibling); |
4651 | } | 4691 | } |
4652 | 4692 | ||
4653 | static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" }; | 4693 | static const char stat_nam[] = "RSDTtZX"; |
4654 | 4694 | ||
4655 | static void show_task(struct task_struct *p) | 4695 | static void show_task(struct task_struct *p) |
4656 | { | 4696 | { |
@@ -4658,12 +4698,9 @@ static void show_task(struct task_struct *p) | |||
4658 | unsigned long free = 0; | 4698 | unsigned long free = 0; |
4659 | unsigned state; | 4699 | unsigned state; |
4660 | 4700 | ||
4661 | printk("%-13.13s ", p->comm); | ||
4662 | state = p->state ? __ffs(p->state) + 1 : 0; | 4701 | state = p->state ? __ffs(p->state) + 1 : 0; |
4663 | if (state < ARRAY_SIZE(stat_nam)) | 4702 | printk("%-13.13s %c", p->comm, |
4664 | printk(stat_nam[state]); | 4703 | state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); |
4665 | else | ||
4666 | printk("?"); | ||
4667 | #if (BITS_PER_LONG == 32) | 4704 | #if (BITS_PER_LONG == 32) |
4668 | if (state == TASK_RUNNING) | 4705 | if (state == TASK_RUNNING) |
4669 | printk(" running "); | 4706 | printk(" running "); |
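show_task() above switches from an array of one-character strings to the single string "RSDTtZX", indexed by the lowest set state bit and falling back to '?' for unknown states. A userspace sketch of that mapping, where ffs() from <strings.h> plays the role of the kernel's __ffs() + 1; the TASK_* values here are illustrative only:

#include <stdio.h>
#include <strings.h>		/* ffs() */

#define TASK_RUNNING		0
#define TASK_INTERRUPTIBLE	1
#define TASK_UNINTERRUPTIBLE	2

static const char stat_nam[] = "RSDTtZX";

static char state_char(unsigned long state)
{
	/* ffs() is 1-based, matching __ffs(state) + 1; ffs(0) == 0,
	 * so TASK_RUNNING (no bits set) maps to 'R'. */
	unsigned int idx = state ? (unsigned int)ffs((int)state) : 0;

	return idx < sizeof(stat_nam) - 1 ? stat_nam[idx] : '?';
}

int main(void)
{
	printf("%c %c %c %c\n",
	       state_char(TASK_RUNNING),
	       state_char(TASK_INTERRUPTIBLE),
	       state_char(TASK_UNINTERRUPTIBLE),
	       state_char(1UL << 10));	/* unknown state prints '?' */
	return 0;
}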
@@ -4877,7 +4914,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
4877 | p->timestamp = p->timestamp - rq_src->timestamp_last_tick | 4914 | p->timestamp = p->timestamp - rq_src->timestamp_last_tick |
4878 | + rq_dest->timestamp_last_tick; | 4915 | + rq_dest->timestamp_last_tick; |
4879 | deactivate_task(p, rq_src); | 4916 | deactivate_task(p, rq_src); |
4880 | activate_task(p, rq_dest, 0); | 4917 | __activate_task(p, rq_dest); |
4881 | if (TASK_PREEMPTS_CURR(p, rq_dest)) | 4918 | if (TASK_PREEMPTS_CURR(p, rq_dest)) |
4882 | resched_task(rq_dest->curr); | 4919 | resched_task(rq_dest->curr); |
4883 | } | 4920 | } |
@@ -5776,7 +5813,7 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2) | |||
5776 | cache = vmalloc(max_size); | 5813 | cache = vmalloc(max_size); |
5777 | if (!cache) { | 5814 | if (!cache) { |
5778 | printk("could not vmalloc %d bytes for cache!\n", 2*max_size); | 5815 | printk("could not vmalloc %d bytes for cache!\n", 2*max_size); |
5779 | return 1000000; // return 1 msec on very small boxen | 5816 | return 1000000; /* return 1 msec on very small boxen */ |
5780 | } | 5817 | } |
5781 | 5818 | ||
5782 | while (size <= max_size) { | 5819 | while (size <= max_size) { |
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 215541e26c1a..0f08a84ae307 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -311,8 +311,6 @@ void open_softirq(int nr, void (*action)(struct softirq_action*), void *data) | |||
311 | softirq_vec[nr].action = action; | 311 | softirq_vec[nr].action = action; |
312 | } | 312 | } |
313 | 313 | ||
314 | EXPORT_SYMBOL(open_softirq); | ||
315 | |||
316 | /* Tasklets */ | 314 | /* Tasklets */ |
317 | struct tasklet_head | 315 | struct tasklet_head |
318 | { | 316 | { |
diff --git a/kernel/sys.c b/kernel/sys.c index dbb3b9c7ea64..e236f98f7ec5 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1983,7 +1983,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
1983 | error = current->mm->dumpable; | 1983 | error = current->mm->dumpable; |
1984 | break; | 1984 | break; |
1985 | case PR_SET_DUMPABLE: | 1985 | case PR_SET_DUMPABLE: |
1986 | if (arg2 < 0 || arg2 > 2) { | 1986 | if (arg2 < 0 || arg2 > 1) { |
1987 | error = -EINVAL; | 1987 | error = -EINVAL; |
1988 | break; | 1988 | break; |
1989 | } | 1989 | } |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c new file mode 100644 index 000000000000..f45179ce028e --- /dev/null +++ b/kernel/taskstats.c | |||
@@ -0,0 +1,568 @@ | |||
1 | /* | ||
2 | * taskstats.c - Export per-task statistics to userland | ||
3 | * | ||
4 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 | ||
5 | * (C) Balbir Singh, IBM Corp. 2006 | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/taskstats_kern.h> | ||
21 | #include <linux/delayacct.h> | ||
22 | #include <linux/cpumask.h> | ||
23 | #include <linux/percpu.h> | ||
24 | #include <net/genetlink.h> | ||
25 | #include <asm/atomic.h> | ||
26 | |||
27 | /* | ||
28 | * Maximum length of a cpumask that can be specified in | ||
29 | * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute | ||
30 | */ | ||
31 | #define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS) | ||
32 | |||
33 | static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; | ||
34 | static int family_registered; | ||
35 | kmem_cache_t *taskstats_cache; | ||
36 | |||
37 | static struct genl_family family = { | ||
38 | .id = GENL_ID_GENERATE, | ||
39 | .name = TASKSTATS_GENL_NAME, | ||
40 | .version = TASKSTATS_GENL_VERSION, | ||
41 | .maxattr = TASKSTATS_CMD_ATTR_MAX, | ||
42 | }; | ||
43 | |||
44 | static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] | ||
45 | __read_mostly = { | ||
46 | [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, | ||
47 | [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, | ||
48 | [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, | ||
49 | [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; | ||
50 | |||
51 | struct listener { | ||
52 | struct list_head list; | ||
53 | pid_t pid; | ||
54 | char valid; | ||
55 | }; | ||
56 | |||
57 | struct listener_list { | ||
58 | struct rw_semaphore sem; | ||
59 | struct list_head list; | ||
60 | }; | ||
61 | static DEFINE_PER_CPU(struct listener_list, listener_array); | ||
62 | |||
63 | enum actions { | ||
64 | REGISTER, | ||
65 | DEREGISTER, | ||
66 | CPU_DONT_CARE | ||
67 | }; | ||
68 | |||
69 | static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, | ||
70 | void **replyp, size_t size) | ||
71 | { | ||
72 | struct sk_buff *skb; | ||
73 | void *reply; | ||
74 | |||
75 | /* | ||
76 | * If new attributes are added, please revisit this allocation | ||
77 | */ | ||
78 | skb = nlmsg_new(size); | ||
79 | if (!skb) | ||
80 | return -ENOMEM; | ||
81 | |||
82 | if (!info) { | ||
83 | int seq = get_cpu_var(taskstats_seqnum)++; | ||
84 | put_cpu_var(taskstats_seqnum); | ||
85 | |||
86 | reply = genlmsg_put(skb, 0, seq, | ||
87 | family.id, 0, 0, | ||
88 | cmd, family.version); | ||
89 | } else | ||
90 | reply = genlmsg_put(skb, info->snd_pid, info->snd_seq, | ||
91 | family.id, 0, 0, | ||
92 | cmd, family.version); | ||
93 | if (reply == NULL) { | ||
94 | nlmsg_free(skb); | ||
95 | return -EINVAL; | ||
96 | } | ||
97 | |||
98 | *skbp = skb; | ||
99 | *replyp = reply; | ||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Send taskstats data in @skb to listener with nl_pid @pid | ||
105 | */ | ||
106 | static int send_reply(struct sk_buff *skb, pid_t pid) | ||
107 | { | ||
108 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); | ||
109 | void *reply = genlmsg_data(genlhdr); | ||
110 | int rc; | ||
111 | |||
112 | rc = genlmsg_end(skb, reply); | ||
113 | if (rc < 0) { | ||
114 | nlmsg_free(skb); | ||
115 | return rc; | ||
116 | } | ||
117 | |||
118 | return genlmsg_unicast(skb, pid); | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * Send taskstats data in @skb to listeners registered for @cpu's exit data | ||
123 | */ | ||
124 | static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) | ||
125 | { | ||
126 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); | ||
127 | struct listener_list *listeners; | ||
128 | struct listener *s, *tmp; | ||
129 | struct sk_buff *skb_next, *skb_cur = skb; | ||
130 | void *reply = genlmsg_data(genlhdr); | ||
131 | int rc, ret, delcount = 0; | ||
132 | |||
133 | rc = genlmsg_end(skb, reply); | ||
134 | if (rc < 0) { | ||
135 | nlmsg_free(skb); | ||
136 | return rc; | ||
137 | } | ||
138 | |||
139 | rc = 0; | ||
140 | listeners = &per_cpu(listener_array, cpu); | ||
141 | down_read(&listeners->sem); | ||
142 | list_for_each_entry_safe(s, tmp, &listeners->list, list) { | ||
143 | skb_next = NULL; | ||
144 | if (!list_is_last(&s->list, &listeners->list)) { | ||
145 | skb_next = skb_clone(skb_cur, GFP_KERNEL); | ||
146 | if (!skb_next) { | ||
147 | nlmsg_free(skb_cur); | ||
148 | rc = -ENOMEM; | ||
149 | break; | ||
150 | } | ||
151 | } | ||
152 | ret = genlmsg_unicast(skb_cur, s->pid); | ||
153 | if (ret == -ECONNREFUSED) { | ||
154 | s->valid = 0; | ||
155 | delcount++; | ||
156 | rc = ret; | ||
157 | } | ||
158 | skb_cur = skb_next; | ||
159 | } | ||
160 | up_read(&listeners->sem); | ||
161 | |||
162 | if (!delcount) | ||
163 | return rc; | ||
164 | |||
165 | /* Delete invalidated entries */ | ||
166 | down_write(&listeners->sem); | ||
167 | list_for_each_entry_safe(s, tmp, &listeners->list, list) { | ||
168 | if (!s->valid) { | ||
169 | list_del(&s->list); | ||
170 | kfree(s); | ||
171 | } | ||
172 | } | ||
173 | up_write(&listeners->sem); | ||
174 | return rc; | ||
175 | } | ||
176 | |||
177 | static int fill_pid(pid_t pid, struct task_struct *pidtsk, | ||
178 | struct taskstats *stats) | ||
179 | { | ||
180 | int rc; | ||
181 | struct task_struct *tsk = pidtsk; | ||
182 | |||
183 | if (!pidtsk) { | ||
184 | read_lock(&tasklist_lock); | ||
185 | tsk = find_task_by_pid(pid); | ||
186 | if (!tsk) { | ||
187 | read_unlock(&tasklist_lock); | ||
188 | return -ESRCH; | ||
189 | } | ||
190 | get_task_struct(tsk); | ||
191 | read_unlock(&tasklist_lock); | ||
192 | } else | ||
193 | get_task_struct(tsk); | ||
194 | |||
195 | /* | ||
196 | * Each accounting subsystem adds calls to its functions to | ||
197 | * fill in relevant parts of struct taskstats as follows | ||
198 | * | ||
199 | * rc = per-task-foo(stats, tsk); | ||
200 | * if (rc) | ||
201 | * goto err; | ||
202 | */ | ||
203 | |||
204 | rc = delayacct_add_tsk(stats, tsk); | ||
205 | stats->version = TASKSTATS_VERSION; | ||
206 | |||
207 | /* Define err: label here if needed */ | ||
208 | put_task_struct(tsk); | ||
209 | return rc; | ||
210 | |||
211 | } | ||
212 | |||
213 | static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, | ||
214 | struct taskstats *stats) | ||
215 | { | ||
216 | struct task_struct *tsk, *first; | ||
217 | unsigned long flags; | ||
218 | |||
219 | /* | ||
220 | * Add additional stats from live tasks except zombie thread group | ||
221 | * leaders who are already counted with the dead tasks | ||
222 | */ | ||
223 | first = tgidtsk; | ||
224 | if (!first) { | ||
225 | read_lock(&tasklist_lock); | ||
226 | first = find_task_by_pid(tgid); | ||
227 | if (!first) { | ||
228 | read_unlock(&tasklist_lock); | ||
229 | return -ESRCH; | ||
230 | } | ||
231 | get_task_struct(first); | ||
232 | read_unlock(&tasklist_lock); | ||
233 | } else | ||
234 | get_task_struct(first); | ||
235 | |||
236 | /* Start with stats from dead tasks */ | ||
237 | spin_lock_irqsave(&first->signal->stats_lock, flags); | ||
238 | if (first->signal->stats) | ||
239 | memcpy(stats, first->signal->stats, sizeof(*stats)); | ||
240 | spin_unlock_irqrestore(&first->signal->stats_lock, flags); | ||
241 | |||
242 | tsk = first; | ||
243 | read_lock(&tasklist_lock); | ||
244 | do { | ||
245 | if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk)) | ||
246 | continue; | ||
247 | /* | ||
248 | * Each accounting subsystem can call its functions here to | ||
249 | * fill in relevant parts of struct taskstats as follows | ||
250 | * | ||
251 | * per-task-foo(stats, tsk); | ||
252 | */ | ||
253 | delayacct_add_tsk(stats, tsk); | ||
254 | |||
255 | } while_each_thread(first, tsk); | ||
256 | read_unlock(&tasklist_lock); | ||
257 | stats->version = TASKSTATS_VERSION; | ||
258 | |||
259 | /* | ||
260 | * Accounting subsystems can also add calls here to modify | ||
261 | * fields of taskstats. | ||
262 | */ | ||
263 | |||
264 | return 0; | ||
265 | } | ||
266 | |||
267 | |||
268 | static void fill_tgid_exit(struct task_struct *tsk) | ||
269 | { | ||
270 | unsigned long flags; | ||
271 | |||
272 | spin_lock_irqsave(&tsk->signal->stats_lock, flags); | ||
273 | if (!tsk->signal->stats) | ||
274 | goto ret; | ||
275 | |||
276 | /* | ||
277 | * Each accounting subsystem calls its functions here to | ||
278 | * accumulate its per-task stats for tsk into the per-tgid structure | ||
279 | * | ||
280 | * per-task-foo(tsk->signal->stats, tsk); | ||
281 | */ | ||
282 | delayacct_add_tsk(tsk->signal->stats, tsk); | ||
283 | ret: | ||
284 | spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); | ||
285 | return; | ||
286 | } | ||
287 | |||
288 | static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd) | ||
289 | { | ||
290 | struct listener_list *listeners; | ||
291 | struct listener *s, *tmp; | ||
292 | unsigned int cpu; | ||
293 | cpumask_t mask = *maskp; | ||
294 | |||
295 | if (!cpus_subset(mask, cpu_possible_map)) | ||
296 | return -EINVAL; | ||
297 | |||
298 | if (isadd == REGISTER) { | ||
299 | for_each_cpu_mask(cpu, mask) { | ||
300 | s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, | ||
301 | cpu_to_node(cpu)); | ||
302 | if (!s) | ||
303 | goto cleanup; | ||
304 | s->pid = pid; | ||
305 | INIT_LIST_HEAD(&s->list); | ||
306 | s->valid = 1; | ||
307 | |||
308 | listeners = &per_cpu(listener_array, cpu); | ||
309 | down_write(&listeners->sem); | ||
310 | list_add(&s->list, &listeners->list); | ||
311 | up_write(&listeners->sem); | ||
312 | } | ||
313 | return 0; | ||
314 | } | ||
315 | |||
316 | /* Deregister or cleanup */ | ||
317 | cleanup: | ||
318 | for_each_cpu_mask(cpu, mask) { | ||
319 | listeners = &per_cpu(listener_array, cpu); | ||
320 | down_write(&listeners->sem); | ||
321 | list_for_each_entry_safe(s, tmp, &listeners->list, list) { | ||
322 | if (s->pid == pid) { | ||
323 | list_del(&s->list); | ||
324 | kfree(s); | ||
325 | break; | ||
326 | } | ||
327 | } | ||
328 | up_write(&listeners->sem); | ||
329 | } | ||
330 | return 0; | ||
331 | } | ||
332 | |||
333 | static int parse(struct nlattr *na, cpumask_t *mask) | ||
334 | { | ||
335 | char *data; | ||
336 | int len; | ||
337 | int ret; | ||
338 | |||
339 | if (na == NULL) | ||
340 | return 1; | ||
341 | len = nla_len(na); | ||
342 | if (len > TASKSTATS_CPUMASK_MAXLEN) | ||
343 | return -E2BIG; | ||
344 | if (len < 1) | ||
345 | return -EINVAL; | ||
346 | data = kmalloc(len, GFP_KERNEL); | ||
347 | if (!data) | ||
348 | return -ENOMEM; | ||
349 | nla_strlcpy(data, na, len); | ||
350 | ret = cpulist_parse(data, *mask); | ||
351 | kfree(data); | ||
352 | return ret; | ||
353 | } | ||
354 | |||
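
parse() above hands the attribute payload to cpulist_parse(), so the REGISTER/DEREGISTER cpumask attributes carry ordinary cpulist strings: decimal CPU numbers and ranges, comma-separated. A couple of illustrative values a listener might send (the variable names exist only for this example):

/* Strings in the format cpulist_parse() accepts, sent as the NLA_STRING
 * payload of TASKSTATS_CMD_ATTR_REGISTER_CPUMASK / _DEREGISTER_CPUMASK. */
const char *first_four = "0-3";		/* CPUs 0,1,2,3 */
const char *mixed      = "0,2,4-7";	/* CPUs 0,2,4,5,6,7 */
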
355 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | ||
356 | { | ||
357 | int rc = 0; | ||
358 | struct sk_buff *rep_skb; | ||
359 | struct taskstats stats; | ||
360 | void *reply; | ||
361 | size_t size; | ||
362 | struct nlattr *na; | ||
363 | cpumask_t mask; | ||
364 | |||
365 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask); | ||
366 | if (rc < 0) | ||
367 | return rc; | ||
368 | if (rc == 0) | ||
369 | return add_del_listener(info->snd_pid, &mask, REGISTER); | ||
370 | |||
371 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask); | ||
372 | if (rc < 0) | ||
373 | return rc; | ||
374 | if (rc == 0) | ||
375 | return add_del_listener(info->snd_pid, &mask, DEREGISTER); | ||
376 | |||
377 | /* | ||
378 | * Size includes space for nested attributes | ||
379 | */ | ||
380 | size = nla_total_size(sizeof(u32)) + | ||
381 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
382 | |||
383 | memset(&stats, 0, sizeof(stats)); | ||
384 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); | ||
385 | if (rc < 0) | ||
386 | return rc; | ||
387 | |||
388 | if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { | ||
389 | u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); | ||
390 | rc = fill_pid(pid, NULL, &stats); | ||
391 | if (rc < 0) | ||
392 | goto err; | ||
393 | |||
394 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); | ||
395 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid); | ||
396 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
397 | stats); | ||
398 | } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { | ||
399 | u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | ||
400 | rc = fill_tgid(tgid, NULL, &stats); | ||
401 | if (rc < 0) | ||
402 | goto err; | ||
403 | |||
404 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); | ||
405 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid); | ||
406 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
407 | stats); | ||
408 | } else { | ||
409 | rc = -EINVAL; | ||
410 | goto err; | ||
411 | } | ||
412 | |||
413 | nla_nest_end(rep_skb, na); | ||
414 | |||
415 | return send_reply(rep_skb, info->snd_pid); | ||
416 | |||
417 | nla_put_failure: | ||
418 | return genlmsg_cancel(rep_skb, reply); | ||
419 | err: | ||
420 | nlmsg_free(rep_skb); | ||
421 | return rc; | ||
422 | } | ||
423 | |||
424 | void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) | ||
425 | { | ||
426 | struct listener_list *listeners; | ||
427 | struct taskstats *tmp; | ||
428 | /* | ||
429 | * This is the cpu on which the task is exiting currently and will | ||
430 | * be the one for which the exit event is sent, even if the cpu | ||
431 | * on which this function is running changes later. | ||
432 | */ | ||
433 | *mycpu = raw_smp_processor_id(); | ||
434 | |||
435 | *ptidstats = NULL; | ||
436 | tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); | ||
437 | if (!tmp) | ||
438 | return; | ||
439 | |||
440 | listeners = &per_cpu(listener_array, *mycpu); | ||
441 | down_read(&listeners->sem); | ||
442 | if (!list_empty(&listeners->list)) { | ||
443 | *ptidstats = tmp; | ||
444 | tmp = NULL; | ||
445 | } | ||
446 | up_read(&listeners->sem); | ||
447 | kfree(tmp); | ||
448 | } | ||
449 | |||
450 | /* Send pid data out on exit */ | ||
451 | void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, | ||
452 | int group_dead, unsigned int mycpu) | ||
453 | { | ||
454 | int rc; | ||
455 | struct sk_buff *rep_skb; | ||
456 | void *reply; | ||
457 | size_t size; | ||
458 | int is_thread_group; | ||
459 | struct nlattr *na; | ||
460 | unsigned long flags; | ||
461 | |||
462 | if (!family_registered || !tidstats) | ||
463 | return; | ||
464 | |||
465 | spin_lock_irqsave(&tsk->signal->stats_lock, flags); | ||
466 | is_thread_group = tsk->signal->stats ? 1 : 0; | ||
467 | spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); | ||
468 | |||
469 | rc = 0; | ||
470 | /* | ||
471 | * Size includes space for nested attributes | ||
472 | */ | ||
473 | size = nla_total_size(sizeof(u32)) + | ||
474 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
475 | |||
476 | if (is_thread_group) | ||
477 | size = 2 * size; /* PID + STATS + TGID + STATS */ | ||
478 | |||
479 | rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); | ||
480 | if (rc < 0) | ||
481 | goto ret; | ||
482 | |||
483 | rc = fill_pid(tsk->pid, tsk, tidstats); | ||
484 | if (rc < 0) | ||
485 | goto err_skb; | ||
486 | |||
487 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); | ||
488 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid); | ||
489 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
490 | *tidstats); | ||
491 | nla_nest_end(rep_skb, na); | ||
492 | |||
493 | if (!is_thread_group) | ||
494 | goto send; | ||
495 | |||
496 | /* | ||
497 | * tsk has/had a thread group so fill the tsk->signal->stats structure | ||
498 | * Doesn't matter if tsk is the leader or the last group member leaving | ||
499 | */ | ||
500 | |||
501 | fill_tgid_exit(tsk); | ||
502 | if (!group_dead) | ||
503 | goto send; | ||
504 | |||
505 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); | ||
506 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid); | ||
507 | /* No locking needed for tsk->signal->stats since group is dead */ | ||
508 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
509 | *tsk->signal->stats); | ||
510 | nla_nest_end(rep_skb, na); | ||
511 | |||
512 | send: | ||
513 | send_cpu_listeners(rep_skb, mycpu); | ||
514 | return; | ||
515 | |||
516 | nla_put_failure: | ||
517 | genlmsg_cancel(rep_skb, reply); | ||
518 | goto ret; | ||
519 | err_skb: | ||
520 | nlmsg_free(rep_skb); | ||
521 | ret: | ||
522 | return; | ||
523 | } | ||
524 | |||
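
taskstats_exit_alloc() and taskstats_exit_send() are the two halves of the exit-path hook: the allocation happens early (and is skipped entirely when nobody listens on the exiting CPU), the send happens once exit accounting is complete. The call site for both lives in kernel/exit.c rather than in this file; the sketch below is only an assumption about the shape of that hook, not the committed exit-path code:

/* Sketch of the exit-path usage; names other than the two taskstats_exit_*()
 * calls and taskstats_cache are illustrative, not the committed code. */
static void example_exit_path(struct task_struct *tsk, int group_dead)
{
	struct taskstats *tidstats;
	unsigned int mycpu;

	/* Early: allocate only if a listener is registered for this cpu. */
	taskstats_exit_alloc(&tidstats, &mycpu);

	/* ... per-task exit accounting runs here (acct, delayacct, ...) ... */

	/* Late: fill and send the per-pid (and, if the whole group died,
	 * per-tgid) record to the listeners registered for mycpu. */
	taskstats_exit_send(tsk, tidstats, group_dead, mycpu);

	/* Assumption: the caller releases the buffer afterwards; the data was
	 * copied into the netlink skb, so tidstats is no longer referenced. */
	if (tidstats)
		kmem_cache_free(taskstats_cache, tidstats);
}
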
525 | static struct genl_ops taskstats_ops = { | ||
526 | .cmd = TASKSTATS_CMD_GET, | ||
527 | .doit = taskstats_user_cmd, | ||
528 | .policy = taskstats_cmd_get_policy, | ||
529 | }; | ||
530 | |||
531 | /* Needed early in initialization */ | ||
532 | void __init taskstats_init_early(void) | ||
533 | { | ||
534 | unsigned int i; | ||
535 | |||
536 | taskstats_cache = kmem_cache_create("taskstats_cache", | ||
537 | sizeof(struct taskstats), | ||
538 | 0, SLAB_PANIC, NULL, NULL); | ||
539 | for_each_possible_cpu(i) { | ||
540 | INIT_LIST_HEAD(&(per_cpu(listener_array, i).list)); | ||
541 | init_rwsem(&(per_cpu(listener_array, i).sem)); | ||
542 | } | ||
543 | } | ||
544 | |||
545 | static int __init taskstats_init(void) | ||
546 | { | ||
547 | int rc; | ||
548 | |||
549 | rc = genl_register_family(&family); | ||
550 | if (rc) | ||
551 | return rc; | ||
552 | |||
553 | rc = genl_register_ops(&family, &taskstats_ops); | ||
554 | if (rc < 0) | ||
555 | goto err; | ||
556 | |||
557 | family_registered = 1; | ||
558 | return 0; | ||
559 | err: | ||
560 | genl_unregister_family(&family); | ||
561 | return rc; | ||
562 | } | ||
563 | |||
564 | /* | ||
565 | * late initcall ensures initialization of statistics collection | ||
566 | * mechanisms precedes initialization of the taskstats interface | ||
567 | */ | ||
568 | late_initcall(taskstats_init); | ||
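
Putting the pieces together from userspace: a client first resolves the dynamically assigned family id through the generic netlink controller, then issues TASKSTATS_CMD_GET with a TASKSTATS_CMD_ATTR_PID attribute and unpacks the nested TASKSTATS_TYPE_AGGR_PID reply built by taskstats_user_cmd() above. The sketch below is modelled loosely on the getdelays example client that accompanies this interface; it is a minimal sketch with no error handling and single-datagram replies assumed, not a drop-in tool:

/* Minimal taskstats query client (sketch). */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/genetlink.h>
#include <linux/taskstats.h>

#define GENLMSG_DATA(glh)  ((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
#define NLA_DATA(na)       ((void *)((char *)(na) + NLA_HDRLEN))

struct msgtemplate {
	struct nlmsghdr n;
	struct genlmsghdr g;
	char buf[1024];
};

static void send_cmd(int fd, __u16 nl_type, __u8 genl_cmd,
		     __u16 attr_type, const void *data, __u16 len)
{
	struct sockaddr_nl dst = { .nl_family = AF_NETLINK };
	struct msgtemplate msg;
	struct nlattr *na;

	memset(&msg, 0, sizeof(msg));
	msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
	msg.n.nlmsg_type = nl_type;
	msg.n.nlmsg_flags = NLM_F_REQUEST;
	msg.n.nlmsg_pid = getpid();
	msg.g.cmd = genl_cmd;
	msg.g.version = 0x1;

	na = (struct nlattr *)((char *)&msg + msg.n.nlmsg_len);
	na->nla_type = attr_type;
	na->nla_len = NLA_HDRLEN + len;
	memcpy(NLA_DATA(na), data, len);
	msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);

	sendto(fd, &msg, msg.n.nlmsg_len, 0,
	       (struct sockaddr *)&dst, sizeof(dst));
}

int main(int argc, char **argv)
{
	struct sockaddr_nl local = { .nl_family = AF_NETLINK };
	struct msgtemplate ans;
	struct taskstats ts;
	struct nlattr *na;
	__u16 family_id;
	__u32 pid = (argc > 1) ? atoi(argv[1]) : getpid();
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);

	bind(fd, (struct sockaddr *)&local, sizeof(local));

	/* Step 1: resolve the dynamically assigned TASKSTATS family id.
	 * Assumes the controller reply carries FAMILY_NAME first, then
	 * FAMILY_ID, as the getdelays example also assumes. */
	send_cmd(fd, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, CTRL_ATTR_FAMILY_NAME,
		 TASKSTATS_GENL_NAME, strlen(TASKSTATS_GENL_NAME) + 1);
	recv(fd, &ans, sizeof(ans), 0);
	na = (struct nlattr *)GENLMSG_DATA(&ans);	/* CTRL_ATTR_FAMILY_NAME */
	na = (struct nlattr *)((char *)na + NLA_ALIGN(na->nla_len));
	family_id = *(__u16 *)NLA_DATA(na);		/* CTRL_ATTR_FAMILY_ID */

	/* Step 2: TASKSTATS_CMD_GET; reply nests PID + STATS in AGGR_PID. */
	send_cmd(fd, family_id, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID,
		 &pid, sizeof(pid));
	recv(fd, &ans, sizeof(ans), 0);
	na = (struct nlattr *)GENLMSG_DATA(&ans);	/* TASKSTATS_TYPE_AGGR_PID */
	na = (struct nlattr *)NLA_DATA(na);		/* nested TASKSTATS_TYPE_PID */
	na = (struct nlattr *)((char *)na + NLA_ALIGN(na->nla_len));
	memcpy(&ts, NLA_DATA(na), sizeof(ts));		/* TASKSTATS_TYPE_STATS */

	printf("pid %u: version %u, blkio delay %llu ns over %llu waits\n",
	       pid, ts.version, (unsigned long long)ts.blkio_delay_total,
	       (unsigned long long)ts.blkio_count);
	close(fd);
	return 0;
}
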
diff --git a/kernel/timer.c b/kernel/timer.c index 396a3c024c2c..05809c2e2fd6 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -374,6 +374,7 @@ int del_timer_sync(struct timer_list *timer) | |||
374 | int ret = try_to_del_timer_sync(timer); | 374 | int ret = try_to_del_timer_sync(timer); |
375 | if (ret >= 0) | 375 | if (ret >= 0) |
376 | return ret; | 376 | return ret; |
377 | cpu_relax(); | ||
377 | } | 378 | } |
378 | } | 379 | } |
379 | 380 | ||
@@ -891,6 +892,7 @@ int do_settimeofday(struct timespec *tv) | |||
891 | set_normalized_timespec(&xtime, sec, nsec); | 892 | set_normalized_timespec(&xtime, sec, nsec); |
892 | set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); | 893 | set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); |
893 | 894 | ||
895 | clock->error = 0; | ||
894 | ntp_clear(); | 896 | ntp_clear(); |
895 | 897 | ||
896 | write_sequnlock_irqrestore(&xtime_lock, flags); | 898 | write_sequnlock_irqrestore(&xtime_lock, flags); |
@@ -967,6 +969,7 @@ void __init timekeeping_init(void) | |||
967 | } | 969 | } |
968 | 970 | ||
969 | 971 | ||
972 | static int timekeeping_suspended; | ||
970 | /* | 973 | /* |
971 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | 974 | * timekeeping_resume - Resumes the generic timekeeping subsystem. |
972 | * @dev: unused | 975 | * @dev: unused |
@@ -982,6 +985,18 @@ static int timekeeping_resume(struct sys_device *dev) | |||
982 | write_seqlock_irqsave(&xtime_lock, flags); | 985 | write_seqlock_irqsave(&xtime_lock, flags); |
983 | /* restart the last cycle value */ | 986 | /* restart the last cycle value */ |
984 | clock->cycle_last = clocksource_read(clock); | 987 | clock->cycle_last = clocksource_read(clock); |
988 | clock->error = 0; | ||
989 | timekeeping_suspended = 0; | ||
990 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
991 | return 0; | ||
992 | } | ||
993 | |||
994 | static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) | ||
995 | { | ||
996 | unsigned long flags; | ||
997 | |||
998 | write_seqlock_irqsave(&xtime_lock, flags); | ||
999 | timekeeping_suspended = 1; | ||
985 | write_sequnlock_irqrestore(&xtime_lock, flags); | 1000 | write_sequnlock_irqrestore(&xtime_lock, flags); |
986 | return 0; | 1001 | return 0; |
987 | } | 1002 | } |
@@ -989,6 +1004,7 @@ static int timekeeping_resume(struct sys_device *dev) | |||
989 | /* sysfs resume/suspend bits for timekeeping */ | 1004 | /* sysfs resume/suspend bits for timekeeping */ |
990 | static struct sysdev_class timekeeping_sysclass = { | 1005 | static struct sysdev_class timekeeping_sysclass = { |
991 | .resume = timekeeping_resume, | 1006 | .resume = timekeeping_resume, |
1007 | .suspend = timekeeping_suspend, | ||
992 | set_kset_name("timekeeping"), | 1008 | set_kset_name("timekeeping"), |
993 | }; | 1009 | }; |
994 | 1010 | ||
@@ -1008,52 +1024,52 @@ static int __init timekeeping_init_device(void) | |||
1008 | device_initcall(timekeeping_init_device); | 1024 | device_initcall(timekeeping_init_device); |
1009 | 1025 | ||
1010 | /* | 1026 | /* |
1011 | * If the error is already larger, we look ahead another tick, | 1027 | * If the error is already larger, we look ahead even further |
1012 | * to compensate for late or lost adjustments. | 1028 | * to compensate for late or lost adjustments. |
1013 | */ | 1029 | */ |
1014 | static __always_inline int clocksource_bigadjust(int sign, s64 error, s64 *interval, s64 *offset) | 1030 | static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, s64 *offset) |
1015 | { | 1031 | { |
1016 | int adj; | 1032 | s64 tick_error, i; |
1033 | u32 look_ahead, adj; | ||
1034 | s32 error2, mult; | ||
1017 | 1035 | ||
1018 | /* | 1036 | /* |
1019 | * As soon as the machine is synchronized to the external time | 1037 | * Use the current error value to determine how much to look ahead. |
1020 | * source this should be the common case. | 1038 | * The larger the error the slower we adjust for it to avoid problems |
1039 | * with losing too many ticks, otherwise we would overadjust and | ||
1040 | * produce an even larger error. The smaller the adjustment the | ||
1041 | * faster we try to adjust for it, as lost ticks can do less harm | ||
1042 | * here. This is tuned so that an error of about 1 msec is adjusted | ||
1043 | * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). | ||
1021 | */ | 1044 | */ |
1022 | error >>= 2; | 1045 | error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ); |
1023 | if (likely(sign > 0 ? error <= *interval : error >= *interval)) | 1046 | error2 = abs(error2); |
1024 | return sign; | 1047 | for (look_ahead = 0; error2 > 0; look_ahead++) |
1048 | error2 >>= 2; | ||
1025 | 1049 | ||
1026 | /* | 1050 | /* |
1027 | * An extra look ahead dampens the effect of the current error, | 1051 | * Now calculate the error in (1 << look_ahead) ticks, but first |
1028 | * which can grow quite large with continously late updates, as | 1052 | * remove the single look ahead already included in the error. |
1029 | * it would dominate the adjustment value and can lead to | ||
1030 | * oscillation. | ||
1031 | */ | 1053 | */ |
1032 | error += current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1); | 1054 | tick_error = current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1); |
1033 | error -= clock->xtime_interval >> 1; | 1055 | tick_error -= clock->xtime_interval >> 1; |
1034 | 1056 | error = ((error - tick_error) >> look_ahead) + tick_error; | |
1035 | adj = 0; | 1057 | |
1036 | while (1) { | 1058 | /* Finally calculate the adjustment shift value. */ |
1037 | error >>= 1; | 1059 | i = *interval; |
1038 | if (sign > 0 ? error <= *interval : error >= *interval) | 1060 | mult = 1; |
1039 | break; | 1061 | if (error < 0) { |
1040 | adj++; | 1062 | error = -error; |
1063 | *interval = -*interval; | ||
1064 | *offset = -*offset; | ||
1065 | mult = -1; | ||
1041 | } | 1066 | } |
1042 | 1067 | for (adj = 0; error > i; adj++) | |
1043 | /* | 1068 | error >>= 1; |
1044 | * Add the current adjustments to the error and take the offset | ||
1045 | * into account, the latter can cause the error to be hardly | ||
1046 | * reduced at the next tick. Check the error again if there's | ||
1047 | * room for another adjustment, thus further reducing the error | ||
1048 | * which otherwise had to be corrected at the next update. | ||
1049 | */ | ||
1050 | error = (error << 1) - *interval + *offset; | ||
1051 | if (sign > 0 ? error > *interval : error < *interval) | ||
1052 | adj++; | ||
1053 | 1069 | ||
1054 | *interval <<= adj; | 1070 | *interval <<= adj; |
1055 | *offset <<= adj; | 1071 | *offset <<= adj; |
1056 | return sign << adj; | 1072 | return mult << adj; |
1057 | } | 1073 | } |
1058 | 1074 | ||
1059 | /* | 1075 | /* |
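
The rewritten clocksource_bigadjust() ends with plain shift arithmetic: keep halving the damped error until it fits within the base adjustment interval, count the halvings, and scale interval, offset and the returned multiplier by the same power of two. A standalone sketch of just that closing step, outside kernel context and with illustrative values in main():

#include <stdio.h>
#include <stdint.h>

/* Sketch of the closing step of clocksource_bigadjust(): double the
 * adjustment interval until it covers the error, and report how many
 * doublings (adj) were needed; the caller applies interval << adj. */
static int bigadjust_shift(int64_t error, int64_t *interval, int64_t *offset)
{
	int64_t i = *interval;
	int mult = 1;
	unsigned int adj;

	if (error < 0) {		/* fold the sign into the returned multiplier */
		error = -error;
		*interval = -*interval;
		*offset = -*offset;
		mult = -1;
	}
	for (adj = 0; error > i; adj++)
		error >>= 1;

	*interval <<= adj;
	*offset <<= adj;
	return mult << adj;		/* e.g. error ~ 5*i needs 3 doublings */
}

int main(void)
{
	int64_t interval = 1000, offset = 10;
	int m = bigadjust_shift(5000, &interval, &offset);

	/* Prints mult=8 interval=8000 offset=80 for these inputs. */
	printf("mult=%d interval=%lld offset=%lld\n",
	       m, (long long)interval, (long long)offset);
	return 0;
}
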
@@ -1068,11 +1084,19 @@ static void clocksource_adjust(struct clocksource *clock, s64 offset) | |||
1068 | 1084 | ||
1069 | error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1); | 1085 | error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1); |
1070 | if (error > interval) { | 1086 | if (error > interval) { |
1071 | adj = clocksource_bigadjust(1, error, &interval, &offset); | 1087 | error >>= 2; |
1088 | if (likely(error <= interval)) | ||
1089 | adj = 1; | ||
1090 | else | ||
1091 | adj = clocksource_bigadjust(error, &interval, &offset); | ||
1072 | } else if (error < -interval) { | 1092 | } else if (error < -interval) { |
1073 | interval = -interval; | 1093 | error >>= 2; |
1074 | offset = -offset; | 1094 | if (likely(error >= -interval)) { |
1075 | adj = clocksource_bigadjust(-1, error, &interval, &offset); | 1095 | adj = -1; |
1096 | interval = -interval; | ||
1097 | offset = -offset; | ||
1098 | } else | ||
1099 | adj = clocksource_bigadjust(error, &interval, &offset); | ||
1076 | } else | 1100 | } else |
1077 | return; | 1101 | return; |
1078 | 1102 | ||
@@ -1091,13 +1115,16 @@ static void update_wall_time(void) | |||
1091 | { | 1115 | { |
1092 | cycle_t offset; | 1116 | cycle_t offset; |
1093 | 1117 | ||
1094 | clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift; | 1118 | /* Make sure we're fully resumed: */ |
1119 | if (unlikely(timekeeping_suspended)) | ||
1120 | return; | ||
1095 | 1121 | ||
1096 | #ifdef CONFIG_GENERIC_TIME | 1122 | #ifdef CONFIG_GENERIC_TIME |
1097 | offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; | 1123 | offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; |
1098 | #else | 1124 | #else |
1099 | offset = clock->cycle_interval; | 1125 | offset = clock->cycle_interval; |
1100 | #endif | 1126 | #endif |
1127 | clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift; | ||
1101 | 1128 | ||
1102 | /* normally this loop will run just once, however in the | 1129 | /* normally this loop will run just once, however in the |
1103 | * case of lost or late ticks, it will accumulate correctly. | 1130 | * case of lost or late ticks, it will accumulate correctly. |
@@ -1129,7 +1156,7 @@ static void update_wall_time(void) | |||
1129 | clocksource_adjust(clock, offset); | 1156 | clocksource_adjust(clock, offset); |
1130 | 1157 | ||
1131 | /* store full nanoseconds into xtime */ | 1158 | /* store full nanoseconds into xtime */ |
1132 | xtime.tv_nsec = clock->xtime_nsec >> clock->shift; | 1159 | xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; |
1133 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; | 1160 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; |
1134 | 1161 | ||
1135 | /* check to see if there is a new clocksource to use */ | 1162 | /* check to see if there is a new clocksource to use */ |
diff --git a/kernel/wait.c b/kernel/wait.c index a1d57aeb7f75..59a82f63275d 100644 --- a/kernel/wait.c +++ b/kernel/wait.c | |||
@@ -10,9 +10,13 @@ | |||
10 | #include <linux/wait.h> | 10 | #include <linux/wait.h> |
11 | #include <linux/hash.h> | 11 | #include <linux/hash.h> |
12 | 12 | ||
13 | struct lock_class_key waitqueue_lock_key; | 13 | void init_waitqueue_head(wait_queue_head_t *q) |
14 | { | ||
15 | spin_lock_init(&q->lock); | ||
16 | INIT_LIST_HEAD(&q->task_list); | ||
17 | } | ||
14 | 18 | ||
15 | EXPORT_SYMBOL(waitqueue_lock_key); | 19 | EXPORT_SYMBOL(init_waitqueue_head); |
16 | 20 | ||
17 | void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) | 21 | void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) |
18 | { | 22 | { |
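
With init_waitqueue_head() now defined out of line here, replacing the previously exported global waitqueue_lock_key, dynamically allocated wait queue heads are set up by calling it explicitly; statically declared ones keep using DECLARE_WAIT_QUEUE_HEAD(). A minimal usage sketch, kernel context assumed and every name outside the wait-queue API illustrative:

#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/sched.h>

/* Illustrative structure embedding a dynamically initialized wait queue. */
struct foo_dev {
	wait_queue_head_t wq;
	int ready;
};

static struct foo_dev *foo_create(void)
{
	struct foo_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;
	init_waitqueue_head(&dev->wq);	/* sets up dev->wq.lock and task_list */
	return dev;
}

static int foo_wait(struct foo_dev *dev)
{
	/* Sleep until dev->ready is set by another context that then
	 * calls wake_up(&dev->wq). */
	return wait_event_interruptible(dev->wq, dev->ready);
}
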