Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile         |   2
-rw-r--r--  kernel/acct.c           |   4
-rw-r--r--  kernel/cpu.c            |  75
-rw-r--r--  kernel/cpuset.c         |  24
-rw-r--r--  kernel/delayacct.c      | 178
-rw-r--r--  kernel/exit.c           |  10
-rw-r--r--  kernel/fork.c           |   6
-rw-r--r--  kernel/futex.c          | 121
-rw-r--r--  kernel/futex_compat.c   |  34
-rw-r--r--  kernel/kallsyms.c       |   4
-rw-r--r--  kernel/kthread.c        |  24
-rw-r--r--  kernel/module.c         |  11
-rw-r--r--  kernel/rtmutex-tester.c |   1
-rw-r--r--  kernel/sched.c          |  84
-rw-r--r--  kernel/softirq.c        |   2
-rw-r--r--  kernel/taskstats.c      | 568
-rw-r--r--  kernel/timer.c          |  20
17 files changed, 1021 insertions(+), 147 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 47dbcd570cd8..d62ec66c1af2 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ | |||
48 | obj-$(CONFIG_SECCOMP) += seccomp.o | 48 | obj-$(CONFIG_SECCOMP) += seccomp.o |
49 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | 49 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
50 | obj-$(CONFIG_RELAY) += relay.o | 50 | obj-$(CONFIG_RELAY) += relay.o |
51 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | ||
52 | obj-$(CONFIG_TASKSTATS) += taskstats.o | ||
51 | 53 | ||
52 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) | 54 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) |
53 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 55 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/acct.c b/kernel/acct.c
index f18e0b8df3e1..2a7c933651c7 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -488,7 +488,7 @@ static void do_acct_process(struct file *file) | |||
488 | old_encode_dev(tty_devnum(current->signal->tty)) : 0; | 488 | old_encode_dev(tty_devnum(current->signal->tty)) : 0; |
489 | read_unlock(&tasklist_lock); | 489 | read_unlock(&tasklist_lock); |
490 | 490 | ||
491 | spin_lock(¤t->sighand->siglock); | 491 | spin_lock_irq(¤t->sighand->siglock); |
492 | ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); | 492 | ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); |
493 | ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); | 493 | ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); |
494 | ac.ac_flag = pacct->ac_flag; | 494 | ac.ac_flag = pacct->ac_flag; |
@@ -496,7 +496,7 @@ static void do_acct_process(struct file *file) | |||
496 | ac.ac_minflt = encode_comp_t(pacct->ac_minflt); | 496 | ac.ac_minflt = encode_comp_t(pacct->ac_minflt); |
497 | ac.ac_majflt = encode_comp_t(pacct->ac_majflt); | 497 | ac.ac_majflt = encode_comp_t(pacct->ac_majflt); |
498 | ac.ac_exitcode = pacct->ac_exitcode; | 498 | ac.ac_exitcode = pacct->ac_exitcode; |
499 | spin_unlock(¤t->sighand->siglock); | 499 | spin_unlock_irq(¤t->sighand->siglock); |
500 | ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ | 500 | ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ |
501 | ac.ac_rw = encode_comp_t(ac.ac_io / 1024); | 501 | ac.ac_rw = encode_comp_t(ac.ac_io / 1024); |
502 | ac.ac_swaps = encode_comp_t(0); | 502 | ac.ac_swaps = encode_comp_t(0); |
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 70fbf2e83766..f230f9ae01c2 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -16,56 +16,48 @@ | |||
16 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
17 | 17 | ||
18 | /* This protects CPUs going up and down... */ | 18 | /* This protects CPUs going up and down... */ |
19 | static DEFINE_MUTEX(cpucontrol); | 19 | static DEFINE_MUTEX(cpu_add_remove_lock); |
20 | static DEFINE_MUTEX(cpu_bitmask_lock); | ||
20 | 21 | ||
21 | static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain); | 22 | static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain); |
22 | 23 | ||
23 | #ifdef CONFIG_HOTPLUG_CPU | 24 | #ifdef CONFIG_HOTPLUG_CPU |
24 | static struct task_struct *lock_cpu_hotplug_owner; | ||
25 | static int lock_cpu_hotplug_depth; | ||
26 | 25 | ||
27 | static int __lock_cpu_hotplug(int interruptible) | 26 | /* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */ |
28 | { | 27 | static struct task_struct *recursive; |
29 | int ret = 0; | 28 | static int recursive_depth; |
30 | |||
31 | if (lock_cpu_hotplug_owner != current) { | ||
32 | if (interruptible) | ||
33 | ret = mutex_lock_interruptible(&cpucontrol); | ||
34 | else | ||
35 | mutex_lock(&cpucontrol); | ||
36 | } | ||
37 | |||
38 | /* | ||
39 | * Set only if we succeed in locking | ||
40 | */ | ||
41 | if (!ret) { | ||
42 | lock_cpu_hotplug_depth++; | ||
43 | lock_cpu_hotplug_owner = current; | ||
44 | } | ||
45 | |||
46 | return ret; | ||
47 | } | ||
48 | 29 | ||
49 | void lock_cpu_hotplug(void) | 30 | void lock_cpu_hotplug(void) |
50 | { | 31 | { |
51 | __lock_cpu_hotplug(0); | 32 | struct task_struct *tsk = current; |
33 | |||
34 | if (tsk == recursive) { | ||
35 | static int warnings = 10; | ||
36 | if (warnings) { | ||
37 | printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n"); | ||
38 | WARN_ON(1); | ||
39 | warnings--; | ||
40 | } | ||
41 | recursive_depth++; | ||
42 | return; | ||
43 | } | ||
44 | mutex_lock(&cpu_bitmask_lock); | ||
45 | recursive = tsk; | ||
52 | } | 46 | } |
53 | EXPORT_SYMBOL_GPL(lock_cpu_hotplug); | 47 | EXPORT_SYMBOL_GPL(lock_cpu_hotplug); |
54 | 48 | ||
55 | void unlock_cpu_hotplug(void) | 49 | void unlock_cpu_hotplug(void) |
56 | { | 50 | { |
57 | if (--lock_cpu_hotplug_depth == 0) { | 51 | WARN_ON(recursive != current); |
58 | lock_cpu_hotplug_owner = NULL; | 52 | if (recursive_depth) { |
59 | mutex_unlock(&cpucontrol); | 53 | recursive_depth--; |
54 | return; | ||
60 | } | 55 | } |
56 | mutex_unlock(&cpu_bitmask_lock); | ||
57 | recursive = NULL; | ||
61 | } | 58 | } |
62 | EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); | 59 | EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); |
63 | 60 | ||
64 | int lock_cpu_hotplug_interruptible(void) | ||
65 | { | ||
66 | return __lock_cpu_hotplug(1); | ||
67 | } | ||
68 | EXPORT_SYMBOL_GPL(lock_cpu_hotplug_interruptible); | ||
69 | #endif /* CONFIG_HOTPLUG_CPU */ | 61 | #endif /* CONFIG_HOTPLUG_CPU */ |
70 | 62 | ||
71 | /* Need to know about CPUs going up/down? */ | 63 | /* Need to know about CPUs going up/down? */ |
@@ -122,9 +114,7 @@ int cpu_down(unsigned int cpu) | |||
122 | struct task_struct *p; | 114 | struct task_struct *p; |
123 | cpumask_t old_allowed, tmp; | 115 | cpumask_t old_allowed, tmp; |
124 | 116 | ||
125 | if ((err = lock_cpu_hotplug_interruptible()) != 0) | 117 | mutex_lock(&cpu_add_remove_lock); |
126 | return err; | ||
127 | |||
128 | if (num_online_cpus() == 1) { | 118 | if (num_online_cpus() == 1) { |
129 | err = -EBUSY; | 119 | err = -EBUSY; |
130 | goto out; | 120 | goto out; |
@@ -150,7 +140,10 @@ int cpu_down(unsigned int cpu) | |||
150 | cpu_clear(cpu, tmp); | 140 | cpu_clear(cpu, tmp); |
151 | set_cpus_allowed(current, tmp); | 141 | set_cpus_allowed(current, tmp); |
152 | 142 | ||
143 | mutex_lock(&cpu_bitmask_lock); | ||
153 | p = __stop_machine_run(take_cpu_down, NULL, cpu); | 144 | p = __stop_machine_run(take_cpu_down, NULL, cpu); |
145 | mutex_unlock(&cpu_bitmask_lock); | ||
146 | |||
154 | if (IS_ERR(p)) { | 147 | if (IS_ERR(p)) { |
155 | /* CPU didn't die: tell everyone. Can't complain. */ | 148 | /* CPU didn't die: tell everyone. Can't complain. */ |
156 | if (blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, | 149 | if (blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, |
@@ -187,7 +180,7 @@ out_thread: | |||
187 | out_allowed: | 180 | out_allowed: |
188 | set_cpus_allowed(current, old_allowed); | 181 | set_cpus_allowed(current, old_allowed); |
189 | out: | 182 | out: |
190 | unlock_cpu_hotplug(); | 183 | mutex_unlock(&cpu_add_remove_lock); |
191 | return err; | 184 | return err; |
192 | } | 185 | } |
193 | #endif /*CONFIG_HOTPLUG_CPU*/ | 186 | #endif /*CONFIG_HOTPLUG_CPU*/ |
@@ -197,9 +190,7 @@ int __devinit cpu_up(unsigned int cpu) | |||
197 | int ret; | 190 | int ret; |
198 | void *hcpu = (void *)(long)cpu; | 191 | void *hcpu = (void *)(long)cpu; |
199 | 192 | ||
200 | if ((ret = lock_cpu_hotplug_interruptible()) != 0) | 193 | mutex_lock(&cpu_add_remove_lock); |
201 | return ret; | ||
202 | |||
203 | if (cpu_online(cpu) || !cpu_present(cpu)) { | 194 | if (cpu_online(cpu) || !cpu_present(cpu)) { |
204 | ret = -EINVAL; | 195 | ret = -EINVAL; |
205 | goto out; | 196 | goto out; |
@@ -214,7 +205,9 @@ int __devinit cpu_up(unsigned int cpu) | |||
214 | } | 205 | } |
215 | 206 | ||
216 | /* Arch-specific enabling code. */ | 207 | /* Arch-specific enabling code. */ |
208 | mutex_lock(&cpu_bitmask_lock); | ||
217 | ret = __cpu_up(cpu); | 209 | ret = __cpu_up(cpu); |
210 | mutex_unlock(&cpu_bitmask_lock); | ||
218 | if (ret != 0) | 211 | if (ret != 0) |
219 | goto out_notify; | 212 | goto out_notify; |
220 | BUG_ON(!cpu_online(cpu)); | 213 | BUG_ON(!cpu_online(cpu)); |
@@ -227,6 +220,6 @@ out_notify: | |||
227 | blocking_notifier_call_chain(&cpu_chain, | 220 | blocking_notifier_call_chain(&cpu_chain, |
228 | CPU_UP_CANCELED, hcpu); | 221 | CPU_UP_CANCELED, hcpu); |
229 | out: | 222 | out: |
230 | unlock_cpu_hotplug(); | 223 | mutex_unlock(&cpu_add_remove_lock); |
231 | return ret; | 224 | return ret; |
232 | } | 225 | } |
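The reworked lock_cpu_hotplug()/unlock_cpu_hotplug() above tolerate one recursive caller (the cpufreq paths the comment complains about) by remembering which task currently holds cpu_bitmask_lock and bumping a depth counter instead of blocking on the mutex a second time. The same owner-tracking idea can be sketched in user space with pthreads; this is only an illustrative analogue with made-up names (hotplug_lock()/hotplug_unlock()), not the kernel code:

/* User-space sketch of the owner-tracked, recursion-tolerant lock
 * pattern used by lock_cpu_hotplug() above. Hypothetical names. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t bitmask_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t owner;           /* thread currently holding the lock */
static int owner_valid;
static int depth;                 /* extra, recursive acquisitions */

static void hotplug_lock(void)
{
	if (owner_valid && pthread_equal(owner, pthread_self())) {
		fprintf(stderr, "recursive hotplug lock (tolerated)\n");
		depth++;          /* don't deadlock on ourselves */
		return;
	}
	pthread_mutex_lock(&bitmask_lock);
	owner = pthread_self();
	owner_valid = 1;
}

static void hotplug_unlock(void)
{
	if (depth) {              /* undo a recursive acquisition first */
		depth--;
		return;
	}
	owner_valid = 0;
	pthread_mutex_unlock(&bitmask_lock);
}

int main(void)
{
	hotplug_lock();
	hotplug_lock();           /* nested call: warned about, not fatal */
	hotplug_unlock();
	hotplug_unlock();
	return 0;
}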
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c232dc077438..1a649f2bb9bb 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -762,6 +762,8 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | |||
762 | * | 762 | * |
763 | * Call with manage_mutex held. May nest a call to the | 763 | * Call with manage_mutex held. May nest a call to the |
764 | * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. | 764 | * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. |
765 | * Must not be called holding callback_mutex, because we must | ||
766 | * not call lock_cpu_hotplug() while holding callback_mutex. | ||
765 | */ | 767 | */ |
766 | 768 | ||
767 | static void update_cpu_domains(struct cpuset *cur) | 769 | static void update_cpu_domains(struct cpuset *cur) |
@@ -781,7 +783,7 @@ static void update_cpu_domains(struct cpuset *cur) | |||
781 | if (is_cpu_exclusive(c)) | 783 | if (is_cpu_exclusive(c)) |
782 | cpus_andnot(pspan, pspan, c->cpus_allowed); | 784 | cpus_andnot(pspan, pspan, c->cpus_allowed); |
783 | } | 785 | } |
784 | if (is_removed(cur) || !is_cpu_exclusive(cur)) { | 786 | if (!is_cpu_exclusive(cur)) { |
785 | cpus_or(pspan, pspan, cur->cpus_allowed); | 787 | cpus_or(pspan, pspan, cur->cpus_allowed); |
786 | if (cpus_equal(pspan, cur->cpus_allowed)) | 788 | if (cpus_equal(pspan, cur->cpus_allowed)) |
787 | return; | 789 | return; |
@@ -1917,6 +1919,17 @@ static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
1917 | return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR); | 1919 | return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR); |
1918 | } | 1920 | } |
1919 | 1921 | ||
1922 | /* | ||
1923 | * Locking note on the strange update_flag() call below: | ||
1924 | * | ||
1925 | * If the cpuset being removed is marked cpu_exclusive, then simulate | ||
1926 | * turning cpu_exclusive off, which will call update_cpu_domains(). | ||
1927 | * The lock_cpu_hotplug() call in update_cpu_domains() must not be | ||
1928 | * made while holding callback_mutex. Elsewhere the kernel nests | ||
1929 | * callback_mutex inside lock_cpu_hotplug() calls. So the reverse | ||
1930 | * nesting would risk an ABBA deadlock. | ||
1931 | */ | ||
1932 | |||
1920 | static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) | 1933 | static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) |
1921 | { | 1934 | { |
1922 | struct cpuset *cs = dentry->d_fsdata; | 1935 | struct cpuset *cs = dentry->d_fsdata; |
@@ -1936,11 +1949,16 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
1936 | mutex_unlock(&manage_mutex); | 1949 | mutex_unlock(&manage_mutex); |
1937 | return -EBUSY; | 1950 | return -EBUSY; |
1938 | } | 1951 | } |
1952 | if (is_cpu_exclusive(cs)) { | ||
1953 | int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0"); | ||
1954 | if (retval < 0) { | ||
1955 | mutex_unlock(&manage_mutex); | ||
1956 | return retval; | ||
1957 | } | ||
1958 | } | ||
1939 | parent = cs->parent; | 1959 | parent = cs->parent; |
1940 | mutex_lock(&callback_mutex); | 1960 | mutex_lock(&callback_mutex); |
1941 | set_bit(CS_REMOVED, &cs->flags); | 1961 | set_bit(CS_REMOVED, &cs->flags); |
1942 | if (is_cpu_exclusive(cs)) | ||
1943 | update_cpu_domains(cs); | ||
1944 | list_del(&cs->sibling); /* delete my sibling from parent->children */ | 1962 | list_del(&cs->sibling); /* delete my sibling from parent->children */ |
1945 | spin_lock(&cs->dentry->d_lock); | 1963 | spin_lock(&cs->dentry->d_lock); |
1946 | d = dget(cs->dentry); | 1964 | d = dget(cs->dentry); |
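The locking note added to cpuset_rmdir() is the usual ABBA argument: elsewhere the kernel takes lock_cpu_hotplug() and then callback_mutex, so taking them in the reverse order here could leave two tasks each holding one lock and waiting forever on the other. A minimal user-space demonstration of that failure mode (the mutex names are stand-ins, and the sleep() only widens the race window; the program is expected to wedge):

/* ABBA deadlock in miniature: the two threads usually end up each
 * holding one mutex and waiting for the other. */
#include <pthread.h>
#include <unistd.h>

static pthread_mutex_t hotplug_lock  = PTHREAD_MUTEX_INITIALIZER; /* "A" */
static pthread_mutex_t callback_lock = PTHREAD_MUTEX_INITIALIZER; /* "B" */

static void *path_one(void *arg)   /* the agreed ordering: A then B */
{
	(void)arg;
	pthread_mutex_lock(&hotplug_lock);
	sleep(1);                  /* widen the race window */
	pthread_mutex_lock(&callback_lock);
	pthread_mutex_unlock(&callback_lock);
	pthread_mutex_unlock(&hotplug_lock);
	return NULL;
}

static void *path_two(void *arg)   /* the forbidden ordering: B then A */
{
	(void)arg;
	pthread_mutex_lock(&callback_lock);
	sleep(1);
	pthread_mutex_lock(&hotplug_lock);
	pthread_mutex_unlock(&hotplug_lock);
	pthread_mutex_unlock(&callback_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, path_one, NULL);
	pthread_create(&b, NULL, path_two, NULL);
	pthread_join(a, NULL);     /* never returns once both are stuck */
	pthread_join(b, NULL);
	return 0;
}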
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
new file mode 100644
index 000000000000..f05392d64267
--- /dev/null
+++ b/kernel/delayacct.c
@@ -0,0 +1,178 @@ | |||
1 | /* delayacct.c - per-task delay accounting | ||
2 | * | ||
3 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, but | ||
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
13 | * the GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/sched.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include <linux/time.h> | ||
19 | #include <linux/sysctl.h> | ||
20 | #include <linux/delayacct.h> | ||
21 | |||
22 | int delayacct_on __read_mostly; /* Delay accounting turned on/off */ | ||
23 | kmem_cache_t *delayacct_cache; | ||
24 | |||
25 | static int __init delayacct_setup_enable(char *str) | ||
26 | { | ||
27 | delayacct_on = 1; | ||
28 | return 1; | ||
29 | } | ||
30 | __setup("delayacct", delayacct_setup_enable); | ||
31 | |||
32 | void delayacct_init(void) | ||
33 | { | ||
34 | delayacct_cache = kmem_cache_create("delayacct_cache", | ||
35 | sizeof(struct task_delay_info), | ||
36 | 0, | ||
37 | SLAB_PANIC, | ||
38 | NULL, NULL); | ||
39 | delayacct_tsk_init(&init_task); | ||
40 | } | ||
41 | |||
42 | void __delayacct_tsk_init(struct task_struct *tsk) | ||
43 | { | ||
44 | spin_lock_init(&tsk->delays_lock); | ||
45 | /* No need to acquire tsk->delays_lock for allocation here unless | ||
46 | __delayacct_tsk_init called after tsk is attached to tasklist | ||
47 | */ | ||
48 | tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL); | ||
49 | if (tsk->delays) | ||
50 | spin_lock_init(&tsk->delays->lock); | ||
51 | } | ||
52 | |||
53 | void __delayacct_tsk_exit(struct task_struct *tsk) | ||
54 | { | ||
55 | struct task_delay_info *delays = tsk->delays; | ||
56 | spin_lock(&tsk->delays_lock); | ||
57 | tsk->delays = NULL; | ||
58 | spin_unlock(&tsk->delays_lock); | ||
59 | kmem_cache_free(delayacct_cache, delays); | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * Start accounting for a delay statistic using | ||
64 | * its starting timestamp (@start) | ||
65 | */ | ||
66 | |||
67 | static inline void delayacct_start(struct timespec *start) | ||
68 | { | ||
69 | do_posix_clock_monotonic_gettime(start); | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * Finish delay accounting for a statistic using | ||
74 | * its timestamps (@start, @end), accumulator (@total) and @count | ||
75 | */ | ||
76 | |||
77 | static void delayacct_end(struct timespec *start, struct timespec *end, | ||
78 | u64 *total, u32 *count) | ||
79 | { | ||
80 | struct timespec ts; | ||
81 | s64 ns; | ||
82 | |||
83 | do_posix_clock_monotonic_gettime(end); | ||
84 | ts = timespec_sub(*end, *start); | ||
85 | ns = timespec_to_ns(&ts); | ||
86 | if (ns < 0) | ||
87 | return; | ||
88 | |||
89 | spin_lock(¤t->delays->lock); | ||
90 | *total += ns; | ||
91 | (*count)++; | ||
92 | spin_unlock(¤t->delays->lock); | ||
93 | } | ||
94 | |||
95 | void __delayacct_blkio_start(void) | ||
96 | { | ||
97 | delayacct_start(¤t->delays->blkio_start); | ||
98 | } | ||
99 | |||
100 | void __delayacct_blkio_end(void) | ||
101 | { | ||
102 | if (current->delays->flags & DELAYACCT_PF_SWAPIN) | ||
103 | /* Swapin block I/O */ | ||
104 | delayacct_end(¤t->delays->blkio_start, | ||
105 | ¤t->delays->blkio_end, | ||
106 | ¤t->delays->swapin_delay, | ||
107 | ¤t->delays->swapin_count); | ||
108 | else /* Other block I/O */ | ||
109 | delayacct_end(¤t->delays->blkio_start, | ||
110 | ¤t->delays->blkio_end, | ||
111 | ¤t->delays->blkio_delay, | ||
112 | ¤t->delays->blkio_count); | ||
113 | } | ||
114 | |||
115 | int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) | ||
116 | { | ||
117 | s64 tmp; | ||
118 | struct timespec ts; | ||
119 | unsigned long t1,t2,t3; | ||
120 | |||
121 | spin_lock(&tsk->delays_lock); | ||
122 | |||
123 | /* Though tsk->delays accessed later, early exit avoids | ||
124 | * unnecessary returning of other data | ||
125 | */ | ||
126 | if (!tsk->delays) | ||
127 | goto done; | ||
128 | |||
129 | tmp = (s64)d->cpu_run_real_total; | ||
130 | cputime_to_timespec(tsk->utime + tsk->stime, &ts); | ||
131 | tmp += timespec_to_ns(&ts); | ||
132 | d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; | ||
133 | |||
134 | /* | ||
135 | * No locking available for sched_info (and too expensive to add one) | ||
136 | * Mitigate by taking snapshot of values | ||
137 | */ | ||
138 | t1 = tsk->sched_info.pcnt; | ||
139 | t2 = tsk->sched_info.run_delay; | ||
140 | t3 = tsk->sched_info.cpu_time; | ||
141 | |||
142 | d->cpu_count += t1; | ||
143 | |||
144 | jiffies_to_timespec(t2, &ts); | ||
145 | tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts); | ||
146 | d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; | ||
147 | |||
148 | tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000; | ||
149 | d->cpu_run_virtual_total = | ||
150 | (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; | ||
151 | |||
152 | /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ | ||
153 | |||
154 | spin_lock(&tsk->delays->lock); | ||
155 | tmp = d->blkio_delay_total + tsk->delays->blkio_delay; | ||
156 | d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; | ||
157 | tmp = d->swapin_delay_total + tsk->delays->swapin_delay; | ||
158 | d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; | ||
159 | d->blkio_count += tsk->delays->blkio_count; | ||
160 | d->swapin_count += tsk->delays->swapin_count; | ||
161 | spin_unlock(&tsk->delays->lock); | ||
162 | |||
163 | done: | ||
164 | spin_unlock(&tsk->delays_lock); | ||
165 | return 0; | ||
166 | } | ||
167 | |||
168 | __u64 __delayacct_blkio_ticks(struct task_struct *tsk) | ||
169 | { | ||
170 | __u64 ret; | ||
171 | |||
172 | spin_lock(&tsk->delays->lock); | ||
173 | ret = nsec_to_clock_t(tsk->delays->blkio_delay + | ||
174 | tsk->delays->swapin_delay); | ||
175 | spin_unlock(&tsk->delays->lock); | ||
176 | return ret; | ||
177 | } | ||
178 | |||
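delayacct_start()/delayacct_end() above implement a simple pattern: snapshot a monotonic clock before a wait, and on completion add the nanosecond delta to a per-kind accumulator along with an event count. A self-contained user-space sketch of the same pattern; the names (delay_start()/delay_end(), struct delay_stats) are illustrative, not the kernel API:

/* User-space sketch of the delayacct_start()/delayacct_end() pattern:
 * timestamp a blocking region with a monotonic clock and accumulate
 * the nanosecond delta plus an event count. */
#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <unistd.h>

struct delay_stats {
	uint64_t total_ns;   /* cumulative delay, like blkio_delay */
	uint32_t count;      /* number of delay events, like blkio_count */
};

static void delay_start(struct timespec *start)
{
	clock_gettime(CLOCK_MONOTONIC, start);
}

static void delay_end(const struct timespec *start, struct delay_stats *s)
{
	struct timespec end;
	int64_t ns;

	clock_gettime(CLOCK_MONOTONIC, &end);
	ns = (int64_t)(end.tv_sec - start->tv_sec) * 1000000000LL
	     + (end.tv_nsec - start->tv_nsec);
	if (ns < 0)          /* mirror the kernel's defensive check */
		return;
	s->total_ns += ns;
	s->count++;
}

int main(void)
{
	struct delay_stats io = { 0, 0 };
	struct timespec t;

	delay_start(&t);
	usleep(10 * 1000);   /* stand-in for a block-I/O wait */
	delay_end(&t, &io);

	printf("delayed %llu ns over %u events\n",
	       (unsigned long long)io.total_ns, io.count);
	return 0;
}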
diff --git a/kernel/exit.c b/kernel/exit.c
index 6664c084783d..dba194a8d416 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -25,6 +25,8 @@ | |||
25 | #include <linux/mount.h> | 25 | #include <linux/mount.h> |
26 | #include <linux/proc_fs.h> | 26 | #include <linux/proc_fs.h> |
27 | #include <linux/mempolicy.h> | 27 | #include <linux/mempolicy.h> |
28 | #include <linux/taskstats_kern.h> | ||
29 | #include <linux/delayacct.h> | ||
28 | #include <linux/cpuset.h> | 30 | #include <linux/cpuset.h> |
29 | #include <linux/syscalls.h> | 31 | #include <linux/syscalls.h> |
30 | #include <linux/signal.h> | 32 | #include <linux/signal.h> |
@@ -843,7 +845,9 @@ static void exit_notify(struct task_struct *tsk) | |||
843 | fastcall NORET_TYPE void do_exit(long code) | 845 | fastcall NORET_TYPE void do_exit(long code) |
844 | { | 846 | { |
845 | struct task_struct *tsk = current; | 847 | struct task_struct *tsk = current; |
848 | struct taskstats *tidstats; | ||
846 | int group_dead; | 849 | int group_dead; |
850 | unsigned int mycpu; | ||
847 | 851 | ||
848 | profile_task_exit(tsk); | 852 | profile_task_exit(tsk); |
849 | 853 | ||
@@ -881,6 +885,8 @@ fastcall NORET_TYPE void do_exit(long code) | |||
881 | current->comm, current->pid, | 885 | current->comm, current->pid, |
882 | preempt_count()); | 886 | preempt_count()); |
883 | 887 | ||
888 | taskstats_exit_alloc(&tidstats, &mycpu); | ||
889 | |||
884 | acct_update_integrals(tsk); | 890 | acct_update_integrals(tsk); |
885 | if (tsk->mm) { | 891 | if (tsk->mm) { |
886 | update_hiwater_rss(tsk->mm); | 892 | update_hiwater_rss(tsk->mm); |
@@ -900,6 +906,10 @@ fastcall NORET_TYPE void do_exit(long code) | |||
900 | #endif | 906 | #endif |
901 | if (unlikely(tsk->audit_context)) | 907 | if (unlikely(tsk->audit_context)) |
902 | audit_free(tsk); | 908 | audit_free(tsk); |
909 | taskstats_exit_send(tsk, tidstats, group_dead, mycpu); | ||
910 | taskstats_exit_free(tidstats); | ||
911 | delayacct_tsk_exit(tsk); | ||
912 | |||
903 | exit_mm(tsk); | 913 | exit_mm(tsk); |
904 | 914 | ||
905 | if (group_dead) | 915 | if (group_dead) |
diff --git a/kernel/fork.c b/kernel/fork.c
index 926e5a68ea9e..1b0f7b1e0881 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -43,6 +43,8 @@ | |||
43 | #include <linux/rmap.h> | 43 | #include <linux/rmap.h> |
44 | #include <linux/acct.h> | 44 | #include <linux/acct.h> |
45 | #include <linux/cn_proc.h> | 45 | #include <linux/cn_proc.h> |
46 | #include <linux/delayacct.h> | ||
47 | #include <linux/taskstats_kern.h> | ||
46 | 48 | ||
47 | #include <asm/pgtable.h> | 49 | #include <asm/pgtable.h> |
48 | #include <asm/pgalloc.h> | 50 | #include <asm/pgalloc.h> |
@@ -818,6 +820,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts | |||
818 | if (clone_flags & CLONE_THREAD) { | 820 | if (clone_flags & CLONE_THREAD) { |
819 | atomic_inc(¤t->signal->count); | 821 | atomic_inc(¤t->signal->count); |
820 | atomic_inc(¤t->signal->live); | 822 | atomic_inc(¤t->signal->live); |
823 | taskstats_tgid_alloc(current->signal); | ||
821 | return 0; | 824 | return 0; |
822 | } | 825 | } |
823 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); | 826 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); |
@@ -862,6 +865,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts | |||
862 | INIT_LIST_HEAD(&sig->cpu_timers[0]); | 865 | INIT_LIST_HEAD(&sig->cpu_timers[0]); |
863 | INIT_LIST_HEAD(&sig->cpu_timers[1]); | 866 | INIT_LIST_HEAD(&sig->cpu_timers[1]); |
864 | INIT_LIST_HEAD(&sig->cpu_timers[2]); | 867 | INIT_LIST_HEAD(&sig->cpu_timers[2]); |
868 | taskstats_tgid_init(sig); | ||
865 | 869 | ||
866 | task_lock(current->group_leader); | 870 | task_lock(current->group_leader); |
867 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); | 871 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); |
@@ -883,6 +887,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts | |||
883 | void __cleanup_signal(struct signal_struct *sig) | 887 | void __cleanup_signal(struct signal_struct *sig) |
884 | { | 888 | { |
885 | exit_thread_group_keys(sig); | 889 | exit_thread_group_keys(sig); |
890 | taskstats_tgid_free(sig); | ||
886 | kmem_cache_free(signal_cachep, sig); | 891 | kmem_cache_free(signal_cachep, sig); |
887 | } | 892 | } |
888 | 893 | ||
@@ -1000,6 +1005,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1000 | goto bad_fork_cleanup_put_domain; | 1005 | goto bad_fork_cleanup_put_domain; |
1001 | 1006 | ||
1002 | p->did_exec = 0; | 1007 | p->did_exec = 0; |
1008 | delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ | ||
1003 | copy_flags(clone_flags, p); | 1009 | copy_flags(clone_flags, p); |
1004 | p->pid = pid; | 1010 | p->pid = pid; |
1005 | retval = -EFAULT; | 1011 | retval = -EFAULT; |
diff --git a/kernel/futex.c b/kernel/futex.c
index cf0c8e21d1ab..dda2049692a2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -415,15 +415,15 @@ out_unlock: | |||
415 | */ | 415 | */ |
416 | void exit_pi_state_list(struct task_struct *curr) | 416 | void exit_pi_state_list(struct task_struct *curr) |
417 | { | 417 | { |
418 | struct futex_hash_bucket *hb; | ||
419 | struct list_head *next, *head = &curr->pi_state_list; | 418 | struct list_head *next, *head = &curr->pi_state_list; |
420 | struct futex_pi_state *pi_state; | 419 | struct futex_pi_state *pi_state; |
420 | struct futex_hash_bucket *hb; | ||
421 | union futex_key key; | 421 | union futex_key key; |
422 | 422 | ||
423 | /* | 423 | /* |
424 | * We are a ZOMBIE and nobody can enqueue itself on | 424 | * We are a ZOMBIE and nobody can enqueue itself on |
425 | * pi_state_list anymore, but we have to be careful | 425 | * pi_state_list anymore, but we have to be careful |
426 | * versus waiters unqueueing themselfs | 426 | * versus waiters unqueueing themselves: |
427 | */ | 427 | */ |
428 | spin_lock_irq(&curr->pi_lock); | 428 | spin_lock_irq(&curr->pi_lock); |
429 | while (!list_empty(head)) { | 429 | while (!list_empty(head)) { |
@@ -431,21 +431,24 @@ void exit_pi_state_list(struct task_struct *curr) | |||
431 | next = head->next; | 431 | next = head->next; |
432 | pi_state = list_entry(next, struct futex_pi_state, list); | 432 | pi_state = list_entry(next, struct futex_pi_state, list); |
433 | key = pi_state->key; | 433 | key = pi_state->key; |
434 | hb = hash_futex(&key); | ||
434 | spin_unlock_irq(&curr->pi_lock); | 435 | spin_unlock_irq(&curr->pi_lock); |
435 | 436 | ||
436 | hb = hash_futex(&key); | ||
437 | spin_lock(&hb->lock); | 437 | spin_lock(&hb->lock); |
438 | 438 | ||
439 | spin_lock_irq(&curr->pi_lock); | 439 | spin_lock_irq(&curr->pi_lock); |
440 | /* | ||
441 | * We dropped the pi-lock, so re-check whether this | ||
442 | * task still owns the PI-state: | ||
443 | */ | ||
440 | if (head->next != next) { | 444 | if (head->next != next) { |
441 | spin_unlock(&hb->lock); | 445 | spin_unlock(&hb->lock); |
442 | continue; | 446 | continue; |
443 | } | 447 | } |
444 | 448 | ||
445 | list_del_init(&pi_state->list); | ||
446 | |||
447 | WARN_ON(pi_state->owner != curr); | 449 | WARN_ON(pi_state->owner != curr); |
448 | 450 | WARN_ON(list_empty(&pi_state->list)); | |
451 | list_del_init(&pi_state->list); | ||
449 | pi_state->owner = NULL; | 452 | pi_state->owner = NULL; |
450 | spin_unlock_irq(&curr->pi_lock); | 453 | spin_unlock_irq(&curr->pi_lock); |
451 | 454 | ||
@@ -470,7 +473,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) | |||
470 | head = &hb->chain; | 473 | head = &hb->chain; |
471 | 474 | ||
472 | list_for_each_entry_safe(this, next, head, list) { | 475 | list_for_each_entry_safe(this, next, head, list) { |
473 | if (match_futex (&this->key, &me->key)) { | 476 | if (match_futex(&this->key, &me->key)) { |
474 | /* | 477 | /* |
475 | * Another waiter already exists - bump up | 478 | * Another waiter already exists - bump up |
476 | * the refcount and return its pi_state: | 479 | * the refcount and return its pi_state: |
@@ -482,6 +485,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) | |||
482 | if (unlikely(!pi_state)) | 485 | if (unlikely(!pi_state)) |
483 | return -EINVAL; | 486 | return -EINVAL; |
484 | 487 | ||
488 | WARN_ON(!atomic_read(&pi_state->refcount)); | ||
489 | |||
485 | atomic_inc(&pi_state->refcount); | 490 | atomic_inc(&pi_state->refcount); |
486 | me->pi_state = pi_state; | 491 | me->pi_state = pi_state; |
487 | 492 | ||
@@ -490,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) | |||
490 | } | 495 | } |
491 | 496 | ||
492 | /* | 497 | /* |
493 | * We are the first waiter - try to look up the real owner and | 498 | * We are the first waiter - try to look up the real owner and attach |
494 | * attach the new pi_state to it: | 499 | * the new pi_state to it, but bail out when the owner died bit is set |
500 | * and TID = 0: | ||
495 | */ | 501 | */ |
496 | pid = uval & FUTEX_TID_MASK; | 502 | pid = uval & FUTEX_TID_MASK; |
503 | if (!pid && (uval & FUTEX_OWNER_DIED)) | ||
504 | return -ESRCH; | ||
497 | p = futex_find_get_task(pid); | 505 | p = futex_find_get_task(pid); |
498 | if (!p) | 506 | if (!p) |
499 | return -ESRCH; | 507 | return -ESRCH; |
@@ -510,6 +518,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) | |||
510 | pi_state->key = me->key; | 518 | pi_state->key = me->key; |
511 | 519 | ||
512 | spin_lock_irq(&p->pi_lock); | 520 | spin_lock_irq(&p->pi_lock); |
521 | WARN_ON(!list_empty(&pi_state->list)); | ||
513 | list_add(&pi_state->list, &p->pi_state_list); | 522 | list_add(&pi_state->list, &p->pi_state_list); |
514 | pi_state->owner = p; | 523 | pi_state->owner = p; |
515 | spin_unlock_irq(&p->pi_lock); | 524 | spin_unlock_irq(&p->pi_lock); |
@@ -573,20 +582,29 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
573 | * kept enabled while there is PI state around. We must also | 582 | * kept enabled while there is PI state around. We must also |
574 | * preserve the owner died bit.) | 583 | * preserve the owner died bit.) |
575 | */ | 584 | */ |
576 | newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid; | 585 | if (!(uval & FUTEX_OWNER_DIED)) { |
586 | newval = FUTEX_WAITERS | new_owner->pid; | ||
577 | 587 | ||
578 | inc_preempt_count(); | 588 | inc_preempt_count(); |
579 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); | 589 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); |
580 | dec_preempt_count(); | 590 | dec_preempt_count(); |
591 | if (curval == -EFAULT) | ||
592 | return -EFAULT; | ||
593 | if (curval != uval) | ||
594 | return -EINVAL; | ||
595 | } | ||
581 | 596 | ||
582 | if (curval == -EFAULT) | 597 | spin_lock_irq(&pi_state->owner->pi_lock); |
583 | return -EFAULT; | 598 | WARN_ON(list_empty(&pi_state->list)); |
584 | if (curval != uval) | 599 | list_del_init(&pi_state->list); |
585 | return -EINVAL; | 600 | spin_unlock_irq(&pi_state->owner->pi_lock); |
586 | 601 | ||
587 | list_del_init(&pi_state->owner->pi_state_list); | 602 | spin_lock_irq(&new_owner->pi_lock); |
603 | WARN_ON(!list_empty(&pi_state->list)); | ||
588 | list_add(&pi_state->list, &new_owner->pi_state_list); | 604 | list_add(&pi_state->list, &new_owner->pi_state_list); |
589 | pi_state->owner = new_owner; | 605 | pi_state->owner = new_owner; |
606 | spin_unlock_irq(&new_owner->pi_lock); | ||
607 | |||
590 | rt_mutex_unlock(&pi_state->pi_mutex); | 608 | rt_mutex_unlock(&pi_state->pi_mutex); |
591 | 609 | ||
592 | return 0; | 610 | return 0; |
@@ -1236,6 +1254,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock, | |||
1236 | /* Owner died? */ | 1254 | /* Owner died? */ |
1237 | if (q.pi_state->owner != NULL) { | 1255 | if (q.pi_state->owner != NULL) { |
1238 | spin_lock_irq(&q.pi_state->owner->pi_lock); | 1256 | spin_lock_irq(&q.pi_state->owner->pi_lock); |
1257 | WARN_ON(list_empty(&q.pi_state->list)); | ||
1239 | list_del_init(&q.pi_state->list); | 1258 | list_del_init(&q.pi_state->list); |
1240 | spin_unlock_irq(&q.pi_state->owner->pi_lock); | 1259 | spin_unlock_irq(&q.pi_state->owner->pi_lock); |
1241 | } else | 1260 | } else |
@@ -1244,6 +1263,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock, | |||
1244 | q.pi_state->owner = current; | 1263 | q.pi_state->owner = current; |
1245 | 1264 | ||
1246 | spin_lock_irq(¤t->pi_lock); | 1265 | spin_lock_irq(¤t->pi_lock); |
1266 | WARN_ON(!list_empty(&q.pi_state->list)); | ||
1247 | list_add(&q.pi_state->list, ¤t->pi_state_list); | 1267 | list_add(&q.pi_state->list, ¤t->pi_state_list); |
1248 | spin_unlock_irq(¤t->pi_lock); | 1268 | spin_unlock_irq(¤t->pi_lock); |
1249 | 1269 | ||
@@ -1427,9 +1447,11 @@ retry_locked: | |||
1427 | * again. If it succeeds then we can return without waking | 1447 | * again. If it succeeds then we can return without waking |
1428 | * anyone else up: | 1448 | * anyone else up: |
1429 | */ | 1449 | */ |
1430 | inc_preempt_count(); | 1450 | if (!(uval & FUTEX_OWNER_DIED)) { |
1431 | uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); | 1451 | inc_preempt_count(); |
1432 | dec_preempt_count(); | 1452 | uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); |
1453 | dec_preempt_count(); | ||
1454 | } | ||
1433 | 1455 | ||
1434 | if (unlikely(uval == -EFAULT)) | 1456 | if (unlikely(uval == -EFAULT)) |
1435 | goto pi_faulted; | 1457 | goto pi_faulted; |
@@ -1462,9 +1484,11 @@ retry_locked: | |||
1462 | /* | 1484 | /* |
1463 | * No waiters - kernel unlocks the futex: | 1485 | * No waiters - kernel unlocks the futex: |
1464 | */ | 1486 | */ |
1465 | ret = unlock_futex_pi(uaddr, uval); | 1487 | if (!(uval & FUTEX_OWNER_DIED)) { |
1466 | if (ret == -EFAULT) | 1488 | ret = unlock_futex_pi(uaddr, uval); |
1467 | goto pi_faulted; | 1489 | if (ret == -EFAULT) |
1490 | goto pi_faulted; | ||
1491 | } | ||
1468 | 1492 | ||
1469 | out_unlock: | 1493 | out_unlock: |
1470 | spin_unlock(&hb->lock); | 1494 | spin_unlock(&hb->lock); |
@@ -1683,9 +1707,9 @@ err_unlock: | |||
1683 | * Process a futex-list entry, check whether it's owned by the | 1707 | * Process a futex-list entry, check whether it's owned by the |
1684 | * dying task, and do notification if so: | 1708 | * dying task, and do notification if so: |
1685 | */ | 1709 | */ |
1686 | int handle_futex_death(u32 __user *uaddr, struct task_struct *curr) | 1710 | int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) |
1687 | { | 1711 | { |
1688 | u32 uval, nval; | 1712 | u32 uval, nval, mval; |
1689 | 1713 | ||
1690 | retry: | 1714 | retry: |
1691 | if (get_user(uval, uaddr)) | 1715 | if (get_user(uval, uaddr)) |
@@ -1702,21 +1726,45 @@ retry: | |||
1702 | * thread-death.) The rest of the cleanup is done in | 1726 | * thread-death.) The rest of the cleanup is done in |
1703 | * userspace. | 1727 | * userspace. |
1704 | */ | 1728 | */ |
1705 | nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, | 1729 | mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; |
1706 | uval | FUTEX_OWNER_DIED); | 1730 | nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval); |
1731 | |||
1707 | if (nval == -EFAULT) | 1732 | if (nval == -EFAULT) |
1708 | return -1; | 1733 | return -1; |
1709 | 1734 | ||
1710 | if (nval != uval) | 1735 | if (nval != uval) |
1711 | goto retry; | 1736 | goto retry; |
1712 | 1737 | ||
1713 | if (uval & FUTEX_WAITERS) | 1738 | /* |
1714 | futex_wake(uaddr, 1); | 1739 | * Wake robust non-PI futexes here. The wakeup of |
1740 | * PI futexes happens in exit_pi_state(): | ||
1741 | */ | ||
1742 | if (!pi) { | ||
1743 | if (uval & FUTEX_WAITERS) | ||
1744 | futex_wake(uaddr, 1); | ||
1745 | } | ||
1715 | } | 1746 | } |
1716 | return 0; | 1747 | return 0; |
1717 | } | 1748 | } |
1718 | 1749 | ||
1719 | /* | 1750 | /* |
1751 | * Fetch a robust-list pointer. Bit 0 signals PI futexes: | ||
1752 | */ | ||
1753 | static inline int fetch_robust_entry(struct robust_list __user **entry, | ||
1754 | struct robust_list __user **head, int *pi) | ||
1755 | { | ||
1756 | unsigned long uentry; | ||
1757 | |||
1758 | if (get_user(uentry, (unsigned long *)head)) | ||
1759 | return -EFAULT; | ||
1760 | |||
1761 | *entry = (void *)(uentry & ~1UL); | ||
1762 | *pi = uentry & 1; | ||
1763 | |||
1764 | return 0; | ||
1765 | } | ||
1766 | |||
1767 | /* | ||
1720 | * Walk curr->robust_list (very carefully, it's a userspace list!) | 1768 | * Walk curr->robust_list (very carefully, it's a userspace list!) |
1721 | * and mark any locks found there dead, and notify any waiters. | 1769 | * and mark any locks found there dead, and notify any waiters. |
1722 | * | 1770 | * |
@@ -1726,14 +1774,14 @@ void exit_robust_list(struct task_struct *curr) | |||
1726 | { | 1774 | { |
1727 | struct robust_list_head __user *head = curr->robust_list; | 1775 | struct robust_list_head __user *head = curr->robust_list; |
1728 | struct robust_list __user *entry, *pending; | 1776 | struct robust_list __user *entry, *pending; |
1729 | unsigned int limit = ROBUST_LIST_LIMIT; | 1777 | unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; |
1730 | unsigned long futex_offset; | 1778 | unsigned long futex_offset; |
1731 | 1779 | ||
1732 | /* | 1780 | /* |
1733 | * Fetch the list head (which was registered earlier, via | 1781 | * Fetch the list head (which was registered earlier, via |
1734 | * sys_set_robust_list()): | 1782 | * sys_set_robust_list()): |
1735 | */ | 1783 | */ |
1736 | if (get_user(entry, &head->list.next)) | 1784 | if (fetch_robust_entry(&entry, &head->list.next, &pi)) |
1737 | return; | 1785 | return; |
1738 | /* | 1786 | /* |
1739 | * Fetch the relative futex offset: | 1787 | * Fetch the relative futex offset: |
@@ -1744,10 +1792,11 @@ void exit_robust_list(struct task_struct *curr) | |||
1744 | * Fetch any possibly pending lock-add first, and handle it | 1792 | * Fetch any possibly pending lock-add first, and handle it |
1745 | * if it exists: | 1793 | * if it exists: |
1746 | */ | 1794 | */ |
1747 | if (get_user(pending, &head->list_op_pending)) | 1795 | if (fetch_robust_entry(&pending, &head->list_op_pending, &pip)) |
1748 | return; | 1796 | return; |
1797 | |||
1749 | if (pending) | 1798 | if (pending) |
1750 | handle_futex_death((void *)pending + futex_offset, curr); | 1799 | handle_futex_death((void *)pending + futex_offset, curr, pip); |
1751 | 1800 | ||
1752 | while (entry != &head->list) { | 1801 | while (entry != &head->list) { |
1753 | /* | 1802 | /* |
@@ -1756,12 +1805,12 @@ void exit_robust_list(struct task_struct *curr) | |||
1756 | */ | 1805 | */ |
1757 | if (entry != pending) | 1806 | if (entry != pending) |
1758 | if (handle_futex_death((void *)entry + futex_offset, | 1807 | if (handle_futex_death((void *)entry + futex_offset, |
1759 | curr)) | 1808 | curr, pi)) |
1760 | return; | 1809 | return; |
1761 | /* | 1810 | /* |
1762 | * Fetch the next entry in the list: | 1811 | * Fetch the next entry in the list: |
1763 | */ | 1812 | */ |
1764 | if (get_user(entry, &entry->next)) | 1813 | if (fetch_robust_entry(&entry, &entry->next, &pi)) |
1765 | return; | 1814 | return; |
1766 | /* | 1815 | /* |
1767 | * Avoid excessively long or circular lists: | 1816 | * Avoid excessively long or circular lists: |
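fetch_robust_entry() relies on robust-list entries being word-aligned, which frees bit 0 of the user-space pointer to mean "this entry is a PI futex". The encode/decode of such a tagged pointer can be shown in isolation; the helper names below are invented for the sketch and are not part of the futex code:

/* Stand-alone illustration of the tagged-pointer trick used by
 * fetch_robust_entry(): word alignment leaves bit 0 free to carry
 * the PI flag alongside the pointer itself. */
#include <stdio.h>
#include <stdint.h>

struct robust_entry {           /* stand-in for struct robust_list */
	struct robust_entry *next;
};

static uintptr_t encode_entry(struct robust_entry *e, int pi)
{
	return (uintptr_t)e | (pi ? 1UL : 0UL);
}

static struct robust_entry *decode_entry(uintptr_t uentry, int *pi)
{
	*pi = uentry & 1;                              /* bit 0: PI flag */
	return (struct robust_entry *)(uentry & ~1UL); /* rest: pointer  */
}

int main(void)
{
	struct robust_entry e;
	int pi;
	uintptr_t tagged = encode_entry(&e, 1);
	struct robust_entry *p = decode_entry(tagged, &pi);

	printf("pointer intact: %d, pi flag: %d\n", p == &e, pi);
	return 0;
}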
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d1d92b441fb7..d1aab1a452cc 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -12,6 +12,23 @@ | |||
12 | 12 | ||
13 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
14 | 14 | ||
15 | |||
16 | /* | ||
17 | * Fetch a robust-list pointer. Bit 0 signals PI futexes: | ||
18 | */ | ||
19 | static inline int | ||
20 | fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, | ||
21 | compat_uptr_t *head, int *pi) | ||
22 | { | ||
23 | if (get_user(*uentry, head)) | ||
24 | return -EFAULT; | ||
25 | |||
26 | *entry = compat_ptr((*uentry) & ~1); | ||
27 | *pi = (unsigned int)(*uentry) & 1; | ||
28 | |||
29 | return 0; | ||
30 | } | ||
31 | |||
15 | /* | 32 | /* |
16 | * Walk curr->robust_list (very carefully, it's a userspace list!) | 33 | * Walk curr->robust_list (very carefully, it's a userspace list!) |
17 | * and mark any locks found there dead, and notify any waiters. | 34 | * and mark any locks found there dead, and notify any waiters. |
@@ -22,17 +39,16 @@ void compat_exit_robust_list(struct task_struct *curr) | |||
22 | { | 39 | { |
23 | struct compat_robust_list_head __user *head = curr->compat_robust_list; | 40 | struct compat_robust_list_head __user *head = curr->compat_robust_list; |
24 | struct robust_list __user *entry, *pending; | 41 | struct robust_list __user *entry, *pending; |
42 | unsigned int limit = ROBUST_LIST_LIMIT, pi; | ||
25 | compat_uptr_t uentry, upending; | 43 | compat_uptr_t uentry, upending; |
26 | unsigned int limit = ROBUST_LIST_LIMIT; | ||
27 | compat_long_t futex_offset; | 44 | compat_long_t futex_offset; |
28 | 45 | ||
29 | /* | 46 | /* |
30 | * Fetch the list head (which was registered earlier, via | 47 | * Fetch the list head (which was registered earlier, via |
31 | * sys_set_robust_list()): | 48 | * sys_set_robust_list()): |
32 | */ | 49 | */ |
33 | if (get_user(uentry, &head->list.next)) | 50 | if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) |
34 | return; | 51 | return; |
35 | entry = compat_ptr(uentry); | ||
36 | /* | 52 | /* |
37 | * Fetch the relative futex offset: | 53 | * Fetch the relative futex offset: |
38 | */ | 54 | */ |
@@ -42,11 +58,11 @@ void compat_exit_robust_list(struct task_struct *curr) | |||
42 | * Fetch any possibly pending lock-add first, and handle it | 58 | * Fetch any possibly pending lock-add first, and handle it |
43 | * if it exists: | 59 | * if it exists: |
44 | */ | 60 | */ |
45 | if (get_user(upending, &head->list_op_pending)) | 61 | if (fetch_robust_entry(&upending, &pending, |
62 | &head->list_op_pending, &pi)) | ||
46 | return; | 63 | return; |
47 | pending = compat_ptr(upending); | ||
48 | if (upending) | 64 | if (upending) |
49 | handle_futex_death((void *)pending + futex_offset, curr); | 65 | handle_futex_death((void *)pending + futex_offset, curr, pi); |
50 | 66 | ||
51 | while (compat_ptr(uentry) != &head->list) { | 67 | while (compat_ptr(uentry) != &head->list) { |
52 | /* | 68 | /* |
@@ -55,15 +71,15 @@ void compat_exit_robust_list(struct task_struct *curr) | |||
55 | */ | 71 | */ |
56 | if (entry != pending) | 72 | if (entry != pending) |
57 | if (handle_futex_death((void *)entry + futex_offset, | 73 | if (handle_futex_death((void *)entry + futex_offset, |
58 | curr)) | 74 | curr, pi)) |
59 | return; | 75 | return; |
60 | 76 | ||
61 | /* | 77 | /* |
62 | * Fetch the next entry in the list: | 78 | * Fetch the next entry in the list: |
63 | */ | 79 | */ |
64 | if (get_user(uentry, (compat_uptr_t *)&entry->next)) | 80 | if (fetch_robust_entry(&uentry, &entry, |
81 | (compat_uptr_t *)&entry->next, &pi)) | ||
65 | return; | 82 | return; |
66 | entry = compat_ptr(uentry); | ||
67 | /* | 83 | /* |
68 | * Avoid excessively long or circular lists: | 84 | * Avoid excessively long or circular lists: |
69 | */ | 85 | */ |
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 39277dd6bf90..ab16a5a4cfe9 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -275,8 +275,8 @@ static void upcase_if_global(struct kallsym_iter *iter) | |||
275 | static int get_ksymbol_mod(struct kallsym_iter *iter) | 275 | static int get_ksymbol_mod(struct kallsym_iter *iter) |
276 | { | 276 | { |
277 | iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms, | 277 | iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms, |
278 | &iter->value, | 278 | &iter->value, &iter->type, |
279 | &iter->type, iter->name); | 279 | iter->name, sizeof(iter->name)); |
280 | if (iter->owner == NULL) | 280 | if (iter->owner == NULL) |
281 | return 0; | 281 | return 0; |
282 | 282 | ||
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 24be714b04c7..4f9c60ef95e8 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -216,23 +216,6 @@ EXPORT_SYMBOL(kthread_bind); | |||
216 | */ | 216 | */ |
217 | int kthread_stop(struct task_struct *k) | 217 | int kthread_stop(struct task_struct *k) |
218 | { | 218 | { |
219 | return kthread_stop_sem(k, NULL); | ||
220 | } | ||
221 | EXPORT_SYMBOL(kthread_stop); | ||
222 | |||
223 | /** | ||
224 | * kthread_stop_sem - stop a thread created by kthread_create(). | ||
225 | * @k: thread created by kthread_create(). | ||
226 | * @s: semaphore that @k waits on while idle. | ||
227 | * | ||
228 | * Does essentially the same thing as kthread_stop() above, but wakes | ||
229 | * @k by calling up(@s). | ||
230 | * | ||
231 | * Returns the result of threadfn(), or %-EINTR if wake_up_process() | ||
232 | * was never called. | ||
233 | */ | ||
234 | int kthread_stop_sem(struct task_struct *k, struct semaphore *s) | ||
235 | { | ||
236 | int ret; | 219 | int ret; |
237 | 220 | ||
238 | mutex_lock(&kthread_stop_lock); | 221 | mutex_lock(&kthread_stop_lock); |
@@ -246,10 +229,7 @@ int kthread_stop_sem(struct task_struct *k, struct semaphore *s) | |||
246 | 229 | ||
247 | /* Now set kthread_should_stop() to true, and wake it up. */ | 230 | /* Now set kthread_should_stop() to true, and wake it up. */ |
248 | kthread_stop_info.k = k; | 231 | kthread_stop_info.k = k; |
249 | if (s) | 232 | wake_up_process(k); |
250 | up(s); | ||
251 | else | ||
252 | wake_up_process(k); | ||
253 | put_task_struct(k); | 233 | put_task_struct(k); |
254 | 234 | ||
255 | /* Once it dies, reset stop ptr, gather result and we're done. */ | 235 | /* Once it dies, reset stop ptr, gather result and we're done. */ |
@@ -260,7 +240,7 @@ int kthread_stop_sem(struct task_struct *k, struct semaphore *s) | |||
260 | 240 | ||
261 | return ret; | 241 | return ret; |
262 | } | 242 | } |
263 | EXPORT_SYMBOL(kthread_stop_sem); | 243 | EXPORT_SYMBOL(kthread_stop); |
264 | 244 | ||
265 | static __init int helper_init(void) | 245 | static __init int helper_init(void) |
266 | { | 246 | { |
diff --git a/kernel/module.c b/kernel/module.c
index 35e1b1f859d7..2a19cd47c046 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2019,10 +2019,8 @@ const char *module_address_lookup(unsigned long addr, | |||
2019 | return NULL; | 2019 | return NULL; |
2020 | } | 2020 | } |
2021 | 2021 | ||
2022 | struct module *module_get_kallsym(unsigned int symnum, | 2022 | struct module *module_get_kallsym(unsigned int symnum, unsigned long *value, |
2023 | unsigned long *value, | 2023 | char *type, char *name, size_t namelen) |
2024 | char *type, | ||
2025 | char namebuf[128]) | ||
2026 | { | 2024 | { |
2027 | struct module *mod; | 2025 | struct module *mod; |
2028 | 2026 | ||
@@ -2031,9 +2029,8 @@ struct module *module_get_kallsym(unsigned int symnum, | |||
2031 | if (symnum < mod->num_symtab) { | 2029 | if (symnum < mod->num_symtab) { |
2032 | *value = mod->symtab[symnum].st_value; | 2030 | *value = mod->symtab[symnum].st_value; |
2033 | *type = mod->symtab[symnum].st_info; | 2031 | *type = mod->symtab[symnum].st_info; |
2034 | strncpy(namebuf, | 2032 | strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, |
2035 | mod->strtab + mod->symtab[symnum].st_name, | 2033 | namelen); |
2036 | 127); | ||
2037 | mutex_unlock(&module_mutex); | 2034 | mutex_unlock(&module_mutex); |
2038 | return mod; | 2035 | return mod; |
2039 | } | 2036 | } |
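The module_get_kallsym() change swaps strncpy() for strlcpy(): strncpy() leaves the destination unterminated when the source is at least as long as the buffer, while strlcpy() always NUL-terminates and takes the full buffer size. A small user-space illustration of the difference; strlcpy() is a kernel/BSD routine, so a local my_strlcpy() is included to keep the sketch self-contained:

/* Why strncpy() -> strlcpy() matters: truncation still yields a
 * terminated string with strlcpy(). */
#include <stdio.h>
#include <string.h>

static size_t my_strlcpy(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (size) {
		size_t n = (len >= size) ? size - 1 : len;
		memcpy(dst, src, n);
		dst[n] = '\0';       /* always terminated */
	}
	return len;                  /* length it tried to create */
}

int main(void)
{
	char a[8], b[8];
	const char *name = "very_long_symbol_name";

	strncpy(a, name, sizeof(a));        /* a[] is NOT terminated */
	my_strlcpy(b, name, sizeof(b));     /* b[] is "very_lo" + NUL */

	printf("strlcpy result: \"%s\" (truncated but safe)\n", b);
	/* printing a[] with %s here would read past the buffer */
	return 0;
}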
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 494dac872a13..948bd8f643e2 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -275,6 +275,7 @@ static int test_func(void *data) | |||
275 | 275 | ||
276 | /* Wait for the next command to be executed */ | 276 | /* Wait for the next command to be executed */ |
277 | schedule(); | 277 | schedule(); |
278 | try_to_freeze(); | ||
278 | 279 | ||
279 | if (signal_pending(current)) | 280 | if (signal_pending(current)) |
280 | flush_signals(current); | 281 | flush_signals(current); |
diff --git a/kernel/sched.c b/kernel/sched.c
index d714611f1691..b44b9a43b0fc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/times.h> | 51 | #include <linux/times.h> |
52 | #include <linux/acct.h> | 52 | #include <linux/acct.h> |
53 | #include <linux/kprobes.h> | 53 | #include <linux/kprobes.h> |
54 | #include <linux/delayacct.h> | ||
54 | #include <asm/tlb.h> | 55 | #include <asm/tlb.h> |
55 | 56 | ||
56 | #include <asm/unistd.h> | 57 | #include <asm/unistd.h> |
@@ -501,9 +502,36 @@ struct file_operations proc_schedstat_operations = { | |||
501 | .release = single_release, | 502 | .release = single_release, |
502 | }; | 503 | }; |
503 | 504 | ||
505 | /* | ||
506 | * Expects runqueue lock to be held for atomicity of update | ||
507 | */ | ||
508 | static inline void | ||
509 | rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies) | ||
510 | { | ||
511 | if (rq) { | ||
512 | rq->rq_sched_info.run_delay += delta_jiffies; | ||
513 | rq->rq_sched_info.pcnt++; | ||
514 | } | ||
515 | } | ||
516 | |||
517 | /* | ||
518 | * Expects runqueue lock to be held for atomicity of update | ||
519 | */ | ||
520 | static inline void | ||
521 | rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies) | ||
522 | { | ||
523 | if (rq) | ||
524 | rq->rq_sched_info.cpu_time += delta_jiffies; | ||
525 | } | ||
504 | # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) | 526 | # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) |
505 | # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) | 527 | # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) |
506 | #else /* !CONFIG_SCHEDSTATS */ | 528 | #else /* !CONFIG_SCHEDSTATS */ |
529 | static inline void | ||
530 | rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies) | ||
531 | {} | ||
532 | static inline void | ||
533 | rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies) | ||
534 | {} | ||
507 | # define schedstat_inc(rq, field) do { } while (0) | 535 | # define schedstat_inc(rq, field) do { } while (0) |
508 | # define schedstat_add(rq, field, amt) do { } while (0) | 536 | # define schedstat_add(rq, field, amt) do { } while (0) |
509 | #endif | 537 | #endif |
@@ -523,7 +551,7 @@ static inline struct rq *this_rq_lock(void) | |||
523 | return rq; | 551 | return rq; |
524 | } | 552 | } |
525 | 553 | ||
526 | #ifdef CONFIG_SCHEDSTATS | 554 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
527 | /* | 555 | /* |
528 | * Called when a process is dequeued from the active array and given | 556 | * Called when a process is dequeued from the active array and given |
529 | * the cpu. We should note that with the exception of interactive | 557 | * the cpu. We should note that with the exception of interactive |
@@ -551,21 +579,16 @@ static inline void sched_info_dequeued(struct task_struct *t) | |||
551 | */ | 579 | */ |
552 | static void sched_info_arrive(struct task_struct *t) | 580 | static void sched_info_arrive(struct task_struct *t) |
553 | { | 581 | { |
554 | unsigned long now = jiffies, diff = 0; | 582 | unsigned long now = jiffies, delta_jiffies = 0; |
555 | struct rq *rq = task_rq(t); | ||
556 | 583 | ||
557 | if (t->sched_info.last_queued) | 584 | if (t->sched_info.last_queued) |
558 | diff = now - t->sched_info.last_queued; | 585 | delta_jiffies = now - t->sched_info.last_queued; |
559 | sched_info_dequeued(t); | 586 | sched_info_dequeued(t); |
560 | t->sched_info.run_delay += diff; | 587 | t->sched_info.run_delay += delta_jiffies; |
561 | t->sched_info.last_arrival = now; | 588 | t->sched_info.last_arrival = now; |
562 | t->sched_info.pcnt++; | 589 | t->sched_info.pcnt++; |
563 | 590 | ||
564 | if (!rq) | 591 | rq_sched_info_arrive(task_rq(t), delta_jiffies); |
565 | return; | ||
566 | |||
567 | rq->rq_sched_info.run_delay += diff; | ||
568 | rq->rq_sched_info.pcnt++; | ||
569 | } | 592 | } |
570 | 593 | ||
571 | /* | 594 | /* |
@@ -585,8 +608,9 @@ static void sched_info_arrive(struct task_struct *t) | |||
585 | */ | 608 | */ |
586 | static inline void sched_info_queued(struct task_struct *t) | 609 | static inline void sched_info_queued(struct task_struct *t) |
587 | { | 610 | { |
588 | if (!t->sched_info.last_queued) | 611 | if (unlikely(sched_info_on())) |
589 | t->sched_info.last_queued = jiffies; | 612 | if (!t->sched_info.last_queued) |
613 | t->sched_info.last_queued = jiffies; | ||
590 | } | 614 | } |
591 | 615 | ||
592 | /* | 616 | /* |
@@ -595,13 +619,10 @@ static inline void sched_info_queued(struct task_struct *t) | |||
595 | */ | 619 | */ |
596 | static inline void sched_info_depart(struct task_struct *t) | 620 | static inline void sched_info_depart(struct task_struct *t) |
597 | { | 621 | { |
598 | struct rq *rq = task_rq(t); | 622 | unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival; |
599 | unsigned long diff = jiffies - t->sched_info.last_arrival; | ||
600 | |||
601 | t->sched_info.cpu_time += diff; | ||
602 | 623 | ||
603 | if (rq) | 624 | t->sched_info.cpu_time += delta_jiffies; |
604 | rq->rq_sched_info.cpu_time += diff; | 625 | rq_sched_info_depart(task_rq(t), delta_jiffies); |
605 | } | 626 | } |
606 | 627 | ||
607 | /* | 628 | /* |
@@ -610,7 +631,7 @@ static inline void sched_info_depart(struct task_struct *t) | |||
610 | * the idle task.) We are only called when prev != next. | 631 | * the idle task.) We are only called when prev != next. |
611 | */ | 632 | */ |
612 | static inline void | 633 | static inline void |
613 | sched_info_switch(struct task_struct *prev, struct task_struct *next) | 634 | __sched_info_switch(struct task_struct *prev, struct task_struct *next) |
614 | { | 635 | { |
615 | struct rq *rq = task_rq(prev); | 636 | struct rq *rq = task_rq(prev); |
616 | 637 | ||
@@ -625,10 +646,16 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) | |||
625 | if (next != rq->idle) | 646 | if (next != rq->idle) |
626 | sched_info_arrive(next); | 647 | sched_info_arrive(next); |
627 | } | 648 | } |
649 | static inline void | ||
650 | sched_info_switch(struct task_struct *prev, struct task_struct *next) | ||
651 | { | ||
652 | if (unlikely(sched_info_on())) | ||
653 | __sched_info_switch(prev, next); | ||
654 | } | ||
628 | #else | 655 | #else |
629 | #define sched_info_queued(t) do { } while (0) | 656 | #define sched_info_queued(t) do { } while (0) |
630 | #define sched_info_switch(t, next) do { } while (0) | 657 | #define sched_info_switch(t, next) do { } while (0) |
631 | #endif /* CONFIG_SCHEDSTATS */ | 658 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ |
632 | 659 | ||
633 | /* | 660 | /* |
634 | * Adding/removing a task to/from a priority array: | 661 | * Adding/removing a task to/from a priority array: |
@@ -1530,8 +1557,9 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags) | |||
1530 | 1557 | ||
1531 | INIT_LIST_HEAD(&p->run_list); | 1558 | INIT_LIST_HEAD(&p->run_list); |
1532 | p->array = NULL; | 1559 | p->array = NULL; |
1533 | #ifdef CONFIG_SCHEDSTATS | 1560 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
1534 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 1561 | if (unlikely(sched_info_on())) |
1562 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | ||
1535 | #endif | 1563 | #endif |
1536 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 1564 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) |
1537 | p->oncpu = 0; | 1565 | p->oncpu = 0; |
@@ -1788,7 +1816,15 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
1788 | WARN_ON(rq->prev_mm); | 1816 | WARN_ON(rq->prev_mm); |
1789 | rq->prev_mm = oldmm; | 1817 | rq->prev_mm = oldmm; |
1790 | } | 1818 | } |
1819 | /* | ||
1820 | * Since the runqueue lock will be released by the next | ||
1821 | * task (which is an invalid locking op but in the case | ||
1822 | * of the scheduler it's an obvious special-case), so we | ||
1823 | * do an early lockdep release here: | ||
1824 | */ | ||
1825 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW | ||
1791 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); | 1826 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); |
1827 | #endif | ||
1792 | 1828 | ||
1793 | /* Here we just switch the register state and the stack. */ | 1829 | /* Here we just switch the register state and the stack. */ |
1794 | switch_to(prev, next, prev); | 1830 | switch_to(prev, next, prev); |
@@ -4526,9 +4562,11 @@ void __sched io_schedule(void) | |||
4526 | { | 4562 | { |
4527 | struct rq *rq = &__raw_get_cpu_var(runqueues); | 4563 | struct rq *rq = &__raw_get_cpu_var(runqueues); |
4528 | 4564 | ||
4565 | delayacct_blkio_start(); | ||
4529 | atomic_inc(&rq->nr_iowait); | 4566 | atomic_inc(&rq->nr_iowait); |
4530 | schedule(); | 4567 | schedule(); |
4531 | atomic_dec(&rq->nr_iowait); | 4568 | atomic_dec(&rq->nr_iowait); |
4569 | delayacct_blkio_end(); | ||
4532 | } | 4570 | } |
4533 | EXPORT_SYMBOL(io_schedule); | 4571 | EXPORT_SYMBOL(io_schedule); |
4534 | 4572 | ||
@@ -4537,9 +4575,11 @@ long __sched io_schedule_timeout(long timeout) | |||
4537 | struct rq *rq = &__raw_get_cpu_var(runqueues); | 4575 | struct rq *rq = &__raw_get_cpu_var(runqueues); |
4538 | long ret; | 4576 | long ret; |
4539 | 4577 | ||
4578 | delayacct_blkio_start(); | ||
4540 | atomic_inc(&rq->nr_iowait); | 4579 | atomic_inc(&rq->nr_iowait); |
4541 | ret = schedule_timeout(timeout); | 4580 | ret = schedule_timeout(timeout); |
4542 | atomic_dec(&rq->nr_iowait); | 4581 | atomic_dec(&rq->nr_iowait); |
4582 | delayacct_blkio_end(); | ||
4543 | return ret; | 4583 | return ret; |
4544 | } | 4584 | } |
4545 | 4585 | ||
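
The two hunks above bracket the iowait in io_schedule() and io_schedule_timeout() with delayacct_blkio_start()/delayacct_blkio_end(). As an illustration of that bracketing pattern only, and not of the kernel's delayacct implementation added by this series, a minimal userspace sketch with hypothetical names (io_delay_account, io_delay_start, io_delay_end) looks like this:

/*
 * Minimal sketch of the start/end delay-bracketing pattern shown above;
 * plain userspace C, not the kernel's delayacct code. Names are hypothetical.
 */
#include <stdint.h>
#include <time.h>

struct io_delay_account {
	struct timespec start;		/* set by io_delay_start() */
	uint64_t total_ns;		/* accumulated wait time */
	uint64_t count;			/* number of waits measured */
};

static void io_delay_start(struct io_delay_account *a)
{
	clock_gettime(CLOCK_MONOTONIC, &a->start);
}

static void io_delay_end(struct io_delay_account *a)
{
	struct timespec now;
	int64_t delta;

	clock_gettime(CLOCK_MONOTONIC, &now);
	delta = (int64_t)(now.tv_sec - a->start.tv_sec) * 1000000000LL
		+ (now.tv_nsec - a->start.tv_nsec);
	a->total_ns += (uint64_t)delta;
	a->count++;
}

A caller brackets any blocking wait the same way the hunks bracket schedule(): io_delay_start(&acc); do_the_wait(); io_delay_end(&acc). Dividing total_ns by count gives the kind of per-wait average that the taskstats interface below lets userspace compute.
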
diff --git a/kernel/softirq.c b/kernel/softirq.c index fd12f2556f0d..0f08a84ae307 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -311,8 +311,6 @@ void open_softirq(int nr, void (*action)(struct softirq_action*), void *data) | |||
311 | softirq_vec[nr].action = action; | 311 | softirq_vec[nr].action = action; |
312 | } | 312 | } |
313 | 313 | ||
314 | EXPORT_UNUSED_SYMBOL(open_softirq); /* June 2006 */ | ||
315 | |||
316 | /* Tasklets */ | 314 | /* Tasklets */ |
317 | struct tasklet_head | 315 | struct tasklet_head |
318 | { | 316 | { |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c new file mode 100644 index 000000000000..f45179ce028e --- /dev/null +++ b/kernel/taskstats.c | |||
@@ -0,0 +1,568 @@ | |||
1 | /* | ||
2 | * taskstats.c - Export per-task statistics to userland | ||
3 | * | ||
4 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 | ||
5 | * (C) Balbir Singh, IBM Corp. 2006 | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/taskstats_kern.h> | ||
21 | #include <linux/delayacct.h> | ||
22 | #include <linux/cpumask.h> | ||
23 | #include <linux/percpu.h> | ||
24 | #include <net/genetlink.h> | ||
25 | #include <asm/atomic.h> | ||
26 | |||
27 | /* | ||
28 | * Maximum length of a cpumask that can be specified in | ||
29 | * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute | ||
30 | */ | ||
31 | #define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS) | ||
32 | |||
33 | static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; | ||
34 | static int family_registered; | ||
35 | kmem_cache_t *taskstats_cache; | ||
36 | |||
37 | static struct genl_family family = { | ||
38 | .id = GENL_ID_GENERATE, | ||
39 | .name = TASKSTATS_GENL_NAME, | ||
40 | .version = TASKSTATS_GENL_VERSION, | ||
41 | .maxattr = TASKSTATS_CMD_ATTR_MAX, | ||
42 | }; | ||
43 | |||
44 | static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] | ||
45 | __read_mostly = { | ||
46 | [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, | ||
47 | [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, | ||
48 | [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, | ||
49 | [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; | ||
50 | |||
51 | struct listener { | ||
52 | struct list_head list; | ||
53 | pid_t pid; | ||
54 | char valid; | ||
55 | }; | ||
56 | |||
57 | struct listener_list { | ||
58 | struct rw_semaphore sem; | ||
59 | struct list_head list; | ||
60 | }; | ||
61 | static DEFINE_PER_CPU(struct listener_list, listener_array); | ||
62 | |||
63 | enum actions { | ||
64 | REGISTER, | ||
65 | DEREGISTER, | ||
66 | CPU_DONT_CARE | ||
67 | }; | ||
68 | |||
69 | static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, | ||
70 | void **replyp, size_t size) | ||
71 | { | ||
72 | struct sk_buff *skb; | ||
73 | void *reply; | ||
74 | |||
75 | /* | ||
76 | * If new attributes are added, please revisit this allocation | ||
77 | */ | ||
78 | skb = nlmsg_new(size); | ||
79 | if (!skb) | ||
80 | return -ENOMEM; | ||
81 | |||
82 | if (!info) { | ||
83 | int seq = get_cpu_var(taskstats_seqnum)++; | ||
84 | put_cpu_var(taskstats_seqnum); | ||
85 | |||
86 | reply = genlmsg_put(skb, 0, seq, | ||
87 | family.id, 0, 0, | ||
88 | cmd, family.version); | ||
89 | } else | ||
90 | reply = genlmsg_put(skb, info->snd_pid, info->snd_seq, | ||
91 | family.id, 0, 0, | ||
92 | cmd, family.version); | ||
93 | if (reply == NULL) { | ||
94 | nlmsg_free(skb); | ||
95 | return -EINVAL; | ||
96 | } | ||
97 | |||
98 | *skbp = skb; | ||
99 | *replyp = reply; | ||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Send taskstats data in @skb to listener with nl_pid @pid | ||
105 | */ | ||
106 | static int send_reply(struct sk_buff *skb, pid_t pid) | ||
107 | { | ||
108 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); | ||
109 | void *reply = genlmsg_data(genlhdr); | ||
110 | int rc; | ||
111 | |||
112 | rc = genlmsg_end(skb, reply); | ||
113 | if (rc < 0) { | ||
114 | nlmsg_free(skb); | ||
115 | return rc; | ||
116 | } | ||
117 | |||
118 | return genlmsg_unicast(skb, pid); | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * Send taskstats data in @skb to listeners registered for @cpu's exit data | ||
123 | */ | ||
124 | static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) | ||
125 | { | ||
126 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); | ||
127 | struct listener_list *listeners; | ||
128 | struct listener *s, *tmp; | ||
129 | struct sk_buff *skb_next, *skb_cur = skb; | ||
130 | void *reply = genlmsg_data(genlhdr); | ||
131 | int rc, ret, delcount = 0; | ||
132 | |||
133 | rc = genlmsg_end(skb, reply); | ||
134 | if (rc < 0) { | ||
135 | nlmsg_free(skb); | ||
136 | return rc; | ||
137 | } | ||
138 | |||
139 | rc = 0; | ||
140 | listeners = &per_cpu(listener_array, cpu); | ||
141 | down_read(&listeners->sem); | ||
142 | list_for_each_entry_safe(s, tmp, &listeners->list, list) { | ||
143 | skb_next = NULL; | ||
144 | if (!list_is_last(&s->list, &listeners->list)) { | ||
145 | skb_next = skb_clone(skb_cur, GFP_KERNEL); | ||
146 | if (!skb_next) { | ||
147 | nlmsg_free(skb_cur); | ||
148 | rc = -ENOMEM; | ||
149 | break; | ||
150 | } | ||
151 | } | ||
152 | ret = genlmsg_unicast(skb_cur, s->pid); | ||
153 | if (ret == -ECONNREFUSED) { | ||
154 | s->valid = 0; | ||
155 | delcount++; | ||
156 | rc = ret; | ||
157 | } | ||
158 | skb_cur = skb_next; | ||
159 | } | ||
160 | up_read(&listeners->sem); | ||
161 | |||
162 | if (!delcount) | ||
163 | return rc; | ||
164 | |||
165 | /* Delete invalidated entries */ | ||
166 | down_write(&listeners->sem); | ||
167 | list_for_each_entry_safe(s, tmp, &listeners->list, list) { | ||
168 | if (!s->valid) { | ||
169 | list_del(&s->list); | ||
170 | kfree(s); | ||
171 | } | ||
172 | } | ||
173 | up_write(&listeners->sem); | ||
174 | return rc; | ||
175 | } | ||
176 | |||
177 | static int fill_pid(pid_t pid, struct task_struct *pidtsk, | ||
178 | struct taskstats *stats) | ||
179 | { | ||
180 | int rc; | ||
181 | struct task_struct *tsk = pidtsk; | ||
182 | |||
183 | if (!pidtsk) { | ||
184 | read_lock(&tasklist_lock); | ||
185 | tsk = find_task_by_pid(pid); | ||
186 | if (!tsk) { | ||
187 | read_unlock(&tasklist_lock); | ||
188 | return -ESRCH; | ||
189 | } | ||
190 | get_task_struct(tsk); | ||
191 | read_unlock(&tasklist_lock); | ||
192 | } else | ||
193 | get_task_struct(tsk); | ||
194 | |||
195 | /* | ||
196 | * Each accounting subsystem adds calls to its functions to | ||
197 | * fill in relevant parts of struct taskstats as follows | ||
198 | * | ||
199 | * rc = per-task-foo(stats, tsk); | ||
200 | * if (rc) | ||
201 | * goto err; | ||
202 | */ | ||
203 | |||
204 | rc = delayacct_add_tsk(stats, tsk); | ||
205 | stats->version = TASKSTATS_VERSION; | ||
206 | |||
207 | /* Define err: label here if needed */ | ||
208 | put_task_struct(tsk); | ||
209 | return rc; | ||
210 | |||
211 | } | ||
212 | |||
213 | static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, | ||
214 | struct taskstats *stats) | ||
215 | { | ||
216 | struct task_struct *tsk, *first; | ||
217 | unsigned long flags; | ||
218 | |||
219 | /* | ||
220 | * Add additional stats from live tasks except zombie thread group | ||
221 | * leaders who are already counted with the dead tasks | ||
222 | */ | ||
223 | first = tgidtsk; | ||
224 | if (!first) { | ||
225 | read_lock(&tasklist_lock); | ||
226 | first = find_task_by_pid(tgid); | ||
227 | if (!first) { | ||
228 | read_unlock(&tasklist_lock); | ||
229 | return -ESRCH; | ||
230 | } | ||
231 | get_task_struct(first); | ||
232 | read_unlock(&tasklist_lock); | ||
233 | } else | ||
234 | get_task_struct(first); | ||
235 | |||
236 | /* Start with stats from dead tasks */ | ||
237 | spin_lock_irqsave(&first->signal->stats_lock, flags); | ||
238 | if (first->signal->stats) | ||
239 | memcpy(stats, first->signal->stats, sizeof(*stats)); | ||
240 | spin_unlock_irqrestore(&first->signal->stats_lock, flags); | ||
241 | |||
242 | tsk = first; | ||
243 | read_lock(&tasklist_lock); | ||
244 | do { | ||
245 | if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk)) | ||
246 | continue; | ||
247 | /* | ||
248 | * Accounting subsystem can call its functions here to | ||
249 | * fill in relevant parts of struct taskstats as follows | ||
250 | * | ||
251 | * per-task-foo(stats, tsk); | ||
252 | */ | ||
253 | delayacct_add_tsk(stats, tsk); | ||
254 | |||
255 | } while_each_thread(first, tsk); | ||
256 | read_unlock(&tasklist_lock); | ||
257 | stats->version = TASKSTATS_VERSION; | ||
258 | |||
259 | /* | ||
260 | * Accounting subsystems can also add calls here to modify | ||
261 | * fields of taskstats. | ||
262 | */ | ||
263 | |||
264 | return 0; | ||
265 | } | ||
266 | |||
267 | |||
268 | static void fill_tgid_exit(struct task_struct *tsk) | ||
269 | { | ||
270 | unsigned long flags; | ||
271 | |||
272 | spin_lock_irqsave(&tsk->signal->stats_lock, flags); | ||
273 | if (!tsk->signal->stats) | ||
274 | goto ret; | ||
275 | |||
276 | /* | ||
277 | * Each accounting subsystem calls its functions here to | ||
278 | * accumulate its per-task stats for tsk into the per-tgid structure | ||
279 | * | ||
280 | * per-task-foo(tsk->signal->stats, tsk); | ||
281 | */ | ||
282 | delayacct_add_tsk(tsk->signal->stats, tsk); | ||
283 | ret: | ||
284 | spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); | ||
285 | return; | ||
286 | } | ||
287 | |||
288 | static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd) | ||
289 | { | ||
290 | struct listener_list *listeners; | ||
291 | struct listener *s, *tmp; | ||
292 | unsigned int cpu; | ||
293 | cpumask_t mask = *maskp; | ||
294 | |||
295 | if (!cpus_subset(mask, cpu_possible_map)) | ||
296 | return -EINVAL; | ||
297 | |||
298 | if (isadd == REGISTER) { | ||
299 | for_each_cpu_mask(cpu, mask) { | ||
300 | s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, | ||
301 | cpu_to_node(cpu)); | ||
302 | if (!s) | ||
303 | goto cleanup; | ||
304 | s->pid = pid; | ||
305 | INIT_LIST_HEAD(&s->list); | ||
306 | s->valid = 1; | ||
307 | |||
308 | listeners = &per_cpu(listener_array, cpu); | ||
309 | down_write(&listeners->sem); | ||
310 | list_add(&s->list, &listeners->list); | ||
311 | up_write(&listeners->sem); | ||
312 | } | ||
313 | return 0; | ||
314 | } | ||
315 | |||
316 | /* Deregister or cleanup */ | ||
317 | cleanup: | ||
318 | for_each_cpu_mask(cpu, mask) { | ||
319 | listeners = &per_cpu(listener_array, cpu); | ||
320 | down_write(&listeners->sem); | ||
321 | list_for_each_entry_safe(s, tmp, &listeners->list, list) { | ||
322 | if (s->pid == pid) { | ||
323 | list_del(&s->list); | ||
324 | kfree(s); | ||
325 | break; | ||
326 | } | ||
327 | } | ||
328 | up_write(&listeners->sem); | ||
329 | } | ||
330 | return 0; | ||
331 | } | ||
332 | |||
333 | static int parse(struct nlattr *na, cpumask_t *mask) | ||
334 | { | ||
335 | char *data; | ||
336 | int len; | ||
337 | int ret; | ||
338 | |||
339 | if (na == NULL) | ||
340 | return 1; | ||
341 | len = nla_len(na); | ||
342 | if (len > TASKSTATS_CPUMASK_MAXLEN) | ||
343 | return -E2BIG; | ||
344 | if (len < 1) | ||
345 | return -EINVAL; | ||
346 | data = kmalloc(len, GFP_KERNEL); | ||
347 | if (!data) | ||
348 | return -ENOMEM; | ||
349 | nla_strlcpy(data, na, len); | ||
350 | ret = cpulist_parse(data, *mask); | ||
351 | kfree(data); | ||
352 | return ret; | ||
353 | } | ||
354 | |||
355 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | ||
356 | { | ||
357 | int rc = 0; | ||
358 | struct sk_buff *rep_skb; | ||
359 | struct taskstats stats; | ||
360 | void *reply; | ||
361 | size_t size; | ||
362 | struct nlattr *na; | ||
363 | cpumask_t mask; | ||
364 | |||
365 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask); | ||
366 | if (rc < 0) | ||
367 | return rc; | ||
368 | if (rc == 0) | ||
369 | return add_del_listener(info->snd_pid, &mask, REGISTER); | ||
370 | |||
371 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask); | ||
372 | if (rc < 0) | ||
373 | return rc; | ||
374 | if (rc == 0) | ||
375 | return add_del_listener(info->snd_pid, &mask, DEREGISTER); | ||
376 | |||
377 | /* | ||
378 | * Size includes space for nested attributes | ||
379 | */ | ||
380 | size = nla_total_size(sizeof(u32)) + | ||
381 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
382 | |||
383 | memset(&stats, 0, sizeof(stats)); | ||
384 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); | ||
385 | if (rc < 0) | ||
386 | return rc; | ||
387 | |||
388 | if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { | ||
389 | u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); | ||
390 | rc = fill_pid(pid, NULL, &stats); | ||
391 | if (rc < 0) | ||
392 | goto err; | ||
393 | |||
394 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); | ||
395 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid); | ||
396 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
397 | stats); | ||
398 | } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { | ||
399 | u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | ||
400 | rc = fill_tgid(tgid, NULL, &stats); | ||
401 | if (rc < 0) | ||
402 | goto err; | ||
403 | |||
404 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); | ||
405 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid); | ||
406 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
407 | stats); | ||
408 | } else { | ||
409 | rc = -EINVAL; | ||
410 | goto err; | ||
411 | } | ||
412 | |||
413 | nla_nest_end(rep_skb, na); | ||
414 | |||
415 | return send_reply(rep_skb, info->snd_pid); | ||
416 | |||
417 | nla_put_failure: | ||
418 | return genlmsg_cancel(rep_skb, reply); | ||
419 | err: | ||
420 | nlmsg_free(rep_skb); | ||
421 | return rc; | ||
422 | } | ||
423 | |||
424 | void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) | ||
425 | { | ||
426 | struct listener_list *listeners; | ||
427 | struct taskstats *tmp; | ||
428 | /* | ||
429 | * This is the cpu on which the task is exiting currently and will | ||
430 | * be the one for which the exit event is sent, even if the cpu | ||
431 | * on which this function is running changes later. | ||
432 | */ | ||
433 | *mycpu = raw_smp_processor_id(); | ||
434 | |||
435 | *ptidstats = NULL; | ||
436 | tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); | ||
437 | if (!tmp) | ||
438 | return; | ||
439 | |||
440 | listeners = &per_cpu(listener_array, *mycpu); | ||
441 | down_read(&listeners->sem); | ||
442 | if (!list_empty(&listeners->list)) { | ||
443 | *ptidstats = tmp; | ||
444 | tmp = NULL; | ||
445 | } | ||
446 | up_read(&listeners->sem); | ||
447 | kfree(tmp); | ||
448 | } | ||
449 | |||
450 | /* Send pid data out on exit */ | ||
451 | void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, | ||
452 | int group_dead, unsigned int mycpu) | ||
453 | { | ||
454 | int rc; | ||
455 | struct sk_buff *rep_skb; | ||
456 | void *reply; | ||
457 | size_t size; | ||
458 | int is_thread_group; | ||
459 | struct nlattr *na; | ||
460 | unsigned long flags; | ||
461 | |||
462 | if (!family_registered || !tidstats) | ||
463 | return; | ||
464 | |||
465 | spin_lock_irqsave(&tsk->signal->stats_lock, flags); | ||
466 | is_thread_group = tsk->signal->stats ? 1 : 0; | ||
467 | spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); | ||
468 | |||
469 | rc = 0; | ||
470 | /* | ||
471 | * Size includes space for nested attributes | ||
472 | */ | ||
473 | size = nla_total_size(sizeof(u32)) + | ||
474 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
475 | |||
476 | if (is_thread_group) | ||
477 | size = 2 * size; /* PID + STATS + TGID + STATS */ | ||
478 | |||
479 | rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); | ||
480 | if (rc < 0) | ||
481 | goto ret; | ||
482 | |||
483 | rc = fill_pid(tsk->pid, tsk, tidstats); | ||
484 | if (rc < 0) | ||
485 | goto err_skb; | ||
486 | |||
487 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); | ||
488 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid); | ||
489 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
490 | *tidstats); | ||
491 | nla_nest_end(rep_skb, na); | ||
492 | |||
493 | if (!is_thread_group) | ||
494 | goto send; | ||
495 | |||
496 | /* | ||
497 | * tsk has/had a thread group, so fill in tsk->signal->stats. It doesn't | ||
498 | * matter whether tsk is the leader or the last group member leaving. | ||
499 | */ | ||
500 | |||
501 | fill_tgid_exit(tsk); | ||
502 | if (!group_dead) | ||
503 | goto send; | ||
504 | |||
505 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); | ||
506 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid); | ||
507 | /* No locking needed for tsk->signal->stats since group is dead */ | ||
508 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | ||
509 | *tsk->signal->stats); | ||
510 | nla_nest_end(rep_skb, na); | ||
511 | |||
512 | send: | ||
513 | send_cpu_listeners(rep_skb, mycpu); | ||
514 | return; | ||
515 | |||
516 | nla_put_failure: | ||
517 | genlmsg_cancel(rep_skb, reply); | ||
518 | goto ret; | ||
519 | err_skb: | ||
520 | nlmsg_free(rep_skb); | ||
521 | ret: | ||
522 | return; | ||
523 | } | ||
524 | |||
525 | static struct genl_ops taskstats_ops = { | ||
526 | .cmd = TASKSTATS_CMD_GET, | ||
527 | .doit = taskstats_user_cmd, | ||
528 | .policy = taskstats_cmd_get_policy, | ||
529 | }; | ||
530 | |||
531 | /* Needed early in initialization */ | ||
532 | void __init taskstats_init_early(void) | ||
533 | { | ||
534 | unsigned int i; | ||
535 | |||
536 | taskstats_cache = kmem_cache_create("taskstats_cache", | ||
537 | sizeof(struct taskstats), | ||
538 | 0, SLAB_PANIC, NULL, NULL); | ||
539 | for_each_possible_cpu(i) { | ||
540 | INIT_LIST_HEAD(&(per_cpu(listener_array, i).list)); | ||
541 | init_rwsem(&(per_cpu(listener_array, i).sem)); | ||
542 | } | ||
543 | } | ||
544 | |||
545 | static int __init taskstats_init(void) | ||
546 | { | ||
547 | int rc; | ||
548 | |||
549 | rc = genl_register_family(&family); | ||
550 | if (rc) | ||
551 | return rc; | ||
552 | |||
553 | rc = genl_register_ops(&family, &taskstats_ops); | ||
554 | if (rc < 0) | ||
555 | goto err; | ||
556 | |||
557 | family_registered = 1; | ||
558 | return 0; | ||
559 | err: | ||
560 | genl_unregister_family(&family); | ||
561 | return rc; | ||
562 | } | ||
563 | |||
564 | /* | ||
565 | * A late initcall ensures that initialization of statistics collection | ||
566 | * mechanisms precedes initialization of the taskstats interface. | ||
567 | */ | ||
568 | late_initcall(taskstats_init); | ||
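
To show how the new generic netlink interface is meant to be driven, here is a rough userspace sketch of a per-PID query. It uses the libnl-3 convenience library, which is not part of this patch and postdates it; only struct taskstats and the TASKSTATS_* names come from linux/taskstats.h, everything else is an illustrative assumption. Registering for per-cpu exit records would instead put a cpulist string into TASKSTATS_CMD_ATTR_REGISTER_CPUMASK and keep receiving messages.

/*
 * Rough, untested sketch: query TASKSTATS_CMD_GET for one PID using libnl-3.
 * Error handling is omitted. Assumed build command:
 *   cc query_taskstats.c $(pkg-config --cflags --libs libnl-genl-3.0)
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <linux/taskstats.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>

/* NL_CB_VALID callback: unpack AGGR_PID -> STATS and print two delay totals */
static int print_stats(struct nl_msg *msg, void *arg)
{
	struct nlattr *attrs[TASKSTATS_TYPE_MAX + 1];
	struct nlattr *nested[TASKSTATS_TYPE_MAX + 1];
	struct taskstats *ts;

	if (genlmsg_parse(nlmsg_hdr(msg), 0, attrs, TASKSTATS_TYPE_MAX, NULL) < 0)
		return NL_SKIP;
	if (!attrs[TASKSTATS_TYPE_AGGR_PID])
		return NL_SKIP;
	if (nla_parse_nested(nested, TASKSTATS_TYPE_MAX,
			     attrs[TASKSTATS_TYPE_AGGR_PID], NULL) < 0)
		return NL_SKIP;
	if (!nested[TASKSTATS_TYPE_STATS])
		return NL_SKIP;

	ts = nla_data(nested[TASKSTATS_TYPE_STATS]);
	printf("cpu delay %llu ns over %llu waits, blkio delay %llu ns\n",
	       (unsigned long long)ts->cpu_delay_total,
	       (unsigned long long)ts->cpu_count,
	       (unsigned long long)ts->blkio_delay_total);
	return NL_OK;
}

int main(int argc, char **argv)
{
	struct nl_sock *sk;
	struct nl_msg *msg;
	int family;

	if (argc < 2)
		return 1;

	sk = nl_socket_alloc();
	genl_connect(sk);
	family = genl_ctrl_resolve(sk, TASKSTATS_GENL_NAME);

	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    TASKSTATS_CMD_GET, TASKSTATS_GENL_VERSION);
	nla_put_u32(msg, TASKSTATS_CMD_ATTR_PID, (uint32_t)atoi(argv[1]));

	nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, print_stats, NULL);
	nl_send_auto(sk, msg);
	nl_recvmsgs_default(sk);	/* wait for the kernel's unicast reply */

	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}

The callback mirrors taskstats_user_cmd() above: the kernel nests TASKSTATS_TYPE_PID and TASKSTATS_TYPE_STATS under TASKSTATS_TYPE_AGGR_PID, so userspace unpacks one level of nesting before it reaches struct taskstats.
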
diff --git a/kernel/timer.c b/kernel/timer.c index 2a87430a58d4..05809c2e2fd6 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -374,6 +374,7 @@ int del_timer_sync(struct timer_list *timer) | |||
374 | int ret = try_to_del_timer_sync(timer); | 374 | int ret = try_to_del_timer_sync(timer); |
375 | if (ret >= 0) | 375 | if (ret >= 0) |
376 | return ret; | 376 | return ret; |
377 | cpu_relax(); | ||
377 | } | 378 | } |
378 | } | 379 | } |
379 | 380 | ||
@@ -968,6 +969,7 @@ void __init timekeeping_init(void) | |||
968 | } | 969 | } |
969 | 970 | ||
970 | 971 | ||
972 | static int timekeeping_suspended; | ||
971 | /* | 973 | /* |
972 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | 974 | * timekeeping_resume - Resumes the generic timekeeping subsystem. |
973 | * @dev: unused | 975 | * @dev: unused |
@@ -983,6 +985,18 @@ static int timekeeping_resume(struct sys_device *dev) | |||
983 | write_seqlock_irqsave(&xtime_lock, flags); | 985 | write_seqlock_irqsave(&xtime_lock, flags); |
984 | /* restart the last cycle value */ | 986 | /* restart the last cycle value */ |
985 | clock->cycle_last = clocksource_read(clock); | 987 | clock->cycle_last = clocksource_read(clock); |
988 | clock->error = 0; | ||
989 | timekeeping_suspended = 0; | ||
990 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
991 | return 0; | ||
992 | } | ||
993 | |||
994 | static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) | ||
995 | { | ||
996 | unsigned long flags; | ||
997 | |||
998 | write_seqlock_irqsave(&xtime_lock, flags); | ||
999 | timekeeping_suspended = 1; | ||
986 | write_sequnlock_irqrestore(&xtime_lock, flags); | 1000 | write_sequnlock_irqrestore(&xtime_lock, flags); |
987 | return 0; | 1001 | return 0; |
988 | } | 1002 | } |
@@ -990,6 +1004,7 @@ static int timekeeping_resume(struct sys_device *dev) | |||
990 | /* sysfs resume/suspend bits for timekeeping */ | 1004 | /* sysfs resume/suspend bits for timekeeping */ |
991 | static struct sysdev_class timekeeping_sysclass = { | 1005 | static struct sysdev_class timekeeping_sysclass = { |
992 | .resume = timekeeping_resume, | 1006 | .resume = timekeeping_resume, |
1007 | .suspend = timekeeping_suspend, | ||
993 | set_kset_name("timekeeping"), | 1008 | set_kset_name("timekeeping"), |
994 | }; | 1009 | }; |
995 | 1010 | ||
@@ -1100,13 +1115,16 @@ static void update_wall_time(void) | |||
1100 | { | 1115 | { |
1101 | cycle_t offset; | 1116 | cycle_t offset; |
1102 | 1117 | ||
1103 | clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift; | 1118 | /* Make sure we're fully resumed: */ |
1119 | if (unlikely(timekeeping_suspended)) | ||
1120 | return; | ||
1104 | 1121 | ||
1105 | #ifdef CONFIG_GENERIC_TIME | 1122 | #ifdef CONFIG_GENERIC_TIME |
1106 | offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; | 1123 | offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; |
1107 | #else | 1124 | #else |
1108 | offset = clock->cycle_interval; | 1125 | offset = clock->cycle_interval; |
1109 | #endif | 1126 | #endif |
1127 | clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift; | ||
1110 | 1128 | ||
1111 | /* normally this loop will run just once, however in the | 1129 | /* normally this loop will run just once, however in the |
1112 | * case of lost or late ticks, it will accumulate correctly. | 1130 | * case of lost or late ticks, it will accumulate correctly. |