aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile4
-rw-r--r--kernel/cgroup.c1
-rw-r--r--kernel/cgroup_freezer.c51
-rw-r--r--kernel/cpu.c3
-rw-r--r--kernel/freezer.c20
-rw-r--r--kernel/irq/proc.c2
-rw-r--r--kernel/lockdep.c17
-rw-r--r--kernel/power/Kconfig2
-rw-r--r--kernel/printk.c39
-rw-r--r--kernel/profile.c2
-rw-r--r--kernel/resource.c8
-rw-r--r--kernel/sched.c16
-rw-r--r--kernel/sched_debug.c2
-rw-r--r--kernel/sched_fair.c241
-rw-r--r--kernel/sched_features.h1
-rw-r--r--kernel/sched_idletask.c5
-rw-r--r--kernel/sched_rt.c5
-rw-r--r--kernel/signal.c3
-rw-r--r--kernel/smp.c18
-rw-r--r--kernel/sysctl.c2
-rw-r--r--kernel/timer.c129
-rw-r--r--kernel/trace/Kconfig31
-rw-r--r--kernel/trace/Makefile6
-rw-r--r--kernel/trace/ftrace.c608
-rw-r--r--kernel/trace/ring_buffer.c62
-rw-r--r--kernel/trace/trace.c63
-rw-r--r--kernel/trace/trace.h22
-rw-r--r--kernel/trace/trace_functions.c2
-rw-r--r--kernel/trace/trace_irqsoff.c4
-rw-r--r--kernel/trace/trace_sched_wakeup.c4
-rw-r--r--kernel/trace/trace_selftest.c18
-rw-r--r--kernel/trace/trace_stack.c4
-rw-r--r--kernel/tracepoint.c8
-rw-r--r--kernel/workqueue.c45
34 files changed, 640 insertions, 808 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 305f11dbef21..9a3ec66a9d84 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -13,7 +13,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
13 13
14CFLAGS_REMOVE_sched.o = -mno-spe 14CFLAGS_REMOVE_sched.o = -mno-spe
15 15
16ifdef CONFIG_FTRACE 16ifdef CONFIG_FUNCTION_TRACER
17# Do not trace debug files and internal ftrace files 17# Do not trace debug files and internal ftrace files
18CFLAGS_REMOVE_lockdep.o = -pg 18CFLAGS_REMOVE_lockdep.o = -pg
19CFLAGS_REMOVE_lockdep_proc.o = -pg 19CFLAGS_REMOVE_lockdep_proc.o = -pg
@@ -88,7 +88,7 @@ obj-$(CONFIG_MARKERS) += marker.o
88obj-$(CONFIG_TRACEPOINTS) += tracepoint.o 88obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
89obj-$(CONFIG_LATENCYTOP) += latencytop.o 89obj-$(CONFIG_LATENCYTOP) += latencytop.o
90obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o 90obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
91obj-$(CONFIG_FTRACE) += trace/ 91obj-$(CONFIG_FUNCTION_TRACER) += trace/
92obj-$(CONFIG_TRACING) += trace/ 92obj-$(CONFIG_TRACING) += trace/
93obj-$(CONFIG_SMP) += sched_cpupri.o 93obj-$(CONFIG_SMP) += sched_cpupri.o
94 94
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 35eebd5510c2..358e77564e6f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2497,7 +2497,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2497 list_del(&cgrp->sibling); 2497 list_del(&cgrp->sibling);
2498 spin_lock(&cgrp->dentry->d_lock); 2498 spin_lock(&cgrp->dentry->d_lock);
2499 d = dget(cgrp->dentry); 2499 d = dget(cgrp->dentry);
2500 cgrp->dentry = NULL;
2501 spin_unlock(&d->d_lock); 2500 spin_unlock(&d->d_lock);
2502 2501
2503 cgroup_d_remove_dir(d); 2502 cgroup_d_remove_dir(d);
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e95056954498..7fa476f01d05 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -162,9 +162,13 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
162 struct task_struct *task) 162 struct task_struct *task)
163{ 163{
164 struct freezer *freezer; 164 struct freezer *freezer;
165 int retval;
166 165
167 /* Anything frozen can't move or be moved to/from */ 166 /*
167 * Anything frozen can't move or be moved to/from.
168 *
169 * Since orig_freezer->state == FROZEN means that @task has been
170 * frozen, so it's sufficient to check the latter condition.
171 */
168 172
169 if (is_task_frozen_enough(task)) 173 if (is_task_frozen_enough(task))
170 return -EBUSY; 174 return -EBUSY;
@@ -173,13 +177,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
173 if (freezer->state == CGROUP_FROZEN) 177 if (freezer->state == CGROUP_FROZEN)
174 return -EBUSY; 178 return -EBUSY;
175 179
176 retval = 0; 180 return 0;
177 task_lock(task);
178 freezer = task_freezer(task);
179 if (freezer->state == CGROUP_FROZEN)
180 retval = -EBUSY;
181 task_unlock(task);
182 return retval;
183} 181}
184 182
185static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) 183static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
@@ -190,8 +188,9 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
190 freezer = task_freezer(task); 188 freezer = task_freezer(task);
191 task_unlock(task); 189 task_unlock(task);
192 190
193 BUG_ON(freezer->state == CGROUP_FROZEN);
194 spin_lock_irq(&freezer->lock); 191 spin_lock_irq(&freezer->lock);
192 BUG_ON(freezer->state == CGROUP_FROZEN);
193
195 /* Locking avoids race with FREEZING -> THAWED transitions. */ 194 /* Locking avoids race with FREEZING -> THAWED transitions. */
196 if (freezer->state == CGROUP_FREEZING) 195 if (freezer->state == CGROUP_FREEZING)
197 freeze_task(task, true); 196 freeze_task(task, true);
@@ -276,25 +275,18 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
276 return num_cant_freeze_now ? -EBUSY : 0; 275 return num_cant_freeze_now ? -EBUSY : 0;
277} 276}
278 277
279static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) 278static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
280{ 279{
281 struct cgroup_iter it; 280 struct cgroup_iter it;
282 struct task_struct *task; 281 struct task_struct *task;
283 282
284 cgroup_iter_start(cgroup, &it); 283 cgroup_iter_start(cgroup, &it);
285 while ((task = cgroup_iter_next(cgroup, &it))) { 284 while ((task = cgroup_iter_next(cgroup, &it))) {
286 int do_wake; 285 thaw_process(task);
287
288 task_lock(task);
289 do_wake = __thaw_process(task);
290 task_unlock(task);
291 if (do_wake)
292 wake_up_process(task);
293 } 286 }
294 cgroup_iter_end(cgroup, &it); 287 cgroup_iter_end(cgroup, &it);
295 freezer->state = CGROUP_THAWED;
296 288
297 return 0; 289 freezer->state = CGROUP_THAWED;
298} 290}
299 291
300static int freezer_change_state(struct cgroup *cgroup, 292static int freezer_change_state(struct cgroup *cgroup,
@@ -304,27 +296,22 @@ static int freezer_change_state(struct cgroup *cgroup,
304 int retval = 0; 296 int retval = 0;
305 297
306 freezer = cgroup_freezer(cgroup); 298 freezer = cgroup_freezer(cgroup);
299
307 spin_lock_irq(&freezer->lock); 300 spin_lock_irq(&freezer->lock);
301
308 update_freezer_state(cgroup, freezer); 302 update_freezer_state(cgroup, freezer);
309 if (goal_state == freezer->state) 303 if (goal_state == freezer->state)
310 goto out; 304 goto out;
311 switch (freezer->state) { 305
306 switch (goal_state) {
312 case CGROUP_THAWED: 307 case CGROUP_THAWED:
313 retval = try_to_freeze_cgroup(cgroup, freezer); 308 unfreeze_cgroup(cgroup, freezer);
314 break; 309 break;
315 case CGROUP_FREEZING:
316 if (goal_state == CGROUP_FROZEN) {
317 /* Userspace is retrying after
318 * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
319 retval = try_to_freeze_cgroup(cgroup, freezer);
320 break;
321 }
322 /* state == FREEZING and goal_state == THAWED, so unfreeze */
323 case CGROUP_FROZEN: 310 case CGROUP_FROZEN:
324 retval = unfreeze_cgroup(cgroup, freezer); 311 retval = try_to_freeze_cgroup(cgroup, freezer);
325 break; 312 break;
326 default: 313 default:
327 break; 314 BUG();
328 } 315 }
329out: 316out:
330 spin_unlock_irq(&freezer->lock); 317 spin_unlock_irq(&freezer->lock);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 86d49045daed..5a732c5ef08b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -499,3 +499,6 @@ const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
499#endif 499#endif
500}; 500};
501EXPORT_SYMBOL_GPL(cpu_bit_bitmap); 501EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
502
503const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
504EXPORT_SYMBOL(cpu_all_bits);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index ba6248b323ef..2f4936cf7083 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -121,16 +121,7 @@ void cancel_freezing(struct task_struct *p)
121 } 121 }
122} 122}
123 123
124/* 124static int __thaw_process(struct task_struct *p)
125 * Wake up a frozen process
126 *
127 * task_lock() is needed to prevent the race with refrigerator() which may
128 * occur if the freezing of tasks fails. Namely, without the lock, if the
129 * freezing of tasks failed, thaw_tasks() might have run before a task in
130 * refrigerator() could call frozen_process(), in which case the task would be
131 * frozen and no one would thaw it.
132 */
133int __thaw_process(struct task_struct *p)
134{ 125{
135 if (frozen(p)) { 126 if (frozen(p)) {
136 p->flags &= ~PF_FROZEN; 127 p->flags &= ~PF_FROZEN;
@@ -140,6 +131,15 @@ int __thaw_process(struct task_struct *p)
140 return 0; 131 return 0;
141} 132}
142 133
134/*
135 * Wake up a frozen process
136 *
137 * task_lock() is needed to prevent the race with refrigerator() which may
138 * occur if the freezing of tasks fails. Namely, without the lock, if the
139 * freezing of tasks failed, thaw_tasks() might have run before a task in
140 * refrigerator() could call frozen_process(), in which case the task would be
141 * frozen and no one would thaw it.
142 */
143int thaw_process(struct task_struct *p) 143int thaw_process(struct task_struct *p)
144{ 144{
145 task_lock(p); 145 task_lock(p);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index fac014a81b24..4d161c70ba55 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -220,7 +220,7 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
220 } 220 }
221} 221}
222 222
223void register_default_affinity_proc(void) 223static void register_default_affinity_proc(void)
224{ 224{
225#ifdef CONFIG_SMP 225#ifdef CONFIG_SMP
226 proc_create("irq/default_smp_affinity", 0600, NULL, 226 proc_create("irq/default_smp_affinity", 0600, NULL,
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index dbda475b13bd..06e157119d2b 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2169,12 +2169,11 @@ void early_boot_irqs_on(void)
2169/* 2169/*
2170 * Hardirqs will be enabled: 2170 * Hardirqs will be enabled:
2171 */ 2171 */
2172void trace_hardirqs_on_caller(unsigned long a0) 2172void trace_hardirqs_on_caller(unsigned long ip)
2173{ 2173{
2174 struct task_struct *curr = current; 2174 struct task_struct *curr = current;
2175 unsigned long ip;
2176 2175
2177 time_hardirqs_on(CALLER_ADDR0, a0); 2176 time_hardirqs_on(CALLER_ADDR0, ip);
2178 2177
2179 if (unlikely(!debug_locks || current->lockdep_recursion)) 2178 if (unlikely(!debug_locks || current->lockdep_recursion))
2180 return; 2179 return;
@@ -2188,7 +2187,6 @@ void trace_hardirqs_on_caller(unsigned long a0)
2188 } 2187 }
2189 /* we'll do an OFF -> ON transition: */ 2188 /* we'll do an OFF -> ON transition: */
2190 curr->hardirqs_enabled = 1; 2189 curr->hardirqs_enabled = 1;
2191 ip = (unsigned long) __builtin_return_address(0);
2192 2190
2193 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) 2191 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2194 return; 2192 return;
@@ -2224,11 +2222,11 @@ EXPORT_SYMBOL(trace_hardirqs_on);
2224/* 2222/*
2225 * Hardirqs were disabled: 2223 * Hardirqs were disabled:
2226 */ 2224 */
2227void trace_hardirqs_off_caller(unsigned long a0) 2225void trace_hardirqs_off_caller(unsigned long ip)
2228{ 2226{
2229 struct task_struct *curr = current; 2227 struct task_struct *curr = current;
2230 2228
2231 time_hardirqs_off(CALLER_ADDR0, a0); 2229 time_hardirqs_off(CALLER_ADDR0, ip);
2232 2230
2233 if (unlikely(!debug_locks || current->lockdep_recursion)) 2231 if (unlikely(!debug_locks || current->lockdep_recursion))
2234 return; 2232 return;
@@ -2241,7 +2239,7 @@ void trace_hardirqs_off_caller(unsigned long a0)
2241 * We have done an ON -> OFF transition: 2239 * We have done an ON -> OFF transition:
2242 */ 2240 */
2243 curr->hardirqs_enabled = 0; 2241 curr->hardirqs_enabled = 0;
2244 curr->hardirq_disable_ip = _RET_IP_; 2242 curr->hardirq_disable_ip = ip;
2245 curr->hardirq_disable_event = ++curr->irq_events; 2243 curr->hardirq_disable_event = ++curr->irq_events;
2246 debug_atomic_inc(&hardirqs_off_events); 2244 debug_atomic_inc(&hardirqs_off_events);
2247 } else 2245 } else
@@ -3417,9 +3415,10 @@ retry:
3417 } 3415 }
3418 printk(" ignoring it.\n"); 3416 printk(" ignoring it.\n");
3419 unlock = 0; 3417 unlock = 0;
3418 } else {
3419 if (count != 10)
3420 printk(KERN_CONT " locked it.\n");
3420 } 3421 }
3421 if (count != 10)
3422 printk(" locked it.\n");
3423 3422
3424 do_each_thread(g, p) { 3423 do_each_thread(g, p) {
3425 /* 3424 /*
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index dcd165f92a88..23bd4daeb96b 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -96,7 +96,7 @@ config SUSPEND
96 96
97config PM_TEST_SUSPEND 97config PM_TEST_SUSPEND
98 bool "Test suspend/resume and wakealarm during bootup" 98 bool "Test suspend/resume and wakealarm during bootup"
99 depends on SUSPEND && PM_DEBUG && RTC_LIB=y 99 depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
100 ---help--- 100 ---help---
101 This option will let you suspend your machine during bootup, and 101 This option will let you suspend your machine during bootup, and
102 make it wake up a few seconds later using an RTC wakeup alarm. 102 make it wake up a few seconds later using an RTC wakeup alarm.
diff --git a/kernel/printk.c b/kernel/printk.c
index 6341af77eb65..f492f1583d77 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -233,45 +233,6 @@ static inline void boot_delay_msec(void)
233#endif 233#endif
234 234
235/* 235/*
236 * Return the number of unread characters in the log buffer.
237 */
238static int log_buf_get_len(void)
239{
240 return logged_chars;
241}
242
243/*
244 * Copy a range of characters from the log buffer.
245 */
246int log_buf_copy(char *dest, int idx, int len)
247{
248 int ret, max;
249 bool took_lock = false;
250
251 if (!oops_in_progress) {
252 spin_lock_irq(&logbuf_lock);
253 took_lock = true;
254 }
255
256 max = log_buf_get_len();
257 if (idx < 0 || idx >= max) {
258 ret = -1;
259 } else {
260 if (len > max)
261 len = max;
262 ret = len;
263 idx += (log_end - max);
264 while (len-- > 0)
265 dest[len] = LOG_BUF(idx + len);
266 }
267
268 if (took_lock)
269 spin_unlock_irq(&logbuf_lock);
270
271 return ret;
272}
273
274/*
275 * Commands to do_syslog: 236 * Commands to do_syslog:
276 * 237 *
277 * 0 -- Close the log. Currently a NOP. 238 * 0 -- Close the log. Currently a NOP.
diff --git a/kernel/profile.c b/kernel/profile.c
index a9e422df6bf6..9830a037d8db 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -102,7 +102,7 @@ int profile_setup(char *str)
102__setup("profile=", profile_setup); 102__setup("profile=", profile_setup);
103 103
104 104
105int profile_init(void) 105int __ref profile_init(void)
106{ 106{
107 int buffer_bytes; 107 int buffer_bytes;
108 if (!prof_on) 108 if (!prof_on)
diff --git a/kernel/resource.c b/kernel/resource.c
index 4089d12af6e0..4337063663ef 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -17,6 +17,7 @@
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <linux/device.h> 19#include <linux/device.h>
20#include <linux/pfn.h>
20#include <asm/io.h> 21#include <asm/io.h>
21 22
22 23
@@ -522,7 +523,7 @@ static void __init __reserve_region_with_split(struct resource *root,
522{ 523{
523 struct resource *parent = root; 524 struct resource *parent = root;
524 struct resource *conflict; 525 struct resource *conflict;
525 struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); 526 struct resource *res = kzalloc(sizeof(*res), GFP_ATOMIC);
526 527
527 if (!res) 528 if (!res)
528 return; 529 return;
@@ -571,7 +572,7 @@ static void __init __reserve_region_with_split(struct resource *root,
571 572
572} 573}
573 574
574void reserve_region_with_split(struct resource *root, 575void __init reserve_region_with_split(struct resource *root,
575 resource_size_t start, resource_size_t end, 576 resource_size_t start, resource_size_t end,
576 const char *name) 577 const char *name)
577{ 578{
@@ -849,7 +850,8 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
849 continue; 850 continue;
850 if (p->end < addr) 851 if (p->end < addr)
851 continue; 852 continue;
852 if (p->start <= addr && (p->end >= addr + size - 1)) 853 if (PFN_DOWN(p->start) <= PFN_DOWN(addr) &&
854 PFN_DOWN(p->end) >= PFN_DOWN(addr + size - 1))
853 continue; 855 continue;
854 printk(KERN_WARNING "resource map sanity check conflict: " 856 printk(KERN_WARNING "resource map sanity check conflict: "
855 "0x%llx 0x%llx 0x%llx 0x%llx %s\n", 857 "0x%llx 0x%llx 0x%llx 0x%llx %s\n",
diff --git a/kernel/sched.c b/kernel/sched.c
index 6625c3c4b10d..57c933ffbee1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -386,7 +386,6 @@ struct cfs_rq {
386 386
387 u64 exec_clock; 387 u64 exec_clock;
388 u64 min_vruntime; 388 u64 min_vruntime;
389 u64 pair_start;
390 389
391 struct rb_root tasks_timeline; 390 struct rb_root tasks_timeline;
392 struct rb_node *rb_leftmost; 391 struct rb_node *rb_leftmost;
@@ -398,7 +397,7 @@ struct cfs_rq {
398 * 'curr' points to currently running entity on this cfs_rq. 397 * 'curr' points to currently running entity on this cfs_rq.
399 * It is set to NULL otherwise (i.e when none are currently running). 398 * It is set to NULL otherwise (i.e when none are currently running).
400 */ 399 */
401 struct sched_entity *curr, *next; 400 struct sched_entity *curr, *next, *last;
402 401
403 unsigned long nr_spread_over; 402 unsigned long nr_spread_over;
404 403
@@ -1806,7 +1805,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
1806 /* 1805 /*
1807 * Buddy candidates are cache hot: 1806 * Buddy candidates are cache hot:
1808 */ 1807 */
1809 if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next)) 1808 if (sched_feat(CACHE_HOT_BUDDY) &&
1809 (&p->se == cfs_rq_of(&p->se)->next ||
1810 &p->se == cfs_rq_of(&p->se)->last))
1810 return 1; 1811 return 1;
1811 1812
1812 if (p->sched_class != &fair_sched_class) 1813 if (p->sched_class != &fair_sched_class)
@@ -3344,7 +3345,7 @@ small_imbalance:
3344 } else 3345 } else
3345 this_load_per_task = cpu_avg_load_per_task(this_cpu); 3346 this_load_per_task = cpu_avg_load_per_task(this_cpu);
3346 3347
3347 if (max_load - this_load + 2*busiest_load_per_task >= 3348 if (max_load - this_load + busiest_load_per_task >=
3348 busiest_load_per_task * imbn) { 3349 busiest_load_per_task * imbn) {
3349 *imbalance = busiest_load_per_task; 3350 *imbalance = busiest_load_per_task;
3350 return busiest; 3351 return busiest;
@@ -6876,15 +6877,17 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
6876 struct sched_domain *tmp; 6877 struct sched_domain *tmp;
6877 6878
6878 /* Remove the sched domains which do not contribute to scheduling. */ 6879 /* Remove the sched domains which do not contribute to scheduling. */
6879 for (tmp = sd; tmp; tmp = tmp->parent) { 6880 for (tmp = sd; tmp; ) {
6880 struct sched_domain *parent = tmp->parent; 6881 struct sched_domain *parent = tmp->parent;
6881 if (!parent) 6882 if (!parent)
6882 break; 6883 break;
6884
6883 if (sd_parent_degenerate(tmp, parent)) { 6885 if (sd_parent_degenerate(tmp, parent)) {
6884 tmp->parent = parent->parent; 6886 tmp->parent = parent->parent;
6885 if (parent->parent) 6887 if (parent->parent)
6886 parent->parent->child = tmp; 6888 parent->parent->child = tmp;
6887 } 6889 } else
6890 tmp = tmp->parent;
6888 } 6891 }
6889 6892
6890 if (sd && sd_degenerate(sd)) { 6893 if (sd && sd_degenerate(sd)) {
@@ -7673,6 +7676,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7673error: 7676error:
7674 free_sched_groups(cpu_map, tmpmask); 7677 free_sched_groups(cpu_map, tmpmask);
7675 SCHED_CPUMASK_FREE((void *)allmasks); 7678 SCHED_CPUMASK_FREE((void *)allmasks);
7679 kfree(rd);
7676 return -ENOMEM; 7680 return -ENOMEM;
7677#endif 7681#endif
7678} 7682}
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index ad958c1ec708..5ae17762ec32 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -319,7 +319,7 @@ static int __init init_sched_debug_procfs(void)
319{ 319{
320 struct proc_dir_entry *pe; 320 struct proc_dir_entry *pe;
321 321
322 pe = proc_create("sched_debug", 0644, NULL, &sched_debug_fops); 322 pe = proc_create("sched_debug", 0444, NULL, &sched_debug_fops);
323 if (!pe) 323 if (!pe)
324 return -ENOMEM; 324 return -ENOMEM;
325 return 0; 325 return 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9573c33688b8..51aa3e102acb 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
143 return se->parent; 143 return se->parent;
144} 144}
145 145
146/* return depth at which a sched entity is present in the hierarchy */
147static inline int depth_se(struct sched_entity *se)
148{
149 int depth = 0;
150
151 for_each_sched_entity(se)
152 depth++;
153
154 return depth;
155}
156
157static void
158find_matching_se(struct sched_entity **se, struct sched_entity **pse)
159{
160 int se_depth, pse_depth;
161
162 /*
163 * preemption test can be made between sibling entities who are in the
164 * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
165 * both tasks until we find their ancestors who are siblings of common
166 * parent.
167 */
168
169 /* First walk up until both entities are at same depth */
170 se_depth = depth_se(*se);
171 pse_depth = depth_se(*pse);
172
173 while (se_depth > pse_depth) {
174 se_depth--;
175 *se = parent_entity(*se);
176 }
177
178 while (pse_depth > se_depth) {
179 pse_depth--;
180 *pse = parent_entity(*pse);
181 }
182
183 while (!is_same_group(*se, *pse)) {
184 *se = parent_entity(*se);
185 *pse = parent_entity(*pse);
186 }
187}
188
146#else /* CONFIG_FAIR_GROUP_SCHED */ 189#else /* CONFIG_FAIR_GROUP_SCHED */
147 190
148static inline struct rq *rq_of(struct cfs_rq *cfs_rq) 191static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
@@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
193 return NULL; 236 return NULL;
194} 237}
195 238
239static inline void
240find_matching_se(struct sched_entity **se, struct sched_entity **pse)
241{
242}
243
196#endif /* CONFIG_FAIR_GROUP_SCHED */ 244#endif /* CONFIG_FAIR_GROUP_SCHED */
197 245
198 246
@@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
223 return se->vruntime - cfs_rq->min_vruntime; 271 return se->vruntime - cfs_rq->min_vruntime;
224} 272}
225 273
274static void update_min_vruntime(struct cfs_rq *cfs_rq)
275{
276 u64 vruntime = cfs_rq->min_vruntime;
277
278 if (cfs_rq->curr)
279 vruntime = cfs_rq->curr->vruntime;
280
281 if (cfs_rq->rb_leftmost) {
282 struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
283 struct sched_entity,
284 run_node);
285
286 if (vruntime == cfs_rq->min_vruntime)
287 vruntime = se->vruntime;
288 else
289 vruntime = min_vruntime(vruntime, se->vruntime);
290 }
291
292 cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
293}
294
226/* 295/*
227 * Enqueue an entity into the rb-tree: 296 * Enqueue an entity into the rb-tree:
228 */ 297 */
@@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
256 * Maintain a cache of leftmost tree entries (it is frequently 325 * Maintain a cache of leftmost tree entries (it is frequently
257 * used): 326 * used):
258 */ 327 */
259 if (leftmost) { 328 if (leftmost)
260 cfs_rq->rb_leftmost = &se->run_node; 329 cfs_rq->rb_leftmost = &se->run_node;
261 /*
262 * maintain cfs_rq->min_vruntime to be a monotonic increasing
263 * value tracking the leftmost vruntime in the tree.
264 */
265 cfs_rq->min_vruntime =
266 max_vruntime(cfs_rq->min_vruntime, se->vruntime);
267 }
268 330
269 rb_link_node(&se->run_node, parent, link); 331 rb_link_node(&se->run_node, parent, link);
270 rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); 332 rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -274,37 +336,25 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
274{ 336{
275 if (cfs_rq->rb_leftmost == &se->run_node) { 337 if (cfs_rq->rb_leftmost == &se->run_node) {
276 struct rb_node *next_node; 338 struct rb_node *next_node;
277 struct sched_entity *next;
278 339
279 next_node = rb_next(&se->run_node); 340 next_node = rb_next(&se->run_node);
280 cfs_rq->rb_leftmost = next_node; 341 cfs_rq->rb_leftmost = next_node;
281
282 if (next_node) {
283 next = rb_entry(next_node,
284 struct sched_entity, run_node);
285 cfs_rq->min_vruntime =
286 max_vruntime(cfs_rq->min_vruntime,
287 next->vruntime);
288 }
289 } 342 }
290 343
291 if (cfs_rq->next == se)
292 cfs_rq->next = NULL;
293
294 rb_erase(&se->run_node, &cfs_rq->tasks_timeline); 344 rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
295} 345}
296 346
297static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
298{
299 return cfs_rq->rb_leftmost;
300}
301
302static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) 347static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
303{ 348{
304 return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node); 349 struct rb_node *left = cfs_rq->rb_leftmost;
350
351 if (!left)
352 return NULL;
353
354 return rb_entry(left, struct sched_entity, run_node);
305} 355}
306 356
307static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) 357static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
308{ 358{
309 struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); 359 struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
310 360
@@ -424,6 +474,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
424 schedstat_add(cfs_rq, exec_clock, delta_exec); 474 schedstat_add(cfs_rq, exec_clock, delta_exec);
425 delta_exec_weighted = calc_delta_fair(delta_exec, curr); 475 delta_exec_weighted = calc_delta_fair(delta_exec, curr);
426 curr->vruntime += delta_exec_weighted; 476 curr->vruntime += delta_exec_weighted;
477 update_min_vruntime(cfs_rq);
427} 478}
428 479
429static void update_curr(struct cfs_rq *cfs_rq) 480static void update_curr(struct cfs_rq *cfs_rq)
@@ -613,13 +664,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
613static void 664static void
614place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) 665place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
615{ 666{
616 u64 vruntime; 667 u64 vruntime = cfs_rq->min_vruntime;
617
618 if (first_fair(cfs_rq)) {
619 vruntime = min_vruntime(cfs_rq->min_vruntime,
620 __pick_next_entity(cfs_rq)->vruntime);
621 } else
622 vruntime = cfs_rq->min_vruntime;
623 668
624 /* 669 /*
625 * The 'current' period is already promised to the current tasks, 670 * The 'current' period is already promised to the current tasks,
@@ -693,9 +738,16 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
693#endif 738#endif
694 } 739 }
695 740
741 if (cfs_rq->last == se)
742 cfs_rq->last = NULL;
743
744 if (cfs_rq->next == se)
745 cfs_rq->next = NULL;
746
696 if (se != cfs_rq->curr) 747 if (se != cfs_rq->curr)
697 __dequeue_entity(cfs_rq, se); 748 __dequeue_entity(cfs_rq, se);
698 account_entity_dequeue(cfs_rq, se); 749 account_entity_dequeue(cfs_rq, se);
750 update_min_vruntime(cfs_rq);
699} 751}
700 752
701/* 753/*
@@ -742,29 +794,18 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
742 se->prev_sum_exec_runtime = se->sum_exec_runtime; 794 se->prev_sum_exec_runtime = se->sum_exec_runtime;
743} 795}
744 796
745static struct sched_entity * 797static int
746pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) 798wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
747{
748 struct rq *rq = rq_of(cfs_rq);
749 u64 pair_slice = rq->clock - cfs_rq->pair_start;
750
751 if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
752 cfs_rq->pair_start = rq->clock;
753 return se;
754 }
755
756 return cfs_rq->next;
757}
758 799
759static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) 800static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
760{ 801{
761 struct sched_entity *se = NULL; 802 struct sched_entity *se = __pick_next_entity(cfs_rq);
762 803
763 if (first_fair(cfs_rq)) { 804 if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
764 se = __pick_next_entity(cfs_rq); 805 return cfs_rq->next;
765 se = pick_next(cfs_rq, se); 806
766 set_next_entity(cfs_rq, se); 807 if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
767 } 808 return cfs_rq->last;
768 809
769 return se; 810 return se;
770} 811}
@@ -1122,10 +1163,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
1122 if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) 1163 if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
1123 return 0; 1164 return 0;
1124 1165
1125 if (!sync && sched_feat(SYNC_WAKEUPS) && 1166 if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
1126 curr->se.avg_overlap < sysctl_sched_migration_cost && 1167 p->se.avg_overlap > sysctl_sched_migration_cost))
1127 p->se.avg_overlap < sysctl_sched_migration_cost) 1168 sync = 0;
1128 sync = 1;
1129 1169
1130 /* 1170 /*
1131 * If sync wakeup then subtract the (maximum possible) 1171 * If sync wakeup then subtract the (maximum possible)
@@ -1244,33 +1284,88 @@ static unsigned long wakeup_gran(struct sched_entity *se)
1244 * More easily preempt - nice tasks, while not making it harder for 1284 * More easily preempt - nice tasks, while not making it harder for
1245 * + nice tasks. 1285 * + nice tasks.
1246 */ 1286 */
1247 if (sched_feat(ASYM_GRAN)) 1287 if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
1248 gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load); 1288 gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
1249 1289
1250 return gran; 1290 return gran;
1251} 1291}
1252 1292
1253/* 1293/*
1294 * Should 'se' preempt 'curr'.
1295 *
1296 * |s1
1297 * |s2
1298 * |s3
1299 * g
1300 * |<--->|c
1301 *
1302 * w(c, s1) = -1
1303 * w(c, s2) = 0
1304 * w(c, s3) = 1
1305 *
1306 */
1307static int
1308wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
1309{
1310 s64 gran, vdiff = curr->vruntime - se->vruntime;
1311
1312 if (vdiff <= 0)
1313 return -1;
1314
1315 gran = wakeup_gran(curr);
1316 if (vdiff > gran)
1317 return 1;
1318
1319 return 0;
1320}
1321
1322static void set_last_buddy(struct sched_entity *se)
1323{
1324 for_each_sched_entity(se)
1325 cfs_rq_of(se)->last = se;
1326}
1327
1328static void set_next_buddy(struct sched_entity *se)
1329{
1330 for_each_sched_entity(se)
1331 cfs_rq_of(se)->next = se;
1332}
1333
1334/*
1254 * Preempt the current task with a newly woken task if needed: 1335 * Preempt the current task with a newly woken task if needed:
1255 */ 1336 */
1256static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) 1337static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1257{ 1338{
1258 struct task_struct *curr = rq->curr; 1339 struct task_struct *curr = rq->curr;
1259 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1260 struct sched_entity *se = &curr->se, *pse = &p->se; 1340 struct sched_entity *se = &curr->se, *pse = &p->se;
1261 s64 delta_exec;
1262 1341
1263 if (unlikely(rt_prio(p->prio))) { 1342 if (unlikely(rt_prio(p->prio))) {
1343 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1344
1264 update_rq_clock(rq); 1345 update_rq_clock(rq);
1265 update_curr(cfs_rq); 1346 update_curr(cfs_rq);
1266 resched_task(curr); 1347 resched_task(curr);
1267 return; 1348 return;
1268 } 1349 }
1269 1350
1351 if (unlikely(p->sched_class != &fair_sched_class))
1352 return;
1353
1270 if (unlikely(se == pse)) 1354 if (unlikely(se == pse))
1271 return; 1355 return;
1272 1356
1273 cfs_rq_of(pse)->next = pse; 1357 /*
1358 * Only set the backward buddy when the current task is still on the
1359 * rq. This can happen when a wakeup gets interleaved with schedule on
1360 * the ->pre_schedule() or idle_balance() point, either of which can
1361 * drop the rq lock.
1362 *
1363 * Also, during early boot the idle thread is in the fair class, for
1364 * obvious reasons its a bad idea to schedule back to the idle thread.
1365 */
1366 if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
1367 set_last_buddy(se);
1368 set_next_buddy(pse);
1274 1369
1275 /* 1370 /*
1276 * We can come here with TIF_NEED_RESCHED already set from new task 1371 * We can come here with TIF_NEED_RESCHED already set from new task
@@ -1296,9 +1391,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1296 return; 1391 return;
1297 } 1392 }
1298 1393
1299 delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime; 1394 find_matching_se(&se, &pse);
1300 if (delta_exec > wakeup_gran(pse)) 1395
1301 resched_task(curr); 1396 while (se) {
1397 BUG_ON(!pse);
1398
1399 if (wakeup_preempt_entity(se, pse) == 1) {
1400 resched_task(curr);
1401 break;
1402 }
1403
1404 se = parent_entity(se);
1405 pse = parent_entity(pse);
1406 }
1302} 1407}
1303 1408
1304static struct task_struct *pick_next_task_fair(struct rq *rq) 1409static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1312,6 +1417,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
1312 1417
1313 do { 1418 do {
1314 se = pick_next_entity(cfs_rq); 1419 se = pick_next_entity(cfs_rq);
1420 set_next_entity(cfs_rq, se);
1315 cfs_rq = group_cfs_rq(se); 1421 cfs_rq = group_cfs_rq(se);
1316 } while (cfs_rq); 1422 } while (cfs_rq);
1317 1423
@@ -1594,9 +1700,6 @@ static const struct sched_class fair_sched_class = {
1594 .enqueue_task = enqueue_task_fair, 1700 .enqueue_task = enqueue_task_fair,
1595 .dequeue_task = dequeue_task_fair, 1701 .dequeue_task = dequeue_task_fair,
1596 .yield_task = yield_task_fair, 1702 .yield_task = yield_task_fair,
1597#ifdef CONFIG_SMP
1598 .select_task_rq = select_task_rq_fair,
1599#endif /* CONFIG_SMP */
1600 1703
1601 .check_preempt_curr = check_preempt_wakeup, 1704 .check_preempt_curr = check_preempt_wakeup,
1602 1705
@@ -1604,6 +1707,8 @@ static const struct sched_class fair_sched_class = {
1604 .put_prev_task = put_prev_task_fair, 1707 .put_prev_task = put_prev_task_fair,
1605 1708
1606#ifdef CONFIG_SMP 1709#ifdef CONFIG_SMP
1710 .select_task_rq = select_task_rq_fair,
1711
1607 .load_balance = load_balance_fair, 1712 .load_balance = load_balance_fair,
1608 .move_one_task = move_one_task_fair, 1713 .move_one_task = move_one_task_fair,
1609#endif 1714#endif
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index fda016218296..da5d93b5d2c6 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -12,3 +12,4 @@ SCHED_FEAT(LB_BIAS, 1)
12SCHED_FEAT(LB_WAKEUP_UPDATE, 1) 12SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
13SCHED_FEAT(ASYM_EFF_LOAD, 1) 13SCHED_FEAT(ASYM_EFF_LOAD, 1)
14SCHED_FEAT(WAKEUP_OVERLAP, 0) 14SCHED_FEAT(WAKEUP_OVERLAP, 0)
15SCHED_FEAT(LAST_BUDDY, 1)
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index dec4ccabe2f5..8a21a2e28c13 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -105,9 +105,6 @@ static const struct sched_class idle_sched_class = {
105 105
106 /* dequeue is not valid, we print a debug message there: */ 106 /* dequeue is not valid, we print a debug message there: */
107 .dequeue_task = dequeue_task_idle, 107 .dequeue_task = dequeue_task_idle,
108#ifdef CONFIG_SMP
109 .select_task_rq = select_task_rq_idle,
110#endif /* CONFIG_SMP */
111 108
112 .check_preempt_curr = check_preempt_curr_idle, 109 .check_preempt_curr = check_preempt_curr_idle,
113 110
@@ -115,6 +112,8 @@ static const struct sched_class idle_sched_class = {
115 .put_prev_task = put_prev_task_idle, 112 .put_prev_task = put_prev_task_idle,
116 113
117#ifdef CONFIG_SMP 114#ifdef CONFIG_SMP
115 .select_task_rq = select_task_rq_idle,
116
118 .load_balance = load_balance_idle, 117 .load_balance = load_balance_idle,
119 .move_one_task = move_one_task_idle, 118 .move_one_task = move_one_task_idle,
120#endif 119#endif
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index b446dc87494f..d9ba9d5f99d6 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1504,9 +1504,6 @@ static const struct sched_class rt_sched_class = {
1504 .enqueue_task = enqueue_task_rt, 1504 .enqueue_task = enqueue_task_rt,
1505 .dequeue_task = dequeue_task_rt, 1505 .dequeue_task = dequeue_task_rt,
1506 .yield_task = yield_task_rt, 1506 .yield_task = yield_task_rt,
1507#ifdef CONFIG_SMP
1508 .select_task_rq = select_task_rq_rt,
1509#endif /* CONFIG_SMP */
1510 1507
1511 .check_preempt_curr = check_preempt_curr_rt, 1508 .check_preempt_curr = check_preempt_curr_rt,
1512 1509
@@ -1514,6 +1511,8 @@ static const struct sched_class rt_sched_class = {
1514 .put_prev_task = put_prev_task_rt, 1511 .put_prev_task = put_prev_task_rt,
1515 1512
1516#ifdef CONFIG_SMP 1513#ifdef CONFIG_SMP
1514 .select_task_rq = select_task_rq_rt,
1515
1517 .load_balance = load_balance_rt, 1516 .load_balance = load_balance_rt,
1518 .move_one_task = move_one_task_rt, 1517 .move_one_task = move_one_task_rt,
1519 .set_cpus_allowed = set_cpus_allowed_rt, 1518 .set_cpus_allowed = set_cpus_allowed_rt,
diff --git a/kernel/signal.c b/kernel/signal.c
index 105217da5c82..4530fc654455 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1144,7 +1144,8 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
1144 struct task_struct * p; 1144 struct task_struct * p;
1145 1145
1146 for_each_process(p) { 1146 for_each_process(p) {
1147 if (p->pid > 1 && !same_thread_group(p, current)) { 1147 if (task_pid_vnr(p) > 1 &&
1148 !same_thread_group(p, current)) {
1148 int err = group_send_sig_info(sig, info, p); 1149 int err = group_send_sig_info(sig, info, p);
1149 ++count; 1150 ++count;
1150 if (err != -EPERM) 1151 if (err != -EPERM)
diff --git a/kernel/smp.c b/kernel/smp.c
index f362a8553777..75c8dde58c55 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -51,10 +51,6 @@ static void csd_flag_wait(struct call_single_data *data)
51{ 51{
52 /* Wait for response */ 52 /* Wait for response */
53 do { 53 do {
54 /*
55 * We need to see the flags store in the IPI handler
56 */
57 smp_mb();
58 if (!(data->flags & CSD_FLAG_WAIT)) 54 if (!(data->flags & CSD_FLAG_WAIT))
59 break; 55 break;
60 cpu_relax(); 56 cpu_relax();
@@ -76,6 +72,11 @@ static void generic_exec_single(int cpu, struct call_single_data *data)
76 list_add_tail(&data->list, &dst->list); 72 list_add_tail(&data->list, &dst->list);
77 spin_unlock_irqrestore(&dst->lock, flags); 73 spin_unlock_irqrestore(&dst->lock, flags);
78 74
75 /*
76 * Make the list addition visible before sending the ipi.
77 */
78 smp_mb();
79
79 if (ipi) 80 if (ipi)
80 arch_send_call_function_single_ipi(cpu); 81 arch_send_call_function_single_ipi(cpu);
81 82
@@ -157,7 +158,7 @@ void generic_smp_call_function_single_interrupt(void)
157 * Need to see other stores to list head for checking whether 158 * Need to see other stores to list head for checking whether
158 * list is empty without holding q->lock 159 * list is empty without holding q->lock
159 */ 160 */
160 smp_mb(); 161 smp_read_barrier_depends();
161 while (!list_empty(&q->list)) { 162 while (!list_empty(&q->list)) {
162 unsigned int data_flags; 163 unsigned int data_flags;
163 164
@@ -191,7 +192,7 @@ void generic_smp_call_function_single_interrupt(void)
191 /* 192 /*
192 * See comment on outer loop 193 * See comment on outer loop
193 */ 194 */
194 smp_mb(); 195 smp_read_barrier_depends();
195 } 196 }
196} 197}
197 198
@@ -370,6 +371,11 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
370 list_add_tail_rcu(&data->csd.list, &call_function_queue); 371 list_add_tail_rcu(&data->csd.list, &call_function_queue);
371 spin_unlock_irqrestore(&call_function_lock, flags); 372 spin_unlock_irqrestore(&call_function_lock, flags);
372 373
374 /*
375 * Make the list addition visible before sending the ipi.
376 */
377 smp_mb();
378
373 /* Send a message to all CPUs in the map */ 379 /* Send a message to all CPUs in the map */
374 arch_send_call_function_ipi(mask); 380 arch_send_call_function_ipi(mask);
375 381
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a13bd4dfaeb1..9d048fa2d902 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -474,7 +474,7 @@ static struct ctl_table kern_table[] = {
474 .mode = 0644, 474 .mode = 0644,
475 .proc_handler = &proc_dointvec, 475 .proc_handler = &proc_dointvec,
476 }, 476 },
477#ifdef CONFIG_FTRACE 477#ifdef CONFIG_FUNCTION_TRACER
478 { 478 {
479 .ctl_name = CTL_UNNUMBERED, 479 .ctl_name = CTL_UNNUMBERED,
480 .procname = "ftrace_enabled", 480 .procname = "ftrace_enabled",
diff --git a/kernel/timer.c b/kernel/timer.c
index 56becf373c58..dbd50fabe4c7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -112,27 +112,8 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
112 tbase_get_deferrable(timer->base)); 112 tbase_get_deferrable(timer->base));
113} 113}
114 114
115/** 115static unsigned long round_jiffies_common(unsigned long j, int cpu,
116 * __round_jiffies - function to round jiffies to a full second 116 bool force_up)
117 * @j: the time in (absolute) jiffies that should be rounded
118 * @cpu: the processor number on which the timeout will happen
119 *
120 * __round_jiffies() rounds an absolute time in the future (in jiffies)
121 * up or down to (approximately) full seconds. This is useful for timers
122 * for which the exact time they fire does not matter too much, as long as
123 * they fire approximately every X seconds.
124 *
125 * By rounding these timers to whole seconds, all such timers will fire
126 * at the same time, rather than at various times spread out. The goal
127 * of this is to have the CPU wake up less, which saves power.
128 *
129 * The exact rounding is skewed for each processor to avoid all
130 * processors firing at the exact same time, which could lead
131 * to lock contention or spurious cache line bouncing.
132 *
133 * The return value is the rounded version of the @j parameter.
134 */
135unsigned long __round_jiffies(unsigned long j, int cpu)
136{ 117{
137 int rem; 118 int rem;
138 unsigned long original = j; 119 unsigned long original = j;
@@ -154,8 +135,9 @@ unsigned long __round_jiffies(unsigned long j, int cpu)
154 * due to delays of the timer irq, long irq off times etc etc) then 135 * due to delays of the timer irq, long irq off times etc etc) then
155 * we should round down to the whole second, not up. Use 1/4th second 136 * we should round down to the whole second, not up. Use 1/4th second
156 * as cutoff for this rounding as an extreme upper bound for this. 137 * as cutoff for this rounding as an extreme upper bound for this.
138 * But never round down if @force_up is set.
157 */ 139 */
158 if (rem < HZ/4) /* round down */ 140 if (rem < HZ/4 && !force_up) /* round down */
159 j = j - rem; 141 j = j - rem;
160 else /* round up */ 142 else /* round up */
161 j = j - rem + HZ; 143 j = j - rem + HZ;
@@ -167,6 +149,31 @@ unsigned long __round_jiffies(unsigned long j, int cpu)
167 return original; 149 return original;
168 return j; 150 return j;
169} 151}
152
153/**
154 * __round_jiffies - function to round jiffies to a full second
155 * @j: the time in (absolute) jiffies that should be rounded
156 * @cpu: the processor number on which the timeout will happen
157 *
158 * __round_jiffies() rounds an absolute time in the future (in jiffies)
159 * up or down to (approximately) full seconds. This is useful for timers
160 * for which the exact time they fire does not matter too much, as long as
161 * they fire approximately every X seconds.
162 *
163 * By rounding these timers to whole seconds, all such timers will fire
164 * at the same time, rather than at various times spread out. The goal
165 * of this is to have the CPU wake up less, which saves power.
166 *
167 * The exact rounding is skewed for each processor to avoid all
168 * processors firing at the exact same time, which could lead
169 * to lock contention or spurious cache line bouncing.
170 *
171 * The return value is the rounded version of the @j parameter.
172 */
173unsigned long __round_jiffies(unsigned long j, int cpu)
174{
175 return round_jiffies_common(j, cpu, false);
176}
170EXPORT_SYMBOL_GPL(__round_jiffies); 177EXPORT_SYMBOL_GPL(__round_jiffies);
171 178
172/** 179/**
@@ -191,13 +198,10 @@ EXPORT_SYMBOL_GPL(__round_jiffies);
191 */ 198 */
192unsigned long __round_jiffies_relative(unsigned long j, int cpu) 199unsigned long __round_jiffies_relative(unsigned long j, int cpu)
193{ 200{
194 /* 201 unsigned long j0 = jiffies;
195 * In theory the following code can skip a jiffy in case jiffies 202
196 * increments right between the addition and the later subtraction. 203 /* Use j0 because jiffies might change while we run */
197 * However since the entire point of this function is to use approximate 204 return round_jiffies_common(j + j0, cpu, false) - j0;
198 * timeouts, it's entirely ok to not handle that.
199 */
200 return __round_jiffies(j + jiffies, cpu) - jiffies;
201} 205}
202EXPORT_SYMBOL_GPL(__round_jiffies_relative); 206EXPORT_SYMBOL_GPL(__round_jiffies_relative);
203 207
@@ -218,7 +222,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative);
218 */ 222 */
219unsigned long round_jiffies(unsigned long j) 223unsigned long round_jiffies(unsigned long j)
220{ 224{
221 return __round_jiffies(j, raw_smp_processor_id()); 225 return round_jiffies_common(j, raw_smp_processor_id(), false);
222} 226}
223EXPORT_SYMBOL_GPL(round_jiffies); 227EXPORT_SYMBOL_GPL(round_jiffies);
224 228
@@ -243,6 +247,71 @@ unsigned long round_jiffies_relative(unsigned long j)
243} 247}
244EXPORT_SYMBOL_GPL(round_jiffies_relative); 248EXPORT_SYMBOL_GPL(round_jiffies_relative);
245 249
250/**
251 * __round_jiffies_up - function to round jiffies up to a full second
252 * @j: the time in (absolute) jiffies that should be rounded
253 * @cpu: the processor number on which the timeout will happen
254 *
255 * This is the same as __round_jiffies() except that it will never
256 * round down. This is useful for timeouts for which the exact time
257 * of firing does not matter too much, as long as they don't fire too
258 * early.
259 */
260unsigned long __round_jiffies_up(unsigned long j, int cpu)
261{
262 return round_jiffies_common(j, cpu, true);
263}
264EXPORT_SYMBOL_GPL(__round_jiffies_up);
265
266/**
267 * __round_jiffies_up_relative - function to round jiffies up to a full second
268 * @j: the time in (relative) jiffies that should be rounded
269 * @cpu: the processor number on which the timeout will happen
270 *
271 * This is the same as __round_jiffies_relative() except that it will never
272 * round down. This is useful for timeouts for which the exact time
273 * of firing does not matter too much, as long as they don't fire too
274 * early.
275 */
276unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
277{
278 unsigned long j0 = jiffies;
279
280 /* Use j0 because jiffies might change while we run */
281 return round_jiffies_common(j + j0, cpu, true) - j0;
282}
283EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
284
285/**
286 * round_jiffies_up - function to round jiffies up to a full second
287 * @j: the time in (absolute) jiffies that should be rounded
288 *
289 * This is the same as round_jiffies() except that it will never
290 * round down. This is useful for timeouts for which the exact time
291 * of firing does not matter too much, as long as they don't fire too
292 * early.
293 */
294unsigned long round_jiffies_up(unsigned long j)
295{
296 return round_jiffies_common(j, raw_smp_processor_id(), true);
297}
298EXPORT_SYMBOL_GPL(round_jiffies_up);
299
300/**
301 * round_jiffies_up_relative - function to round jiffies up to a full second
302 * @j: the time in (relative) jiffies that should be rounded
303 *
304 * This is the same as round_jiffies_relative() except that it will never
305 * round down. This is useful for timeouts for which the exact time
306 * of firing does not matter too much, as long as they don't fire too
307 * early.
308 */
309unsigned long round_jiffies_up_relative(unsigned long j)
310{
311 return __round_jiffies_up_relative(j, raw_smp_processor_id());
312}
313EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
314
246 315
247static inline void set_running_timer(struct tvec_base *base, 316static inline void set_running_timer(struct tvec_base *base,
248 struct timer_list *timer) 317 struct timer_list *timer)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1cb3e1f616af..33dbefd471e8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,13 +1,13 @@
1# 1#
2# Architectures that offer an FTRACE implementation should select HAVE_FTRACE: 2# Architectures that offer an FUNCTION_TRACER implementation should
3# select HAVE_FUNCTION_TRACER:
3# 4#
4 5
5config NOP_TRACER 6config NOP_TRACER
6 bool 7 bool
7 8
8config HAVE_FTRACE 9config HAVE_FUNCTION_TRACER
9 bool 10 bool
10 select NOP_TRACER
11 11
12config HAVE_DYNAMIC_FTRACE 12config HAVE_DYNAMIC_FTRACE
13 bool 13 bool
@@ -25,12 +25,15 @@ config TRACING
25 bool 25 bool
26 select DEBUG_FS 26 select DEBUG_FS
27 select RING_BUFFER 27 select RING_BUFFER
28 select STACKTRACE 28 select STACKTRACE if STACKTRACE_SUPPORT
29 select TRACEPOINTS 29 select TRACEPOINTS
30 select NOP_TRACER
30 31
31config FTRACE 32menu "Tracers"
33
34config FUNCTION_TRACER
32 bool "Kernel Function Tracer" 35 bool "Kernel Function Tracer"
33 depends on HAVE_FTRACE 36 depends on HAVE_FUNCTION_TRACER
34 depends on DEBUG_KERNEL 37 depends on DEBUG_KERNEL
35 select FRAME_POINTER 38 select FRAME_POINTER
36 select TRACING 39 select TRACING
@@ -49,7 +52,6 @@ config IRQSOFF_TRACER
49 default n 52 default n
50 depends on TRACE_IRQFLAGS_SUPPORT 53 depends on TRACE_IRQFLAGS_SUPPORT
51 depends on GENERIC_TIME 54 depends on GENERIC_TIME
52 depends on HAVE_FTRACE
53 depends on DEBUG_KERNEL 55 depends on DEBUG_KERNEL
54 select TRACE_IRQFLAGS 56 select TRACE_IRQFLAGS
55 select TRACING 57 select TRACING
@@ -73,7 +75,6 @@ config PREEMPT_TRACER
73 default n 75 default n
74 depends on GENERIC_TIME 76 depends on GENERIC_TIME
75 depends on PREEMPT 77 depends on PREEMPT
76 depends on HAVE_FTRACE
77 depends on DEBUG_KERNEL 78 depends on DEBUG_KERNEL
78 select TRACING 79 select TRACING
79 select TRACER_MAX_TRACE 80 select TRACER_MAX_TRACE
@@ -101,7 +102,6 @@ config SYSPROF_TRACER
101 102
102config SCHED_TRACER 103config SCHED_TRACER
103 bool "Scheduling Latency Tracer" 104 bool "Scheduling Latency Tracer"
104 depends on HAVE_FTRACE
105 depends on DEBUG_KERNEL 105 depends on DEBUG_KERNEL
106 select TRACING 106 select TRACING
107 select CONTEXT_SWITCH_TRACER 107 select CONTEXT_SWITCH_TRACER
@@ -112,7 +112,6 @@ config SCHED_TRACER
112 112
113config CONTEXT_SWITCH_TRACER 113config CONTEXT_SWITCH_TRACER
114 bool "Trace process context switches" 114 bool "Trace process context switches"
115 depends on HAVE_FTRACE
116 depends on DEBUG_KERNEL 115 depends on DEBUG_KERNEL
117 select TRACING 116 select TRACING
118 select MARKERS 117 select MARKERS
@@ -122,9 +121,9 @@ config CONTEXT_SWITCH_TRACER
122 121
123config BOOT_TRACER 122config BOOT_TRACER
124 bool "Trace boot initcalls" 123 bool "Trace boot initcalls"
125 depends on HAVE_FTRACE
126 depends on DEBUG_KERNEL 124 depends on DEBUG_KERNEL
127 select TRACING 125 select TRACING
126 select CONTEXT_SWITCH_TRACER
128 help 127 help
129 This tracer helps developers to optimize boot times: it records 128 This tracer helps developers to optimize boot times: it records
130 the timings of the initcalls and traces key events and the identity 129 the timings of the initcalls and traces key events and the identity
@@ -141,9 +140,9 @@ config BOOT_TRACER
141 140
142config STACK_TRACER 141config STACK_TRACER
143 bool "Trace max stack" 142 bool "Trace max stack"
144 depends on HAVE_FTRACE 143 depends on HAVE_FUNCTION_TRACER
145 depends on DEBUG_KERNEL 144 depends on DEBUG_KERNEL
146 select FTRACE 145 select FUNCTION_TRACER
147 select STACKTRACE 146 select STACKTRACE
148 help 147 help
149 This special tracer records the maximum stack footprint of the 148 This special tracer records the maximum stack footprint of the
@@ -160,7 +159,7 @@ config STACK_TRACER
160 159
161config DYNAMIC_FTRACE 160config DYNAMIC_FTRACE
162 bool "enable/disable ftrace tracepoints dynamically" 161 bool "enable/disable ftrace tracepoints dynamically"
163 depends on FTRACE 162 depends on FUNCTION_TRACER
164 depends on HAVE_DYNAMIC_FTRACE 163 depends on HAVE_DYNAMIC_FTRACE
165 depends on DEBUG_KERNEL 164 depends on DEBUG_KERNEL
166 default y 165 default y
@@ -170,7 +169,7 @@ config DYNAMIC_FTRACE
170 with a No-Op instruction) as they are called. A table is 169 with a No-Op instruction) as they are called. A table is
171 created to dynamically enable them again. 170 created to dynamically enable them again.
172 171
173 This way a CONFIG_FTRACE kernel is slightly larger, but otherwise 172 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
174 has native performance as long as no tracing is active. 173 has native performance as long as no tracing is active.
175 174
176 The changes to the code are done by a kernel thread that 175 The changes to the code are done by a kernel thread that
@@ -195,3 +194,5 @@ config FTRACE_STARTUP_TEST
195 a series of tests are made to verify that the tracer is 194 a series of tests are made to verify that the tracer is
196 functioning properly. It will do tests on all the configured 195 functioning properly. It will do tests on all the configured
197 tracers of ftrace. 196 tracers of ftrace.
197
198endmenu
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index a85dfba88ba0..c8228b1a49e9 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,7 +1,7 @@
1 1
2# Do not instrument the tracer itself: 2# Do not instrument the tracer itself:
3 3
4ifdef CONFIG_FTRACE 4ifdef CONFIG_FUNCTION_TRACER
5ORIG_CFLAGS := $(KBUILD_CFLAGS) 5ORIG_CFLAGS := $(KBUILD_CFLAGS)
6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) 6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
7 7
@@ -10,13 +10,13 @@ CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o 10obj-y += trace_selftest_dynamic.o
11endif 11endif
12 12
13obj-$(CONFIG_FTRACE) += libftrace.o 13obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
14obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 14obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
15 15
16obj-$(CONFIG_TRACING) += trace.o 16obj-$(CONFIG_TRACING) += trace.o
17obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o 17obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
18obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o 18obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
19obj-$(CONFIG_FTRACE) += trace_functions.o 19obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
20obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o 20obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
21obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o 21obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
22obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o 22obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4dda4f60a2a9..4a39d24568c8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -25,13 +25,24 @@
25#include <linux/ftrace.h> 25#include <linux/ftrace.h>
26#include <linux/sysctl.h> 26#include <linux/sysctl.h>
27#include <linux/ctype.h> 27#include <linux/ctype.h>
28#include <linux/hash.h>
29#include <linux/list.h> 28#include <linux/list.h>
30 29
31#include <asm/ftrace.h> 30#include <asm/ftrace.h>
32 31
33#include "trace.h" 32#include "trace.h"
34 33
34#define FTRACE_WARN_ON(cond) \
35 do { \
36 if (WARN_ON(cond)) \
37 ftrace_kill(); \
38 } while (0)
39
40#define FTRACE_WARN_ON_ONCE(cond) \
41 do { \
42 if (WARN_ON_ONCE(cond)) \
43 ftrace_kill(); \
44 } while (0)
45
35/* ftrace_enabled is a method to turn ftrace on or off */ 46/* ftrace_enabled is a method to turn ftrace on or off */
36int ftrace_enabled __read_mostly; 47int ftrace_enabled __read_mostly;
37static int last_ftrace_enabled; 48static int last_ftrace_enabled;
@@ -153,21 +164,8 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
153} 164}
154 165
155#ifdef CONFIG_DYNAMIC_FTRACE 166#ifdef CONFIG_DYNAMIC_FTRACE
156
157#ifndef CONFIG_FTRACE_MCOUNT_RECORD 167#ifndef CONFIG_FTRACE_MCOUNT_RECORD
158/* 168# error Dynamic ftrace depends on MCOUNT_RECORD
159 * The hash lock is only needed when the recording of the mcount
160 * callers are dynamic. That is, by the caller themselves and
161 * not recorded via the compilation.
162 */
163static DEFINE_SPINLOCK(ftrace_hash_lock);
164#define ftrace_hash_lock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
165#define ftrace_hash_unlock(flags) \
166 spin_unlock_irqrestore(&ftrace_hash_lock, flags)
167#else
168/* This is protected via the ftrace_lock with MCOUNT_RECORD. */
169#define ftrace_hash_lock(flags) do { (void)(flags); } while (0)
170#define ftrace_hash_unlock(flags) do { } while(0)
171#endif 169#endif
172 170
173/* 171/*
@@ -178,8 +176,6 @@ static DEFINE_SPINLOCK(ftrace_hash_lock);
178 */ 176 */
179static unsigned long mcount_addr = MCOUNT_ADDR; 177static unsigned long mcount_addr = MCOUNT_ADDR;
180 178
181static struct task_struct *ftraced_task;
182
183enum { 179enum {
184 FTRACE_ENABLE_CALLS = (1 << 0), 180 FTRACE_ENABLE_CALLS = (1 << 0),
185 FTRACE_DISABLE_CALLS = (1 << 1), 181 FTRACE_DISABLE_CALLS = (1 << 1),
@@ -190,13 +186,9 @@ enum {
190 186
191static int ftrace_filtered; 187static int ftrace_filtered;
192static int tracing_on; 188static int tracing_on;
193static int frozen_record_count;
194
195static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
196 189
197static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); 190static LIST_HEAD(ftrace_new_addrs);
198 191
199static DEFINE_MUTEX(ftraced_lock);
200static DEFINE_MUTEX(ftrace_regex_lock); 192static DEFINE_MUTEX(ftrace_regex_lock);
201 193
202struct ftrace_page { 194struct ftrace_page {
@@ -214,16 +206,13 @@ struct ftrace_page {
214static struct ftrace_page *ftrace_pages_start; 206static struct ftrace_page *ftrace_pages_start;
215static struct ftrace_page *ftrace_pages; 207static struct ftrace_page *ftrace_pages;
216 208
217static int ftraced_trigger;
218static int ftraced_suspend;
219static int ftraced_stop;
220
221static int ftrace_record_suspend;
222
223static struct dyn_ftrace *ftrace_free_records; 209static struct dyn_ftrace *ftrace_free_records;
224 210
225 211
226#ifdef CONFIG_KPROBES 212#ifdef CONFIG_KPROBES
213
214static int frozen_record_count;
215
227static inline void freeze_record(struct dyn_ftrace *rec) 216static inline void freeze_record(struct dyn_ftrace *rec)
228{ 217{
229 if (!(rec->flags & FTRACE_FL_FROZEN)) { 218 if (!(rec->flags & FTRACE_FL_FROZEN)) {
@@ -250,72 +239,6 @@ static inline int record_frozen(struct dyn_ftrace *rec)
250# define record_frozen(rec) ({ 0; }) 239# define record_frozen(rec) ({ 0; })
251#endif /* CONFIG_KPROBES */ 240#endif /* CONFIG_KPROBES */
252 241
253int skip_trace(unsigned long ip)
254{
255 unsigned long fl;
256 struct dyn_ftrace *rec;
257 struct hlist_node *t;
258 struct hlist_head *head;
259
260 if (frozen_record_count == 0)
261 return 0;
262
263 head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
264 hlist_for_each_entry_rcu(rec, t, head, node) {
265 if (rec->ip == ip) {
266 if (record_frozen(rec)) {
267 if (rec->flags & FTRACE_FL_FAILED)
268 return 1;
269
270 if (!(rec->flags & FTRACE_FL_CONVERTED))
271 return 1;
272
273 if (!tracing_on || !ftrace_enabled)
274 return 1;
275
276 if (ftrace_filtered) {
277 fl = rec->flags & (FTRACE_FL_FILTER |
278 FTRACE_FL_NOTRACE);
279 if (!fl || (fl & FTRACE_FL_NOTRACE))
280 return 1;
281 }
282 }
283 break;
284 }
285 }
286
287 return 0;
288}
289
290static inline int
291ftrace_ip_in_hash(unsigned long ip, unsigned long key)
292{
293 struct dyn_ftrace *p;
294 struct hlist_node *t;
295 int found = 0;
296
297 hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) {
298 if (p->ip == ip) {
299 found = 1;
300 break;
301 }
302 }
303
304 return found;
305}
306
307static inline void
308ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
309{
310 hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
311}
312
313/* called from kstop_machine */
314static inline void ftrace_del_hash(struct dyn_ftrace *node)
315{
316 hlist_del(&node->node);
317}
318
319static void ftrace_free_rec(struct dyn_ftrace *rec) 242static void ftrace_free_rec(struct dyn_ftrace *rec)
320{ 243{
321 rec->ip = (unsigned long)ftrace_free_records; 244 rec->ip = (unsigned long)ftrace_free_records;
@@ -346,7 +269,6 @@ void ftrace_release(void *start, unsigned long size)
346 } 269 }
347 } 270 }
348 spin_unlock(&ftrace_lock); 271 spin_unlock(&ftrace_lock);
349
350} 272}
351 273
352static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 274static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -358,10 +280,8 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
358 rec = ftrace_free_records; 280 rec = ftrace_free_records;
359 281
360 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { 282 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
361 WARN_ON_ONCE(1); 283 FTRACE_WARN_ON_ONCE(1);
362 ftrace_free_records = NULL; 284 ftrace_free_records = NULL;
363 ftrace_disabled = 1;
364 ftrace_enabled = 0;
365 return NULL; 285 return NULL;
366 } 286 }
367 287
@@ -371,76 +291,36 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
371 } 291 }
372 292
373 if (ftrace_pages->index == ENTRIES_PER_PAGE) { 293 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
374 if (!ftrace_pages->next) 294 if (!ftrace_pages->next) {
375 return NULL; 295 /* allocate another page */
296 ftrace_pages->next =
297 (void *)get_zeroed_page(GFP_KERNEL);
298 if (!ftrace_pages->next)
299 return NULL;
300 }
376 ftrace_pages = ftrace_pages->next; 301 ftrace_pages = ftrace_pages->next;
377 } 302 }
378 303
379 return &ftrace_pages->records[ftrace_pages->index++]; 304 return &ftrace_pages->records[ftrace_pages->index++];
380} 305}
381 306
382static void 307static struct dyn_ftrace *
383ftrace_record_ip(unsigned long ip) 308ftrace_record_ip(unsigned long ip)
384{ 309{
385 struct dyn_ftrace *node; 310 struct dyn_ftrace *rec;
386 unsigned long flags;
387 unsigned long key;
388 int resched;
389 int cpu;
390 311
391 if (!ftrace_enabled || ftrace_disabled) 312 if (!ftrace_enabled || ftrace_disabled)
392 return; 313 return NULL;
393
394 resched = need_resched();
395 preempt_disable_notrace();
396 314
397 /* 315 rec = ftrace_alloc_dyn_node(ip);
398 * We simply need to protect against recursion. 316 if (!rec)
399 * Use the the raw version of smp_processor_id and not 317 return NULL;
400 * __get_cpu_var which can call debug hooks that can
401 * cause a recursive crash here.
402 */
403 cpu = raw_smp_processor_id();
404 per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
405 if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
406 goto out;
407
408 if (unlikely(ftrace_record_suspend))
409 goto out;
410
411 key = hash_long(ip, FTRACE_HASHBITS);
412
413 WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
414
415 if (ftrace_ip_in_hash(ip, key))
416 goto out;
417
418 ftrace_hash_lock(flags);
419
420 /* This ip may have hit the hash before the lock */
421 if (ftrace_ip_in_hash(ip, key))
422 goto out_unlock;
423
424 node = ftrace_alloc_dyn_node(ip);
425 if (!node)
426 goto out_unlock;
427
428 node->ip = ip;
429
430 ftrace_add_hash(node, key);
431 318
432 ftraced_trigger = 1; 319 rec->ip = ip;
433 320
434 out_unlock: 321 list_add(&rec->list, &ftrace_new_addrs);
435 ftrace_hash_unlock(flags);
436 out:
437 per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
438 322
439 /* prevent recursion with scheduler */ 323 return rec;
440 if (resched)
441 preempt_enable_no_resched_notrace();
442 else
443 preempt_enable_notrace();
444} 324}
445 325
446#define FTRACE_ADDR ((long)(ftrace_caller)) 326#define FTRACE_ADDR ((long)(ftrace_caller))
@@ -559,7 +439,6 @@ static void ftrace_replace_code(int enable)
559 rec->flags |= FTRACE_FL_FAILED; 439 rec->flags |= FTRACE_FL_FAILED;
560 if ((system_state == SYSTEM_BOOTING) || 440 if ((system_state == SYSTEM_BOOTING) ||
561 !core_kernel_text(rec->ip)) { 441 !core_kernel_text(rec->ip)) {
562 ftrace_del_hash(rec);
563 ftrace_free_rec(rec); 442 ftrace_free_rec(rec);
564 } 443 }
565 } 444 }
@@ -567,15 +446,6 @@ static void ftrace_replace_code(int enable)
567 } 446 }
568} 447}
569 448
570static void ftrace_shutdown_replenish(void)
571{
572 if (ftrace_pages->next)
573 return;
574
575 /* allocate another page */
576 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
577}
578
579static void print_ip_ins(const char *fmt, unsigned char *p) 449static void print_ip_ins(const char *fmt, unsigned char *p)
580{ 450{
581 int i; 451 int i;
@@ -591,23 +461,23 @@ ftrace_code_disable(struct dyn_ftrace *rec)
591{ 461{
592 unsigned long ip; 462 unsigned long ip;
593 unsigned char *nop, *call; 463 unsigned char *nop, *call;
594 int failed; 464 int ret;
595 465
596 ip = rec->ip; 466 ip = rec->ip;
597 467
598 nop = ftrace_nop_replace(); 468 nop = ftrace_nop_replace();
599 call = ftrace_call_replace(ip, mcount_addr); 469 call = ftrace_call_replace(ip, mcount_addr);
600 470
601 failed = ftrace_modify_code(ip, call, nop); 471 ret = ftrace_modify_code(ip, call, nop);
602 if (failed) { 472 if (ret) {
603 switch (failed) { 473 switch (ret) {
604 case 1: 474 case -EFAULT:
605 WARN_ON_ONCE(1); 475 FTRACE_WARN_ON_ONCE(1);
606 pr_info("ftrace faulted on modifying "); 476 pr_info("ftrace faulted on modifying ");
607 print_ip_sym(ip); 477 print_ip_sym(ip);
608 break; 478 break;
609 case 2: 479 case -EINVAL:
610 WARN_ON_ONCE(1); 480 FTRACE_WARN_ON_ONCE(1);
611 pr_info("ftrace failed to modify "); 481 pr_info("ftrace failed to modify ");
612 print_ip_sym(ip); 482 print_ip_sym(ip);
613 print_ip_ins(" expected: ", call); 483 print_ip_ins(" expected: ", call);
@@ -615,6 +485,15 @@ ftrace_code_disable(struct dyn_ftrace *rec)
615 print_ip_ins(" replace: ", nop); 485 print_ip_ins(" replace: ", nop);
616 printk(KERN_CONT "\n"); 486 printk(KERN_CONT "\n");
617 break; 487 break;
488 case -EPERM:
489 FTRACE_WARN_ON_ONCE(1);
490 pr_info("ftrace faulted on writing ");
491 print_ip_sym(ip);
492 break;
493 default:
494 FTRACE_WARN_ON_ONCE(1);
495 pr_info("ftrace faulted on unknown error ");
496 print_ip_sym(ip);
618 } 497 }
619 498
620 rec->flags |= FTRACE_FL_FAILED; 499 rec->flags |= FTRACE_FL_FAILED;
@@ -623,19 +502,11 @@ ftrace_code_disable(struct dyn_ftrace *rec)
623 return 1; 502 return 1;
624} 503}
625 504
626static int __ftrace_update_code(void *ignore);
627
628static int __ftrace_modify_code(void *data) 505static int __ftrace_modify_code(void *data)
629{ 506{
630 unsigned long addr;
631 int *command = data; 507 int *command = data;
632 508
633 if (*command & FTRACE_ENABLE_CALLS) { 509 if (*command & FTRACE_ENABLE_CALLS) {
634 /*
635 * Update any recorded ips now that we have the
636 * machine stopped
637 */
638 __ftrace_update_code(NULL);
639 ftrace_replace_code(1); 510 ftrace_replace_code(1);
640 tracing_on = 1; 511 tracing_on = 1;
641 } else if (*command & FTRACE_DISABLE_CALLS) { 512 } else if (*command & FTRACE_DISABLE_CALLS) {
@@ -646,14 +517,6 @@ static int __ftrace_modify_code(void *data)
646 if (*command & FTRACE_UPDATE_TRACE_FUNC) 517 if (*command & FTRACE_UPDATE_TRACE_FUNC)
647 ftrace_update_ftrace_func(ftrace_trace_function); 518 ftrace_update_ftrace_func(ftrace_trace_function);
648 519
649 if (*command & FTRACE_ENABLE_MCOUNT) {
650 addr = (unsigned long)ftrace_record_ip;
651 ftrace_mcount_set(&addr);
652 } else if (*command & FTRACE_DISABLE_MCOUNT) {
653 addr = (unsigned long)ftrace_stub;
654 ftrace_mcount_set(&addr);
655 }
656
657 return 0; 520 return 0;
658} 521}
659 522
@@ -662,26 +525,9 @@ static void ftrace_run_update_code(int command)
662 stop_machine(__ftrace_modify_code, &command, NULL); 525 stop_machine(__ftrace_modify_code, &command, NULL);
663} 526}
664 527
665void ftrace_disable_daemon(void)
666{
667 /* Stop the daemon from calling kstop_machine */
668 mutex_lock(&ftraced_lock);
669 ftraced_stop = 1;
670 mutex_unlock(&ftraced_lock);
671
672 ftrace_force_update();
673}
674
675void ftrace_enable_daemon(void)
676{
677 mutex_lock(&ftraced_lock);
678 ftraced_stop = 0;
679 mutex_unlock(&ftraced_lock);
680
681 ftrace_force_update();
682}
683
684static ftrace_func_t saved_ftrace_func; 528static ftrace_func_t saved_ftrace_func;
529static int ftrace_start;
530static DEFINE_MUTEX(ftrace_start_lock);
685 531
686static void ftrace_startup(void) 532static void ftrace_startup(void)
687{ 533{
@@ -690,9 +536,9 @@ static void ftrace_startup(void)
690 if (unlikely(ftrace_disabled)) 536 if (unlikely(ftrace_disabled))
691 return; 537 return;
692 538
693 mutex_lock(&ftraced_lock); 539 mutex_lock(&ftrace_start_lock);
694 ftraced_suspend++; 540 ftrace_start++;
695 if (ftraced_suspend == 1) 541 if (ftrace_start == 1)
696 command |= FTRACE_ENABLE_CALLS; 542 command |= FTRACE_ENABLE_CALLS;
697 543
698 if (saved_ftrace_func != ftrace_trace_function) { 544 if (saved_ftrace_func != ftrace_trace_function) {
@@ -705,7 +551,7 @@ static void ftrace_startup(void)
705 551
706 ftrace_run_update_code(command); 552 ftrace_run_update_code(command);
707 out: 553 out:
708 mutex_unlock(&ftraced_lock); 554 mutex_unlock(&ftrace_start_lock);
709} 555}
710 556
711static void ftrace_shutdown(void) 557static void ftrace_shutdown(void)
@@ -715,9 +561,9 @@ static void ftrace_shutdown(void)
715 if (unlikely(ftrace_disabled)) 561 if (unlikely(ftrace_disabled))
716 return; 562 return;
717 563
718 mutex_lock(&ftraced_lock); 564 mutex_lock(&ftrace_start_lock);
719 ftraced_suspend--; 565 ftrace_start--;
720 if (!ftraced_suspend) 566 if (!ftrace_start)
721 command |= FTRACE_DISABLE_CALLS; 567 command |= FTRACE_DISABLE_CALLS;
722 568
723 if (saved_ftrace_func != ftrace_trace_function) { 569 if (saved_ftrace_func != ftrace_trace_function) {
@@ -730,7 +576,7 @@ static void ftrace_shutdown(void)
730 576
731 ftrace_run_update_code(command); 577 ftrace_run_update_code(command);
732 out: 578 out:
733 mutex_unlock(&ftraced_lock); 579 mutex_unlock(&ftrace_start_lock);
734} 580}
735 581
736static void ftrace_startup_sysctl(void) 582static void ftrace_startup_sysctl(void)
@@ -740,15 +586,15 @@ static void ftrace_startup_sysctl(void)
740 if (unlikely(ftrace_disabled)) 586 if (unlikely(ftrace_disabled))
741 return; 587 return;
742 588
743 mutex_lock(&ftraced_lock); 589 mutex_lock(&ftrace_start_lock);
744 /* Force update next time */ 590 /* Force update next time */
745 saved_ftrace_func = NULL; 591 saved_ftrace_func = NULL;
746 /* ftraced_suspend is true if we want ftrace running */ 592 /* ftrace_start is true if we want ftrace running */
747 if (ftraced_suspend) 593 if (ftrace_start)
748 command |= FTRACE_ENABLE_CALLS; 594 command |= FTRACE_ENABLE_CALLS;
749 595
750 ftrace_run_update_code(command); 596 ftrace_run_update_code(command);
751 mutex_unlock(&ftraced_lock); 597 mutex_unlock(&ftrace_start_lock);
752} 598}
753 599
754static void ftrace_shutdown_sysctl(void) 600static void ftrace_shutdown_sysctl(void)
@@ -758,112 +604,50 @@ static void ftrace_shutdown_sysctl(void)
758 if (unlikely(ftrace_disabled)) 604 if (unlikely(ftrace_disabled))
759 return; 605 return;
760 606
761 mutex_lock(&ftraced_lock); 607 mutex_lock(&ftrace_start_lock);
762 /* ftraced_suspend is true if ftrace is running */ 608 /* ftrace_start is true if ftrace is running */
763 if (ftraced_suspend) 609 if (ftrace_start)
764 command |= FTRACE_DISABLE_CALLS; 610 command |= FTRACE_DISABLE_CALLS;
765 611
766 ftrace_run_update_code(command); 612 ftrace_run_update_code(command);
767 mutex_unlock(&ftraced_lock); 613 mutex_unlock(&ftrace_start_lock);
768} 614}
769 615
770static cycle_t ftrace_update_time; 616static cycle_t ftrace_update_time;
771static unsigned long ftrace_update_cnt; 617static unsigned long ftrace_update_cnt;
772unsigned long ftrace_update_tot_cnt; 618unsigned long ftrace_update_tot_cnt;
773 619
774static int __ftrace_update_code(void *ignore) 620static int ftrace_update_code(void)
775{ 621{
776 int i, save_ftrace_enabled; 622 struct dyn_ftrace *p, *t;
777 cycle_t start, stop; 623 cycle_t start, stop;
778 struct dyn_ftrace *p;
779 struct hlist_node *t, *n;
780 struct hlist_head *head, temp_list;
781
782 /* Don't be recording funcs now */
783 ftrace_record_suspend++;
784 save_ftrace_enabled = ftrace_enabled;
785 ftrace_enabled = 0;
786 624
787 start = ftrace_now(raw_smp_processor_id()); 625 start = ftrace_now(raw_smp_processor_id());
788 ftrace_update_cnt = 0; 626 ftrace_update_cnt = 0;
789 627
790 /* No locks needed, the machine is stopped! */ 628 list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) {
791 for (i = 0; i < FTRACE_HASHSIZE; i++) {
792 INIT_HLIST_HEAD(&temp_list);
793 head = &ftrace_hash[i];
794 629
795 /* all CPUS are stopped, we are safe to modify code */ 630 /* If something went wrong, bail without enabling anything */
796 hlist_for_each_entry_safe(p, t, n, head, node) { 631 if (unlikely(ftrace_disabled))
797 /* Skip over failed records which have not been 632 return -1;
798 * freed. */
799 if (p->flags & FTRACE_FL_FAILED)
800 continue;
801 633
802 /* Unconverted records are always at the head of the 634 list_del_init(&p->list);
803 * hash bucket. Once we encounter a converted record,
804 * simply skip over to the next bucket. Saves ftraced
805 * some processor cycles (ftrace does its bid for
806 * global warming :-p ). */
807 if (p->flags & (FTRACE_FL_CONVERTED))
808 break;
809 635
810 /* Ignore updates to this record's mcount site. 636 /* convert record (i.e, patch mcount-call with NOP) */
811 * Reintroduce this record at the head of this 637 if (ftrace_code_disable(p)) {
812 * bucket to attempt to "convert" it again if 638 p->flags |= FTRACE_FL_CONVERTED;
813 * the kprobe on it is unregistered before the 639 ftrace_update_cnt++;
814 * next run. */ 640 } else
815 if (get_kprobe((void *)p->ip)) { 641 ftrace_free_rec(p);
816 ftrace_del_hash(p);
817 INIT_HLIST_NODE(&p->node);
818 hlist_add_head(&p->node, &temp_list);
819 freeze_record(p);
820 continue;
821 } else {
822 unfreeze_record(p);
823 }
824
825 /* convert record (i.e, patch mcount-call with NOP) */
826 if (ftrace_code_disable(p)) {
827 p->flags |= FTRACE_FL_CONVERTED;
828 ftrace_update_cnt++;
829 } else {
830 if ((system_state == SYSTEM_BOOTING) ||
831 !core_kernel_text(p->ip)) {
832 ftrace_del_hash(p);
833 ftrace_free_rec(p);
834 }
835 }
836 }
837
838 hlist_for_each_entry_safe(p, t, n, &temp_list, node) {
839 hlist_del(&p->node);
840 INIT_HLIST_NODE(&p->node);
841 hlist_add_head(&p->node, head);
842 }
843 } 642 }
844 643
845 stop = ftrace_now(raw_smp_processor_id()); 644 stop = ftrace_now(raw_smp_processor_id());
846 ftrace_update_time = stop - start; 645 ftrace_update_time = stop - start;
847 ftrace_update_tot_cnt += ftrace_update_cnt; 646 ftrace_update_tot_cnt += ftrace_update_cnt;
848 ftraced_trigger = 0;
849
850 ftrace_enabled = save_ftrace_enabled;
851 ftrace_record_suspend--;
852 647
853 return 0; 648 return 0;
854} 649}
855 650
856static int ftrace_update_code(void)
857{
858 if (unlikely(ftrace_disabled) ||
859 !ftrace_enabled || !ftraced_trigger)
860 return 0;
861
862 stop_machine(__ftrace_update_code, NULL, NULL);
863
864 return 1;
865}
866
867static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) 651static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
868{ 652{
869 struct ftrace_page *pg; 653 struct ftrace_page *pg;
@@ -892,7 +676,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
892 pg = ftrace_pages = ftrace_pages_start; 676 pg = ftrace_pages = ftrace_pages_start;
893 677
894 cnt = num_to_init / ENTRIES_PER_PAGE; 678 cnt = num_to_init / ENTRIES_PER_PAGE;
895 pr_info("ftrace: allocating %ld hash entries in %d pages\n", 679 pr_info("ftrace: allocating %ld entries in %d pages\n",
896 num_to_init, cnt); 680 num_to_init, cnt);
897 681
898 for (i = 0; i < cnt; i++) { 682 for (i = 0; i < cnt; i++) {
@@ -1401,10 +1185,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1401 } 1185 }
1402 1186
1403 mutex_lock(&ftrace_sysctl_lock); 1187 mutex_lock(&ftrace_sysctl_lock);
1404 mutex_lock(&ftraced_lock); 1188 mutex_lock(&ftrace_start_lock);
1405 if (iter->filtered && ftraced_suspend && ftrace_enabled) 1189 if (iter->filtered && ftrace_start && ftrace_enabled)
1406 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1190 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1407 mutex_unlock(&ftraced_lock); 1191 mutex_unlock(&ftrace_start_lock);
1408 mutex_unlock(&ftrace_sysctl_lock); 1192 mutex_unlock(&ftrace_sysctl_lock);
1409 1193
1410 kfree(iter); 1194 kfree(iter);
@@ -1424,55 +1208,6 @@ ftrace_notrace_release(struct inode *inode, struct file *file)
1424 return ftrace_regex_release(inode, file, 0); 1208 return ftrace_regex_release(inode, file, 0);
1425} 1209}
1426 1210
1427static ssize_t
1428ftraced_read(struct file *filp, char __user *ubuf,
1429 size_t cnt, loff_t *ppos)
1430{
1431 /* don't worry about races */
1432 char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
1433 int r = strlen(buf);
1434
1435 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1436}
1437
1438static ssize_t
1439ftraced_write(struct file *filp, const char __user *ubuf,
1440 size_t cnt, loff_t *ppos)
1441{
1442 char buf[64];
1443 long val;
1444 int ret;
1445
1446 if (cnt >= sizeof(buf))
1447 return -EINVAL;
1448
1449 if (copy_from_user(&buf, ubuf, cnt))
1450 return -EFAULT;
1451
1452 if (strncmp(buf, "enable", 6) == 0)
1453 val = 1;
1454 else if (strncmp(buf, "disable", 7) == 0)
1455 val = 0;
1456 else {
1457 buf[cnt] = 0;
1458
1459 ret = strict_strtoul(buf, 10, &val);
1460 if (ret < 0)
1461 return ret;
1462
1463 val = !!val;
1464 }
1465
1466 if (val)
1467 ftrace_enable_daemon();
1468 else
1469 ftrace_disable_daemon();
1470
1471 filp->f_pos += cnt;
1472
1473 return cnt;
1474}
1475
1476static struct file_operations ftrace_avail_fops = { 1211static struct file_operations ftrace_avail_fops = {
1477 .open = ftrace_avail_open, 1212 .open = ftrace_avail_open,
1478 .read = seq_read, 1213 .read = seq_read,
@@ -1503,54 +1238,6 @@ static struct file_operations ftrace_notrace_fops = {
1503 .release = ftrace_notrace_release, 1238 .release = ftrace_notrace_release,
1504}; 1239};
1505 1240
1506static struct file_operations ftraced_fops = {
1507 .open = tracing_open_generic,
1508 .read = ftraced_read,
1509 .write = ftraced_write,
1510};
1511
1512/**
1513 * ftrace_force_update - force an update to all recording ftrace functions
1514 */
1515int ftrace_force_update(void)
1516{
1517 int ret = 0;
1518
1519 if (unlikely(ftrace_disabled))
1520 return -ENODEV;
1521
1522 mutex_lock(&ftrace_sysctl_lock);
1523 mutex_lock(&ftraced_lock);
1524
1525 /*
1526 * If ftraced_trigger is not set, then there is nothing
1527 * to update.
1528 */
1529 if (ftraced_trigger && !ftrace_update_code())
1530 ret = -EBUSY;
1531
1532 mutex_unlock(&ftraced_lock);
1533 mutex_unlock(&ftrace_sysctl_lock);
1534
1535 return ret;
1536}
1537
1538static void ftrace_force_shutdown(void)
1539{
1540 struct task_struct *task;
1541 int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
1542
1543 mutex_lock(&ftraced_lock);
1544 task = ftraced_task;
1545 ftraced_task = NULL;
1546 ftraced_suspend = -1;
1547 ftrace_run_update_code(command);
1548 mutex_unlock(&ftraced_lock);
1549
1550 if (task)
1551 kthread_stop(task);
1552}
1553
1554static __init int ftrace_init_debugfs(void) 1241static __init int ftrace_init_debugfs(void)
1555{ 1242{
1556 struct dentry *d_tracer; 1243 struct dentry *d_tracer;
@@ -1581,17 +1268,11 @@ static __init int ftrace_init_debugfs(void)
1581 pr_warning("Could not create debugfs " 1268 pr_warning("Could not create debugfs "
1582 "'set_ftrace_notrace' entry\n"); 1269 "'set_ftrace_notrace' entry\n");
1583 1270
1584 entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer,
1585 NULL, &ftraced_fops);
1586 if (!entry)
1587 pr_warning("Could not create debugfs "
1588 "'ftraced_enabled' entry\n");
1589 return 0; 1271 return 0;
1590} 1272}
1591 1273
1592fs_initcall(ftrace_init_debugfs); 1274fs_initcall(ftrace_init_debugfs);
1593 1275
1594#ifdef CONFIG_FTRACE_MCOUNT_RECORD
1595static int ftrace_convert_nops(unsigned long *start, 1276static int ftrace_convert_nops(unsigned long *start,
1596 unsigned long *end) 1277 unsigned long *end)
1597{ 1278{
@@ -1599,20 +1280,18 @@ static int ftrace_convert_nops(unsigned long *start,
1599 unsigned long addr; 1280 unsigned long addr;
1600 unsigned long flags; 1281 unsigned long flags;
1601 1282
1283 mutex_lock(&ftrace_start_lock);
1602 p = start; 1284 p = start;
1603 while (p < end) { 1285 while (p < end) {
1604 addr = ftrace_call_adjust(*p++); 1286 addr = ftrace_call_adjust(*p++);
1605 /* should not be called from interrupt context */
1606 spin_lock(&ftrace_lock);
1607 ftrace_record_ip(addr); 1287 ftrace_record_ip(addr);
1608 spin_unlock(&ftrace_lock);
1609 ftrace_shutdown_replenish();
1610 } 1288 }
1611 1289
1612 /* p is ignored */ 1290 /* disable interrupts to prevent kstop machine */
1613 local_irq_save(flags); 1291 local_irq_save(flags);
1614 __ftrace_update_code(p); 1292 ftrace_update_code();
1615 local_irq_restore(flags); 1293 local_irq_restore(flags);
1294 mutex_unlock(&ftrace_start_lock);
1616 1295
1617 return 0; 1296 return 0;
1618} 1297}
@@ -1658,130 +1337,34 @@ void __init ftrace_init(void)
1658 failed: 1337 failed:
1659 ftrace_disabled = 1; 1338 ftrace_disabled = 1;
1660} 1339}
1661#else /* CONFIG_FTRACE_MCOUNT_RECORD */
1662static int ftraced(void *ignore)
1663{
1664 unsigned long usecs;
1665
1666 while (!kthread_should_stop()) {
1667
1668 set_current_state(TASK_INTERRUPTIBLE);
1669
1670 /* check once a second */
1671 schedule_timeout(HZ);
1672 1340
1673 if (unlikely(ftrace_disabled)) 1341#else
1674 continue;
1675
1676 mutex_lock(&ftrace_sysctl_lock);
1677 mutex_lock(&ftraced_lock);
1678 if (!ftraced_suspend && !ftraced_stop &&
1679 ftrace_update_code()) {
1680 usecs = nsecs_to_usecs(ftrace_update_time);
1681 if (ftrace_update_tot_cnt > 100000) {
1682 ftrace_update_tot_cnt = 0;
1683 pr_info("hm, dftrace overflow: %lu change%s"
1684 " (%lu total) in %lu usec%s\n",
1685 ftrace_update_cnt,
1686 ftrace_update_cnt != 1 ? "s" : "",
1687 ftrace_update_tot_cnt,
1688 usecs, usecs != 1 ? "s" : "");
1689 ftrace_disabled = 1;
1690 WARN_ON_ONCE(1);
1691 }
1692 }
1693 mutex_unlock(&ftraced_lock);
1694 mutex_unlock(&ftrace_sysctl_lock);
1695
1696 ftrace_shutdown_replenish();
1697 }
1698 __set_current_state(TASK_RUNNING);
1699 return 0;
1700}
1701 1342
1702static int __init ftrace_dynamic_init(void) 1343static int __init ftrace_nodyn_init(void)
1703{ 1344{
1704 struct task_struct *p; 1345 ftrace_enabled = 1;
1705 unsigned long addr;
1706 int ret;
1707
1708 addr = (unsigned long)ftrace_record_ip;
1709
1710 stop_machine(ftrace_dyn_arch_init, &addr, NULL);
1711
1712 /* ftrace_dyn_arch_init places the return code in addr */
1713 if (addr) {
1714 ret = (int)addr;
1715 goto failed;
1716 }
1717
1718 ret = ftrace_dyn_table_alloc(NR_TO_INIT);
1719 if (ret)
1720 goto failed;
1721
1722 p = kthread_run(ftraced, NULL, "ftraced");
1723 if (IS_ERR(p)) {
1724 ret = -1;
1725 goto failed;
1726 }
1727
1728 last_ftrace_enabled = ftrace_enabled = 1;
1729 ftraced_task = p;
1730
1731 return 0; 1346 return 0;
1732
1733 failed:
1734 ftrace_disabled = 1;
1735 return ret;
1736} 1347}
1348device_initcall(ftrace_nodyn_init);
1737 1349
1738core_initcall(ftrace_dynamic_init);
1739#endif /* CONFIG_FTRACE_MCOUNT_RECORD */
1740
1741#else
1742# define ftrace_startup() do { } while (0) 1350# define ftrace_startup() do { } while (0)
1743# define ftrace_shutdown() do { } while (0) 1351# define ftrace_shutdown() do { } while (0)
1744# define ftrace_startup_sysctl() do { } while (0) 1352# define ftrace_startup_sysctl() do { } while (0)
1745# define ftrace_shutdown_sysctl() do { } while (0) 1353# define ftrace_shutdown_sysctl() do { } while (0)
1746# define ftrace_force_shutdown() do { } while (0)
1747#endif /* CONFIG_DYNAMIC_FTRACE */ 1354#endif /* CONFIG_DYNAMIC_FTRACE */
1748 1355
1749/** 1356/**
1750 * ftrace_kill_atomic - kill ftrace from critical sections 1357 * ftrace_kill - kill ftrace
1751 * 1358 *
1752 * This function should be used by panic code. It stops ftrace 1359 * This function should be used by panic code. It stops ftrace
1753 * but in a not so nice way. If you need to simply kill ftrace 1360 * but in a not so nice way. If you need to simply kill ftrace
1754 * from a non-atomic section, use ftrace_kill. 1361 * from a non-atomic section, use ftrace_kill.
1755 */ 1362 */
1756void ftrace_kill_atomic(void)
1757{
1758 ftrace_disabled = 1;
1759 ftrace_enabled = 0;
1760#ifdef CONFIG_DYNAMIC_FTRACE
1761 ftraced_suspend = -1;
1762#endif
1763 clear_ftrace_function();
1764}
1765
1766/**
1767 * ftrace_kill - totally shutdown ftrace
1768 *
1769 * This is a safety measure. If something was detected that seems
1770 * wrong, calling this function will keep ftrace from doing
1771 * any more modifications, and updates.
1772 * used when something went wrong.
1773 */
1774void ftrace_kill(void) 1363void ftrace_kill(void)
1775{ 1364{
1776 mutex_lock(&ftrace_sysctl_lock);
1777 ftrace_disabled = 1; 1365 ftrace_disabled = 1;
1778 ftrace_enabled = 0; 1366 ftrace_enabled = 0;
1779
1780 clear_ftrace_function(); 1367 clear_ftrace_function();
1781 mutex_unlock(&ftrace_sysctl_lock);
1782
1783 /* Try to totally disable ftrace */
1784 ftrace_force_shutdown();
1785} 1368}
1786 1369
1787/** 1370/**
@@ -1870,3 +1453,4 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1870 mutex_unlock(&ftrace_sysctl_lock); 1453 mutex_unlock(&ftrace_sysctl_lock);
1871 return ret; 1454 return ret;
1872} 1455}
1456
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 94af1fe56bb4..3f3380638646 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -130,7 +130,7 @@ struct buffer_page {
130static inline void free_buffer_page(struct buffer_page *bpage) 130static inline void free_buffer_page(struct buffer_page *bpage)
131{ 131{
132 if (bpage->page) 132 if (bpage->page)
133 __free_page(bpage->page); 133 free_page((unsigned long)bpage->page);
134 kfree(bpage); 134 kfree(bpage);
135} 135}
136 136
@@ -966,7 +966,9 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
966 if (unlikely(*delta > (1ULL << 59) && !once++)) { 966 if (unlikely(*delta > (1ULL << 59) && !once++)) {
967 printk(KERN_WARNING "Delta way too big! %llu" 967 printk(KERN_WARNING "Delta way too big! %llu"
968 " ts=%llu write stamp = %llu\n", 968 " ts=%llu write stamp = %llu\n",
969 *delta, *ts, cpu_buffer->write_stamp); 969 (unsigned long long)*delta,
970 (unsigned long long)*ts,
971 (unsigned long long)cpu_buffer->write_stamp);
970 WARN_ON(1); 972 WARN_ON(1);
971 } 973 }
972 974
@@ -1020,8 +1022,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1020 struct ring_buffer_event *event; 1022 struct ring_buffer_event *event;
1021 u64 ts, delta; 1023 u64 ts, delta;
1022 int commit = 0; 1024 int commit = 0;
1025 int nr_loops = 0;
1023 1026
1024 again: 1027 again:
1028 /*
1029 * We allow for interrupts to reenter here and do a trace.
1030 * If one does, it will cause this original code to loop
1031 * back here. Even with heavy interrupts happening, this
1032 * should only happen a few times in a row. If this happens
1033 * 1000 times in a row, there must be either an interrupt
1034 * storm or we have something buggy.
1035 * Bail!
1036 */
1037 if (unlikely(++nr_loops > 1000)) {
1038 RB_WARN_ON(cpu_buffer, 1);
1039 return NULL;
1040 }
1041
1025 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1042 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1026 1043
1027 /* 1044 /*
@@ -1530,10 +1547,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1530{ 1547{
1531 struct buffer_page *reader = NULL; 1548 struct buffer_page *reader = NULL;
1532 unsigned long flags; 1549 unsigned long flags;
1550 int nr_loops = 0;
1533 1551
1534 spin_lock_irqsave(&cpu_buffer->lock, flags); 1552 spin_lock_irqsave(&cpu_buffer->lock, flags);
1535 1553
1536 again: 1554 again:
1555 /*
1556 * This should normally only loop twice. But because the
1557 * start of the reader inserts an empty page, it causes
1558 * a case where we will loop three times. There should be no
1559 * reason to loop four times (that I know of).
1560 */
1561 if (unlikely(++nr_loops > 3)) {
1562 RB_WARN_ON(cpu_buffer, 1);
1563 reader = NULL;
1564 goto out;
1565 }
1566
1537 reader = cpu_buffer->reader_page; 1567 reader = cpu_buffer->reader_page;
1538 1568
1539 /* If there's more to read, return this page */ 1569 /* If there's more to read, return this page */
@@ -1663,6 +1693,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1663 struct ring_buffer_per_cpu *cpu_buffer; 1693 struct ring_buffer_per_cpu *cpu_buffer;
1664 struct ring_buffer_event *event; 1694 struct ring_buffer_event *event;
1665 struct buffer_page *reader; 1695 struct buffer_page *reader;
1696 int nr_loops = 0;
1666 1697
1667 if (!cpu_isset(cpu, buffer->cpumask)) 1698 if (!cpu_isset(cpu, buffer->cpumask))
1668 return NULL; 1699 return NULL;
@@ -1670,6 +1701,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1670 cpu_buffer = buffer->buffers[cpu]; 1701 cpu_buffer = buffer->buffers[cpu];
1671 1702
1672 again: 1703 again:
1704 /*
1705 * We repeat when a timestamp is encountered. It is possible
1706 * to get multiple timestamps from an interrupt entering just
1707 * as one timestamp is about to be written. The max times
1708 * that this can happen is the number of nested interrupts we
1709 * can have. Nesting 10 deep of interrupts is clearly
1710 * an anomaly.
1711 */
1712 if (unlikely(++nr_loops > 10)) {
1713 RB_WARN_ON(cpu_buffer, 1);
1714 return NULL;
1715 }
1716
1673 reader = rb_get_reader_page(cpu_buffer); 1717 reader = rb_get_reader_page(cpu_buffer);
1674 if (!reader) 1718 if (!reader)
1675 return NULL; 1719 return NULL;
@@ -1720,6 +1764,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1720 struct ring_buffer *buffer; 1764 struct ring_buffer *buffer;
1721 struct ring_buffer_per_cpu *cpu_buffer; 1765 struct ring_buffer_per_cpu *cpu_buffer;
1722 struct ring_buffer_event *event; 1766 struct ring_buffer_event *event;
1767 int nr_loops = 0;
1723 1768
1724 if (ring_buffer_iter_empty(iter)) 1769 if (ring_buffer_iter_empty(iter))
1725 return NULL; 1770 return NULL;
@@ -1728,6 +1773,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1728 buffer = cpu_buffer->buffer; 1773 buffer = cpu_buffer->buffer;
1729 1774
1730 again: 1775 again:
1776 /*
1777 * We repeat when a timestamp is encountered. It is possible
1778 * to get multiple timestamps from an interrupt entering just
1779 * as one timestamp is about to be written. The max times
1780 * that this can happen is the number of nested interrupts we
1781 * can have. Nesting 10 deep of interrupts is clearly
1782 * an anomaly.
1783 */
1784 if (unlikely(++nr_loops > 10)) {
1785 RB_WARN_ON(cpu_buffer, 1);
1786 return NULL;
1787 }
1788
1731 if (rb_per_cpu_empty(cpu_buffer)) 1789 if (rb_per_cpu_empty(cpu_buffer))
1732 return NULL; 1790 return NULL;
1733 1791
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d345d649d073..9f3b478f9171 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -34,6 +34,7 @@
34 34
35#include <linux/stacktrace.h> 35#include <linux/stacktrace.h>
36#include <linux/ring_buffer.h> 36#include <linux/ring_buffer.h>
37#include <linux/irqflags.h>
37 38
38#include "trace.h" 39#include "trace.h"
39 40
@@ -655,7 +656,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
655 entry->preempt_count = pc & 0xff; 656 entry->preempt_count = pc & 0xff;
656 entry->pid = (tsk) ? tsk->pid : 0; 657 entry->pid = (tsk) ? tsk->pid : 0;
657 entry->flags = 658 entry->flags =
659#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
658 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 660 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
661#else
662 TRACE_FLAG_IRQS_NOSUPPORT |
663#endif
659 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | 664 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
660 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | 665 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
661 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 666 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -700,6 +705,7 @@ static void ftrace_trace_stack(struct trace_array *tr,
700 unsigned long flags, 705 unsigned long flags,
701 int skip, int pc) 706 int skip, int pc)
702{ 707{
708#ifdef CONFIG_STACKTRACE
703 struct ring_buffer_event *event; 709 struct ring_buffer_event *event;
704 struct stack_entry *entry; 710 struct stack_entry *entry;
705 struct stack_trace trace; 711 struct stack_trace trace;
@@ -725,6 +731,7 @@ static void ftrace_trace_stack(struct trace_array *tr,
725 731
726 save_stack_trace(&trace); 732 save_stack_trace(&trace);
727 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 733 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
734#endif
728} 735}
729 736
730void __trace_stack(struct trace_array *tr, 737void __trace_stack(struct trace_array *tr,
@@ -851,7 +858,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
851 preempt_enable_notrace(); 858 preempt_enable_notrace();
852} 859}
853 860
854#ifdef CONFIG_FTRACE 861#ifdef CONFIG_FUNCTION_TRACER
855static void 862static void
856function_trace_call(unsigned long ip, unsigned long parent_ip) 863function_trace_call(unsigned long ip, unsigned long parent_ip)
857{ 864{
@@ -865,9 +872,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
865 if (unlikely(!ftrace_function_enabled)) 872 if (unlikely(!ftrace_function_enabled))
866 return; 873 return;
867 874
868 if (skip_trace(ip))
869 return;
870
871 pc = preempt_count(); 875 pc = preempt_count();
872 resched = need_resched(); 876 resched = need_resched();
873 preempt_disable_notrace(); 877 preempt_disable_notrace();
@@ -1084,17 +1088,20 @@ static void s_stop(struct seq_file *m, void *p)
1084 mutex_unlock(&trace_types_lock); 1088 mutex_unlock(&trace_types_lock);
1085} 1089}
1086 1090
1087#define KRETPROBE_MSG "[unknown/kretprobe'd]"
1088
1089#ifdef CONFIG_KRETPROBES 1091#ifdef CONFIG_KRETPROBES
1090static inline int kretprobed(unsigned long addr) 1092static inline const char *kretprobed(const char *name)
1091{ 1093{
1092 return addr == (unsigned long)kretprobe_trampoline; 1094 static const char tramp_name[] = "kretprobe_trampoline";
1095 int size = sizeof(tramp_name);
1096
1097 if (strncmp(tramp_name, name, size) == 0)
1098 return "[unknown/kretprobe'd]";
1099 return name;
1093} 1100}
1094#else 1101#else
1095static inline int kretprobed(unsigned long addr) 1102static inline const char *kretprobed(const char *name)
1096{ 1103{
1097 return 0; 1104 return name;
1098} 1105}
1099#endif /* CONFIG_KRETPROBES */ 1106#endif /* CONFIG_KRETPROBES */
1100 1107
@@ -1103,10 +1110,13 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1103{ 1110{
1104#ifdef CONFIG_KALLSYMS 1111#ifdef CONFIG_KALLSYMS
1105 char str[KSYM_SYMBOL_LEN]; 1112 char str[KSYM_SYMBOL_LEN];
1113 const char *name;
1106 1114
1107 kallsyms_lookup(address, NULL, NULL, NULL, str); 1115 kallsyms_lookup(address, NULL, NULL, NULL, str);
1108 1116
1109 return trace_seq_printf(s, fmt, str); 1117 name = kretprobed(str);
1118
1119 return trace_seq_printf(s, fmt, name);
1110#endif 1120#endif
1111 return 1; 1121 return 1;
1112} 1122}
@@ -1117,9 +1127,12 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1117{ 1127{
1118#ifdef CONFIG_KALLSYMS 1128#ifdef CONFIG_KALLSYMS
1119 char str[KSYM_SYMBOL_LEN]; 1129 char str[KSYM_SYMBOL_LEN];
1130 const char *name;
1120 1131
1121 sprint_symbol(str, address); 1132 sprint_symbol(str, address);
1122 return trace_seq_printf(s, fmt, str); 1133 name = kretprobed(str);
1134
1135 return trace_seq_printf(s, fmt, name);
1123#endif 1136#endif
1124 return 1; 1137 return 1;
1125} 1138}
@@ -1246,7 +1259,8 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1246 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); 1259 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1247 trace_seq_printf(s, "%3d", cpu); 1260 trace_seq_printf(s, "%3d", cpu);
1248 trace_seq_printf(s, "%c%c", 1261 trace_seq_printf(s, "%c%c",
1249 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', 1262 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
1263 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
1250 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); 1264 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1251 1265
1252 hardirq = entry->flags & TRACE_FLAG_HARDIRQ; 1266 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
@@ -1372,10 +1386,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1372 1386
1373 seq_print_ip_sym(s, field->ip, sym_flags); 1387 seq_print_ip_sym(s, field->ip, sym_flags);
1374 trace_seq_puts(s, " ("); 1388 trace_seq_puts(s, " (");
1375 if (kretprobed(field->parent_ip)) 1389 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1376 trace_seq_puts(s, KRETPROBE_MSG);
1377 else
1378 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1379 trace_seq_puts(s, ")\n"); 1390 trace_seq_puts(s, ")\n");
1380 break; 1391 break;
1381 } 1392 }
@@ -1491,12 +1502,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1491 ret = trace_seq_printf(s, " <-"); 1502 ret = trace_seq_printf(s, " <-");
1492 if (!ret) 1503 if (!ret)
1493 return TRACE_TYPE_PARTIAL_LINE; 1504 return TRACE_TYPE_PARTIAL_LINE;
1494 if (kretprobed(field->parent_ip)) 1505 ret = seq_print_ip_sym(s,
1495 ret = trace_seq_puts(s, KRETPROBE_MSG); 1506 field->parent_ip,
1496 else 1507 sym_flags);
1497 ret = seq_print_ip_sym(s,
1498 field->parent_ip,
1499 sym_flags);
1500 if (!ret) 1508 if (!ret)
1501 return TRACE_TYPE_PARTIAL_LINE; 1509 return TRACE_TYPE_PARTIAL_LINE;
1502 } 1510 }
@@ -2379,9 +2387,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2379 int i; 2387 int i;
2380 size_t ret; 2388 size_t ret;
2381 2389
2390 ret = cnt;
2391
2382 if (cnt > max_tracer_type_len) 2392 if (cnt > max_tracer_type_len)
2383 cnt = max_tracer_type_len; 2393 cnt = max_tracer_type_len;
2384 ret = cnt;
2385 2394
2386 if (copy_from_user(&buf, ubuf, cnt)) 2395 if (copy_from_user(&buf, ubuf, cnt))
2387 return -EFAULT; 2396 return -EFAULT;
@@ -2414,8 +2423,8 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2414 out: 2423 out:
2415 mutex_unlock(&trace_types_lock); 2424 mutex_unlock(&trace_types_lock);
2416 2425
2417 if (ret == cnt) 2426 if (ret > 0)
2418 filp->f_pos += cnt; 2427 filp->f_pos += ret;
2419 2428
2420 return ret; 2429 return ret;
2421} 2430}
@@ -3097,7 +3106,7 @@ void ftrace_dump(void)
3097 dump_ran = 1; 3106 dump_ran = 1;
3098 3107
3099 /* No turning back! */ 3108 /* No turning back! */
3100 ftrace_kill_atomic(); 3109 ftrace_kill();
3101 3110
3102 for_each_tracing_cpu(cpu) { 3111 for_each_tracing_cpu(cpu) {
3103 atomic_inc(&global_trace.data[cpu]->disabled); 3112 atomic_inc(&global_trace.data[cpu]->disabled);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f1f99572cde7..8465ad052707 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -120,18 +120,20 @@ struct trace_boot {
120/* 120/*
121 * trace_flag_type is an enumeration that holds different 121 * trace_flag_type is an enumeration that holds different
122 * states when a trace occurs. These are: 122 * states when a trace occurs. These are:
123 * IRQS_OFF - interrupts were disabled 123 * IRQS_OFF - interrupts were disabled
124 * NEED_RESCED - reschedule is requested 124 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
125 * HARDIRQ - inside an interrupt handler 125 * NEED_RESCED - reschedule is requested
126 * SOFTIRQ - inside a softirq handler 126 * HARDIRQ - inside an interrupt handler
127 * CONT - multiple entries hold the trace item 127 * SOFTIRQ - inside a softirq handler
128 * CONT - multiple entries hold the trace item
128 */ 129 */
129enum trace_flag_type { 130enum trace_flag_type {
130 TRACE_FLAG_IRQS_OFF = 0x01, 131 TRACE_FLAG_IRQS_OFF = 0x01,
131 TRACE_FLAG_NEED_RESCHED = 0x02, 132 TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
132 TRACE_FLAG_HARDIRQ = 0x04, 133 TRACE_FLAG_NEED_RESCHED = 0x04,
133 TRACE_FLAG_SOFTIRQ = 0x08, 134 TRACE_FLAG_HARDIRQ = 0x08,
134 TRACE_FLAG_CONT = 0x10, 135 TRACE_FLAG_SOFTIRQ = 0x10,
136 TRACE_FLAG_CONT = 0x20,
135}; 137};
136 138
137#define TRACE_BUF_SIZE 1024 139#define TRACE_BUF_SIZE 1024
@@ -335,7 +337,7 @@ void update_max_tr_single(struct trace_array *tr,
335 337
336extern cycle_t ftrace_now(int cpu); 338extern cycle_t ftrace_now(int cpu);
337 339
338#ifdef CONFIG_FTRACE 340#ifdef CONFIG_FUNCTION_TRACER
339void tracing_start_function_trace(void); 341void tracing_start_function_trace(void);
340void tracing_stop_function_trace(void); 342void tracing_stop_function_trace(void);
341#else 343#else
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index e90eb0c2c56c..0f85a64003d3 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -64,7 +64,7 @@ static void function_trace_ctrl_update(struct trace_array *tr)
64 64
65static struct tracer function_trace __read_mostly = 65static struct tracer function_trace __read_mostly =
66{ 66{
67 .name = "ftrace", 67 .name = "function",
68 .init = function_trace_init, 68 .init = function_trace_init,
69 .reset = function_trace_reset, 69 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update, 70 .ctrl_update = function_trace_ctrl_update,
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index a7db7f040ae0..9c74071c10e0 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -63,7 +63,7 @@ irq_trace(void)
63 */ 63 */
64static __cacheline_aligned_in_smp unsigned long max_sequence; 64static __cacheline_aligned_in_smp unsigned long max_sequence;
65 65
66#ifdef CONFIG_FTRACE 66#ifdef CONFIG_FUNCTION_TRACER
67/* 67/*
68 * irqsoff uses its own tracer function to keep the overhead down: 68 * irqsoff uses its own tracer function to keep the overhead down:
69 */ 69 */
@@ -104,7 +104,7 @@ static struct ftrace_ops trace_ops __read_mostly =
104{ 104{
105 .func = irqsoff_tracer_call, 105 .func = irqsoff_tracer_call,
106}; 106};
107#endif /* CONFIG_FTRACE */ 107#endif /* CONFIG_FUNCTION_TRACER */
108 108
109/* 109/*
110 * Should this new latency be reported/recorded? 110 * Should this new latency be reported/recorded?
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index fe4a252c2363..3ae93f16b565 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -31,7 +31,7 @@ static raw_spinlock_t wakeup_lock =
31 31
32static void __wakeup_reset(struct trace_array *tr); 32static void __wakeup_reset(struct trace_array *tr);
33 33
34#ifdef CONFIG_FTRACE 34#ifdef CONFIG_FUNCTION_TRACER
35/* 35/*
36 * irqsoff uses its own tracer function to keep the overhead down: 36 * irqsoff uses its own tracer function to keep the overhead down:
37 */ 37 */
@@ -96,7 +96,7 @@ static struct ftrace_ops trace_ops __read_mostly =
96{ 96{
97 .func = wakeup_tracer_call, 97 .func = wakeup_tracer_call,
98}; 98};
99#endif /* CONFIG_FTRACE */ 99#endif /* CONFIG_FUNCTION_TRACER */
100 100
101/* 101/*
102 * Should this new latency be reported/recorded? 102 * Should this new latency be reported/recorded?
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 09cf230d7eca..90bc752a7580 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -70,7 +70,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
70 return ret; 70 return ret;
71} 71}
72 72
73#ifdef CONFIG_FTRACE 73#ifdef CONFIG_FUNCTION_TRACER
74 74
75#ifdef CONFIG_DYNAMIC_FTRACE 75#ifdef CONFIG_DYNAMIC_FTRACE
76 76
@@ -99,13 +99,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
99 /* passed in by parameter to fool gcc from optimizing */ 99 /* passed in by parameter to fool gcc from optimizing */
100 func(); 100 func();
101 101
102 /* update the records */
103 ret = ftrace_force_update();
104 if (ret) {
105 printk(KERN_CONT ".. ftraced failed .. ");
106 return ret;
107 }
108
109 /* 102 /*
110 * Some archs *cough*PowerPC*cough* add charachters to the 103 * Some archs *cough*PowerPC*cough* add charachters to the
111 * start of the function names. We simply put a '*' to 104 * start of the function names. We simply put a '*' to
@@ -183,13 +176,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
183 /* make sure msleep has been recorded */ 176 /* make sure msleep has been recorded */
184 msleep(1); 177 msleep(1);
185 178
186 /* force the recorded functions to be traced */
187 ret = ftrace_force_update();
188 if (ret) {
189 printk(KERN_CONT ".. ftraced failed .. ");
190 return ret;
191 }
192
193 /* start the tracing */ 179 /* start the tracing */
194 ftrace_enabled = 1; 180 ftrace_enabled = 1;
195 tracer_enabled = 1; 181 tracer_enabled = 1;
@@ -226,7 +212,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
226 212
227 return ret; 213 return ret;
228} 214}
229#endif /* CONFIG_FTRACE */ 215#endif /* CONFIG_FUNCTION_TRACER */
230 216
231#ifdef CONFIG_IRQSOFF_TRACER 217#ifdef CONFIG_IRQSOFF_TRACER
232int 218int
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 74c5d9a3afae..be682b62fe58 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -44,6 +44,10 @@ static inline void check_stack(void)
44 if (this_size <= max_stack_size) 44 if (this_size <= max_stack_size)
45 return; 45 return;
46 46
47 /* we do not handle interrupt stacks yet */
48 if (!object_is_on_stack(&this_size))
49 return;
50
47 raw_local_irq_save(flags); 51 raw_local_irq_save(flags);
48 __raw_spin_lock(&max_stack_lock); 52 __raw_spin_lock(&max_stack_lock);
49 53
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index f2b7c28a4708..af8c85664882 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -131,6 +131,9 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
131 131
132 old = entry->funcs; 132 old = entry->funcs;
133 133
134 if (!old)
135 return NULL;
136
134 debug_print_probes(entry); 137 debug_print_probes(entry);
135 /* (N -> M), (N > 1, M >= 0) probes */ 138 /* (N -> M), (N > 1, M >= 0) probes */
136 for (nr_probes = 0; old[nr_probes]; nr_probes++) { 139 for (nr_probes = 0; old[nr_probes]; nr_probes++) {
@@ -388,6 +391,11 @@ int tracepoint_probe_unregister(const char *name, void *probe)
388 if (entry->rcu_pending) 391 if (entry->rcu_pending)
389 rcu_barrier_sched(); 392 rcu_barrier_sched();
390 old = tracepoint_entry_remove_probe(entry, probe); 393 old = tracepoint_entry_remove_probe(entry, probe);
394 if (!old) {
395 printk(KERN_WARNING "Warning: Trying to unregister a probe"
396 "that doesn't exist\n");
397 goto end;
398 }
391 mutex_unlock(&tracepoints_mutex); 399 mutex_unlock(&tracepoints_mutex);
392 tracepoint_update_probes(); /* may update entry */ 400 tracepoint_update_probes(); /* may update entry */
393 mutex_lock(&tracepoints_mutex); 401 mutex_lock(&tracepoints_mutex);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f928f2a87b9b..d4dc69ddebd7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -970,6 +970,51 @@ undo:
970 return ret; 970 return ret;
971} 971}
972 972
973#ifdef CONFIG_SMP
974struct work_for_cpu {
975 struct work_struct work;
976 long (*fn)(void *);
977 void *arg;
978 long ret;
979};
980
981static void do_work_for_cpu(struct work_struct *w)
982{
983 struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
984
985 wfc->ret = wfc->fn(wfc->arg);
986}
987
988/**
989 * work_on_cpu - run a function in user context on a particular cpu
990 * @cpu: the cpu to run on
991 * @fn: the function to run
992 * @arg: the function arg
993 *
994 * This will return -EINVAL in the cpu is not online, or the return value
995 * of @fn otherwise.
996 */
997long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
998{
999 struct work_for_cpu wfc;
1000
1001 INIT_WORK(&wfc.work, do_work_for_cpu);
1002 wfc.fn = fn;
1003 wfc.arg = arg;
1004 get_online_cpus();
1005 if (unlikely(!cpu_online(cpu)))
1006 wfc.ret = -EINVAL;
1007 else {
1008 schedule_work_on(cpu, &wfc.work);
1009 flush_work(&wfc.work);
1010 }
1011 put_online_cpus();
1012
1013 return wfc.ret;
1014}
1015EXPORT_SYMBOL_GPL(work_on_cpu);
1016#endif /* CONFIG_SMP */
1017
973void __init init_workqueues(void) 1018void __init init_workqueues(void)
974{ 1019{
975 cpu_populated_map = cpu_online_map; 1020 cpu_populated_map = cpu_online_map;