author     Ingo Molnar <mingo@elte.hu>   2011-06-04 06:13:06 -0400
committer  Ingo Molnar <mingo@elte.hu>   2011-06-04 06:13:06 -0400
commit     710054ba25c0d1f8f41c22ce13ba336503fb5318 (patch)
tree       f9b09b722bf511841539173d946f90a20fc2e59a /kernel
parent     74c355fbdfedd3820046dba4f537876cea54c207 (diff)
parent     b273fa9716aa1564bee88ceee62f9042981cdc81 (diff)
Merge branch 'perf/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cpuset.c              |    4
-rw-r--r--  kernel/events/core.c         |    8
-rw-r--r--  kernel/fork.c                |   42
-rw-r--r--  kernel/jump_label.c          |   18
-rw-r--r--  kernel/kthread.c             |    4
-rw-r--r--  kernel/pm_qos_params.c       |   37
-rw-r--r--  kernel/rcutree.c             |  208
-rw-r--r--  kernel/rcutree.h             |   30
-rw-r--r--  kernel/rcutree_plugin.h      |   33
-rw-r--r--  kernel/rcutree_trace.c       |   12
-rw-r--r--  kernel/sched.c               |   56
-rw-r--r--  kernel/sched_fair.c          |    5
-rw-r--r--  kernel/sched_rt.c            |   10
-rw-r--r--  kernel/sched_stats.h         |    4
-rw-r--r--  kernel/trace/ftrace.c        |   31
-rw-r--r--  kernel/trace/ring_buffer.c   |   10
-rw-r--r--  kernel/trace/trace.h         |   15
-rw-r--r--  kernel/trace/trace_events.c  |    7
-rw-r--r--  kernel/trace/trace_output.c  |   27
-rw-r--r--  kernel/watchdog.c            |    9
20 files changed, 337 insertions, 233 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1ceeb049c827..9c9b7545c810 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2190,7 +2190,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 	rcu_read_lock();
 	cs = task_cs(tsk);
 	if (cs)
-		cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed);
+		do_set_cpus_allowed(tsk, cs->cpus_allowed);
 	rcu_read_unlock();
 
 	/*
@@ -2217,7 +2217,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 		 * Like above we can temporary set any mask and rely on
 		 * set_cpus_allowed_ptr() as synchronization point.
 		 */
-		cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask);
+		do_set_cpus_allowed(tsk, cpu_possible_mask);
 		cpu = cpumask_any(cpu_active_mask);
 	}
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8a15944bf9d2..9efe7108ccaf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5028,6 +5028,14 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 	else
 		perf_event_output(event, nmi, data, regs);
 
+	if (event->fasync && event->pending_kill) {
+		if (nmi) {
+			event->pending_wakeup = 1;
+			irq_work_queue(&event->pending);
+		} else
+			perf_event_wakeup(event);
+	}
+
 	return ret;
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index ca406d916713..0276c30401a0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -484,20 +484,6 @@ static void mm_init_aio(struct mm_struct *mm)
 #endif
 }
 
-int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm)
-{
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL))
-		return -ENOMEM;
-
-	if (oldmm)
-		cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm));
-	else
-		memset(mm_cpumask(mm), 0, cpumask_size());
-#endif
-	return 0;
-}
-
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
 	atomic_set(&mm->mm_users, 1);
@@ -538,17 +524,8 @@ struct mm_struct * mm_alloc(void)
 		return NULL;
 
 	memset(mm, 0, sizeof(*mm));
-	mm = mm_init(mm, current);
-	if (!mm)
-		return NULL;
-
-	if (mm_init_cpumask(mm, NULL)) {
-		mm_free_pgd(mm);
-		free_mm(mm);
-		return NULL;
-	}
-
-	return mm;
+	mm_init_cpumask(mm);
+	return mm_init(mm, current);
 }
 
 /*
@@ -559,7 +536,6 @@ struct mm_struct * mm_alloc(void)
 void __mmdrop(struct mm_struct *mm)
 {
 	BUG_ON(mm == &init_mm);
-	free_cpumask_var(mm->cpu_vm_mask_var);
 	mm_free_pgd(mm);
 	destroy_context(mm);
 	mmu_notifier_mm_destroy(mm);
@@ -753,6 +729,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 		goto fail_nomem;
 
 	memcpy(mm, oldmm, sizeof(*mm));
+	mm_init_cpumask(mm);
 
 	/* Initializing for Swap token stuff */
 	mm->token_priority = 0;
@@ -765,9 +742,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
-	if (mm_init_cpumask(mm, oldmm))
-		goto fail_nocpumask;
-
 	if (init_new_context(tsk, mm))
 		goto fail_nocontext;
 
@@ -794,9 +768,6 @@ fail_nomem:
 	return NULL;
 
 fail_nocontext:
-	free_cpumask_var(mm->cpu_vm_mask_var);
-
-fail_nocpumask:
 	/*
 	 * If init_new_context() failed, we cannot use mmput() to free the mm
 	 * because it calls destroy_context()
@@ -1591,6 +1562,13 @@ void __init proc_caches_init(void)
 	fs_cachep = kmem_cache_create("fs_cache",
 			sizeof(struct fs_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
+	/*
+	 * FIXME! The "sizeof(struct mm_struct)" currently includes the
+	 * whole struct cpumask for the OFFSTACK case. We could change
+	 * this to *only* allocate as much of it as required by the
+	 * maximum number of CPU's we can ever have. The cpumask_allocation
+	 * is at the end of the structure, exactly for that reason.
+	 */
 	mm_cachep = kmem_cache_create("mm_struct",
 			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 74d1c099fbd1..fa27e750dbc0 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -105,9 +105,12 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
 }
 
 static void __jump_label_update(struct jump_label_key *key,
-				struct jump_entry *entry, int enable)
+				struct jump_entry *entry,
+				struct jump_entry *stop, int enable)
 {
-	for (; entry->key == (jump_label_t)(unsigned long)key; entry++) {
+	for (; (entry < stop) &&
+	      (entry->key == (jump_label_t)(unsigned long)key);
+	      entry++) {
 		/*
 		 * entry->code set to 0 invalidates module init text sections
 		 * kernel_text_address() verifies we are not in core kernel
@@ -181,7 +184,11 @@ static void __jump_label_mod_update(struct jump_label_key *key, int enable)
 	struct jump_label_mod *mod = key->next;
 
 	while (mod) {
-		__jump_label_update(key, mod->entries, enable);
+		struct module *m = mod->mod;
+
+		__jump_label_update(key, mod->entries,
+				    m->jump_entries + m->num_jump_entries,
+				    enable);
 		mod = mod->next;
 	}
 }
@@ -245,7 +252,8 @@ static int jump_label_add_module(struct module *mod)
 		key->next = jlm;
 
 		if (jump_label_enabled(key))
-			__jump_label_update(key, iter, JUMP_LABEL_ENABLE);
+			__jump_label_update(key, iter, iter_stop,
+					    JUMP_LABEL_ENABLE);
 	}
 
 	return 0;
@@ -371,7 +379,7 @@ static void jump_label_update(struct jump_label_key *key, int enable)
 
 	/* if there are no users, entry can be NULL */
 	if (entry)
-		__jump_label_update(key, entry, enable);
+		__jump_label_update(key, entry, __stop___jump_table, enable);
 
 #ifdef CONFIG_MODULES
 	__jump_label_mod_update(key, enable);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 3b34d2732bce..4ba7cccb4994 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -202,8 +202,8 @@ void kthread_bind(struct task_struct *p, unsigned int cpu)
 		return;
 	}
 
-	p->cpus_allowed = cpumask_of_cpu(cpu);
-	p->rt.nr_cpus_allowed = 1;
+	/* It's safe because the task is inactive. */
+	do_set_cpus_allowed(p, cpumask_of(cpu));
 	p->flags |= PF_THREAD_BOUND;
 }
 EXPORT_SYMBOL(kthread_bind);
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index fd8d1e035df9..6824ca7d4d0c 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -54,11 +54,17 @@ enum pm_qos_type {
 	PM_QOS_MIN		/* return the smallest value */
 };
 
+/*
+ * Note: The lockless read path depends on the CPU accessing
+ * target_value atomically.  Atomic access is only guaranteed on all CPU
+ * types linux supports for 32 bit quantites
+ */
 struct pm_qos_object {
 	struct plist_head requests;
 	struct blocking_notifier_head *notifiers;
 	struct miscdevice pm_qos_power_miscdev;
 	char *name;
+	s32 target_value;	/* Do not change to 64 bit */
 	s32 default_value;
 	enum pm_qos_type type;
 };
@@ -71,7 +77,8 @@ static struct pm_qos_object cpu_dma_pm_qos = {
 	.requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock),
 	.notifiers = &cpu_dma_lat_notifier,
 	.name = "cpu_dma_latency",
-	.default_value = 2000 * USEC_PER_SEC,
+	.target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
+	.default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
 	.type = PM_QOS_MIN,
 };
 
@@ -80,7 +87,8 @@ static struct pm_qos_object network_lat_pm_qos = {
 	.requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock),
 	.notifiers = &network_lat_notifier,
 	.name = "network_latency",
-	.default_value = 2000 * USEC_PER_SEC,
+	.target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
+	.default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
 	.type = PM_QOS_MIN
 };
 
@@ -90,7 +98,8 @@ static struct pm_qos_object network_throughput_pm_qos = {
 	.requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock),
 	.notifiers = &network_throughput_notifier,
 	.name = "network_throughput",
-	.default_value = 0,
+	.target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
+	.default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
 	.type = PM_QOS_MAX,
 };
 
@@ -136,6 +145,16 @@ static inline int pm_qos_get_value(struct pm_qos_object *o)
 	}
 }
 
+static inline s32 pm_qos_read_value(struct pm_qos_object *o)
+{
+	return o->target_value;
+}
+
+static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value)
+{
+	o->target_value = value;
+}
+
 static void update_target(struct pm_qos_object *o, struct plist_node *node,
 			  int del, int value)
 {
@@ -160,6 +179,7 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node,
 		plist_add(node, &o->requests);
 	}
 	curr_value = pm_qos_get_value(o);
+	pm_qos_set_value(o, curr_value);
 	spin_unlock_irqrestore(&pm_qos_lock, flags);
 
 	if (prev_value != curr_value)
@@ -194,18 +214,11 @@ static int find_pm_qos_object_by_minor(int minor)
  * pm_qos_request - returns current system wide qos expectation
  * @pm_qos_class: identification of which qos value is requested
  *
- * This function returns the current target value in an atomic manner.
+ * This function returns the current target value.
  */
 int pm_qos_request(int pm_qos_class)
 {
-	unsigned long flags;
-	int value;
-
-	spin_lock_irqsave(&pm_qos_lock, flags);
-	value = pm_qos_get_value(pm_qos_array[pm_qos_class]);
-	spin_unlock_irqrestore(&pm_qos_lock, flags);
-
-	return value;
+	return pm_qos_read_value(pm_qos_array[pm_qos_class]);
 }
 EXPORT_SYMBOL_GPL(pm_qos_request);
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f07d2f03181a..89419ff92e99 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -36,7 +36,7 @@
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/nmi.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/completion.h>
@@ -95,7 +95,6 @@ static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
 DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
-static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
 DEFINE_PER_CPU(char, rcu_cpu_has_work);
 static char rcu_kthreads_spawnable;
 
@@ -163,7 +162,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 #ifdef CONFIG_NO_HZ
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = 1,
-	.dynticks = 1,
+	.dynticks = ATOMIC_INIT(1),
 };
 #endif /* #ifdef CONFIG_NO_HZ */
 
@@ -322,13 +321,25 @@ void rcu_enter_nohz(void)
 	unsigned long flags;
 	struct rcu_dynticks *rdtp;
 
-	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	rdtp->dynticks++;
-	rdtp->dynticks_nesting--;
-	WARN_ON_ONCE(rdtp->dynticks & 0x1);
+	if (--rdtp->dynticks_nesting) {
+		local_irq_restore(flags);
+		return;
+	}
+	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+	smp_mb__before_atomic_inc();  /* See above. */
+	atomic_inc(&rdtp->dynticks);
+	smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
+	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 	local_irq_restore(flags);
+
+	/* If the interrupt queued a callback, get out of dyntick mode. */
+	if (in_irq() &&
+	    (__get_cpu_var(rcu_sched_data).nxtlist ||
+	     __get_cpu_var(rcu_bh_data).nxtlist ||
+	     rcu_preempt_needs_cpu(smp_processor_id())))
+		set_need_resched();
 }
 
 /*
@@ -344,11 +355,16 @@ void rcu_exit_nohz(void)
 
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	rdtp->dynticks++;
-	rdtp->dynticks_nesting++;
-	WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
+	if (rdtp->dynticks_nesting++) {
+		local_irq_restore(flags);
+		return;
+	}
+	smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
+	atomic_inc(&rdtp->dynticks);
+	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
+	smp_mb__after_atomic_inc();  /* See above. */
+	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
 	local_irq_restore(flags);
-	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
 }
 
 /**
@@ -362,11 +378,15 @@ void rcu_nmi_enter(void)
 {
 	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
 
-	if (rdtp->dynticks & 0x1)
+	if (rdtp->dynticks_nmi_nesting == 0 &&
+	    (atomic_read(&rdtp->dynticks) & 0x1))
 		return;
-	rdtp->dynticks_nmi++;
-	WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1));
-	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+	rdtp->dynticks_nmi_nesting++;
+	smp_mb__before_atomic_inc();  /* Force delay from prior write. */
+	atomic_inc(&rdtp->dynticks);
+	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
+	smp_mb__after_atomic_inc();  /* See above. */
+	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
 }
 
 /**
@@ -380,11 +400,14 @@ void rcu_nmi_exit(void)
 {
 	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
 
-	if (rdtp->dynticks & 0x1)
+	if (rdtp->dynticks_nmi_nesting == 0 ||
+	    --rdtp->dynticks_nmi_nesting != 0)
 		return;
-	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
-	rdtp->dynticks_nmi++;
-	WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1);
+	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+	smp_mb__before_atomic_inc();  /* See above. */
+	atomic_inc(&rdtp->dynticks);
+	smp_mb__after_atomic_inc();  /* Force delay to next write. */
+	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 }
 
 /**
@@ -395,13 +418,7 @@ void rcu_nmi_exit(void)
  */
 void rcu_irq_enter(void)
 {
-	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
-
-	if (rdtp->dynticks_nesting++)
-		return;
-	rdtp->dynticks++;
-	WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
-	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+	rcu_exit_nohz();
 }
 
 /**
@@ -413,18 +430,7 @@ void rcu_irq_enter(void)
  */
 void rcu_irq_exit(void)
 {
-	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
-
-	if (--rdtp->dynticks_nesting)
-		return;
-	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
-	rdtp->dynticks++;
-	WARN_ON_ONCE(rdtp->dynticks & 0x1);
-
-	/* If the interrupt queued a callback, get out of dyntick mode. */
-	if (__this_cpu_read(rcu_sched_data.nxtlist) ||
-	    __this_cpu_read(rcu_bh_data.nxtlist))
-		set_need_resched();
+	rcu_enter_nohz();
 }
 
 #ifdef CONFIG_SMP
@@ -436,19 +442,8 @@ void rcu_irq_exit(void)
  */
 static int dyntick_save_progress_counter(struct rcu_data *rdp)
 {
-	int ret;
-	int snap;
-	int snap_nmi;
-
-	snap = rdp->dynticks->dynticks;
-	snap_nmi = rdp->dynticks->dynticks_nmi;
-	smp_mb(); /* Order sampling of snap with end of grace period. */
-	rdp->dynticks_snap = snap;
-	rdp->dynticks_nmi_snap = snap_nmi;
-	ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0);
-	if (ret)
-		rdp->dynticks_fqs++;
-	return ret;
+	rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
+	return 0;
 }
 
 /*
@@ -459,16 +454,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
  */
 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 {
-	long curr;
-	long curr_nmi;
-	long snap;
-	long snap_nmi;
+	unsigned long curr;
+	unsigned long snap;
 
-	curr = rdp->dynticks->dynticks;
-	snap = rdp->dynticks_snap;
-	curr_nmi = rdp->dynticks->dynticks_nmi;
-	snap_nmi = rdp->dynticks_nmi_snap;
-	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+	curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks);
+	snap = (unsigned long)rdp->dynticks_snap;
 
 	/*
 	 * If the CPU passed through or entered a dynticks idle phase with
@@ -478,8 +468,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	 * read-side critical section that started before the beginning
 	 * of the current RCU grace period.
 	 */
-	if ((curr != snap || (curr & 0x1) == 0) &&
-	    (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) {
+	if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) {
 		rdp->dynticks_fqs++;
 		return 1;
 	}
@@ -908,6 +897,12 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	unsigned long gp_duration;
 
 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
+
+	/*
+	 * Ensure that all grace-period and pre-grace-period activity
+	 * is seen before the assignment to rsp->completed.
+	 */
+	smp_mb(); /* See above block comment. */
 	gp_duration = jiffies - rsp->gp_start;
 	if (gp_duration > rsp->gp_max)
 		rsp->gp_max = gp_duration;
@@ -1455,25 +1450,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static void rcu_process_callbacks(void)
 {
-	/*
-	 * Memory references from any prior RCU read-side critical sections
-	 * executed by the interrupted code must be seen before any RCU
-	 * grace-period manipulations below.
-	 */
-	smp_mb(); /* See above block comment. */
-
 	__rcu_process_callbacks(&rcu_sched_state,
 				&__get_cpu_var(rcu_sched_data));
 	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
 	rcu_preempt_process_callbacks();
 
-	/*
-	 * Memory references from any later RCU read-side critical sections
-	 * executed by the interrupted code must be seen after any RCU
-	 * grace-period manipulations above.
-	 */
-	smp_mb(); /* See above block comment. */
-
 	/* If we are last CPU on way to dyntick-idle mode, accelerate it. */
 	rcu_needs_cpu_flush();
 }
@@ -1494,7 +1475,7 @@ static void invoke_rcu_cpu_kthread(void)
 		local_irq_restore(flags);
 		return;
 	}
-	wake_up(&__get_cpu_var(rcu_cpu_wq));
+	wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
 	local_irq_restore(flags);
 }
 
@@ -1544,13 +1525,10 @@ static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
 */
 static void rcu_cpu_kthread_timer(unsigned long arg)
 {
-	unsigned long flags;
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
 	struct rcu_node *rnp = rdp->mynode;
 
-	raw_spin_lock_irqsave(&rnp->lock, flags);
-	rnp->wakemask |= rdp->grpmask;
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	atomic_or(rdp->grpmask, &rnp->wakemask);
 	invoke_rcu_node_kthread(rnp);
 }
 
@@ -1617,14 +1595,12 @@ static int rcu_cpu_kthread(void *arg)
 	unsigned long flags;
 	int spincnt = 0;
 	unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
-	wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu);
 	char work;
 	char *workp = &per_cpu(rcu_cpu_has_work, cpu);
 
 	for (;;) {
 		*statusp = RCU_KTHREAD_WAITING;
-		wait_event_interruptible(*wqp,
-					 *workp != 0 || kthread_should_stop());
+		rcu_wait(*workp != 0 || kthread_should_stop());
 		local_bh_disable();
 		if (rcu_cpu_kthread_should_stop(cpu)) {
 			local_bh_enable();
@@ -1675,7 +1651,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
 	per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
 	WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
 	per_cpu(rcu_cpu_kthread_task, cpu) = t;
-	wake_up_process(t);
 	sp.sched_priority = RCU_KTHREAD_PRIO;
 	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	return 0;
@@ -1698,11 +1673,10 @@ static int rcu_node_kthread(void *arg)
 
 	for (;;) {
 		rnp->node_kthread_status = RCU_KTHREAD_WAITING;
-		wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0);
+		rcu_wait(atomic_read(&rnp->wakemask) != 0);
 		rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
-		mask = rnp->wakemask;
-		rnp->wakemask = 0;
+		mask = atomic_xchg(&rnp->wakemask, 0);
 		rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
 		for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
 			if ((mask & 0x1) == 0)
@@ -1783,13 +1757,14 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		rnp->node_kthread_task = t;
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		wake_up_process(t);
 		sp.sched_priority = 99;
 		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	}
 	return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
 }
 
+static void rcu_wake_one_boost_kthread(struct rcu_node *rnp);
+
 /*
 * Spawn all kthreads -- called as soon as the scheduler is running.
 */
@@ -1797,24 +1772,31 @@ static int __init rcu_spawn_kthreads(void)
 {
 	int cpu;
 	struct rcu_node *rnp;
+	struct task_struct *t;
 
 	rcu_kthreads_spawnable = 1;
 	for_each_possible_cpu(cpu) {
-		init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu));
 		per_cpu(rcu_cpu_has_work, cpu) = 0;
-		if (cpu_online(cpu))
+		if (cpu_online(cpu)) {
 			(void)rcu_spawn_one_cpu_kthread(cpu);
+			t = per_cpu(rcu_cpu_kthread_task, cpu);
+			if (t)
+				wake_up_process(t);
+		}
 	}
 	rnp = rcu_get_root(rcu_state);
-	init_waitqueue_head(&rnp->node_wq);
-	rcu_init_boost_waitqueue(rnp);
 	(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
-	if (NUM_RCU_NODES > 1)
+	if (rnp->node_kthread_task)
+		wake_up_process(rnp->node_kthread_task);
+	if (NUM_RCU_NODES > 1) {
 		rcu_for_each_leaf_node(rcu_state, rnp) {
-			init_waitqueue_head(&rnp->node_wq);
-			rcu_init_boost_waitqueue(rnp);
 			(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+			t = rnp->node_kthread_task;
+			if (t)
+				wake_up_process(t);
+			rcu_wake_one_boost_kthread(rnp);
 		}
+	}
 	return 0;
 }
 early_initcall(rcu_spawn_kthreads);
@@ -2218,14 +2200,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 }
 
-static void __cpuinit rcu_online_cpu(int cpu)
+static void __cpuinit rcu_prepare_cpu(int cpu)
 {
 	rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
 	rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
 	rcu_preempt_init_percpu_data(cpu);
 }
 
-static void __cpuinit rcu_online_kthreads(int cpu)
+static void __cpuinit rcu_prepare_kthreads(int cpu)
 {
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;
@@ -2239,6 +2221,31 @@ static void __cpuinit rcu_online_kthreads(int cpu)
 }
 
 /*
+ * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state,
+ * but the RCU threads are woken on demand, and if demand is low this
+ * could be a while triggering the hung task watchdog.
+ *
+ * In order to avoid this, poke all tasks once the CPU is fully
+ * up and running.
+ */
+static void __cpuinit rcu_online_kthreads(int cpu)
+{
+	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
+	struct rcu_node *rnp = rdp->mynode;
+	struct task_struct *t;
+
+	t = per_cpu(rcu_cpu_kthread_task, cpu);
+	if (t)
+		wake_up_process(t);
+
+	t = rnp->node_kthread_task;
+	if (t)
+		wake_up_process(t);
+
+	rcu_wake_one_boost_kthread(rnp);
+}
+
+/*
  * Handle CPU online/offline notification events.
  */
 static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
@@ -2251,10 +2258,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		rcu_online_cpu(cpu);
-		rcu_online_kthreads(cpu);
+		rcu_prepare_cpu(cpu);
+		rcu_prepare_kthreads(cpu);
 		break;
 	case CPU_ONLINE:
+		rcu_online_kthreads(cpu);
 	case CPU_DOWN_FAILED:
 		rcu_node_kthread_setaffinity(rnp, -1);
 		rcu_cpu_kthread_setrt(cpu, 1);
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 257664815d5d..7b9a08b4aaea 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -84,11 +84,9 @@
  * Dynticks per-CPU state.
  */
 struct rcu_dynticks {
-	int dynticks_nesting;	/* Track nesting level, sort of. */
-	int dynticks;		/* Even value for dynticks-idle, else odd. */
-	int dynticks_nmi;	/* Even value for either dynticks-idle or */
-				/*  not in nmi handler, else odd.  So this */
-				/*  remains even for nmi from irq handler. */
+	int dynticks_nesting;	/* Track irq/process nesting level. */
+	int dynticks_nmi_nesting; /* Track NMI nesting level. */
+	atomic_t dynticks;	/* Even value for dynticks-idle, else odd. */
 };
 
 /* RCU's kthread states for tracing. */
@@ -121,7 +119,9 @@ struct rcu_node {
 				/* elements that need to drain to allow the */
 				/*  current expedited grace period to */
 				/*  complete (only for TREE_PREEMPT_RCU). */
-	unsigned long wakemask;	/* CPUs whose kthread needs to be awakened. */
+	atomic_t wakemask;	/* CPUs whose kthread needs to be awakened. */
+				/*  Since this has meaning only for leaf */
+				/*  rcu_node structures, 32 bits suffices. */
 	unsigned long qsmaskinit;
 				/* Per-GP initial value for qsmask & expmask. */
 	unsigned long grpmask;	/* Mask to apply to parent qsmask. */
@@ -159,9 +159,6 @@ struct rcu_node {
 	struct task_struct *boost_kthread_task;
 				/* kthread that takes care of priority */
 				/*  boosting for this rcu_node structure. */
-	wait_queue_head_t boost_wq;
-				/* Wait queue on which to park the boost */
-				/*  kthread. */
 	unsigned int boost_kthread_status;
 				/* State of boost_kthread_task for tracing. */
 	unsigned long n_tasks_boosted;
@@ -188,9 +185,6 @@ struct rcu_node {
 				/* kthread that takes care of this rcu_node */
 				/*  structure, for example, awakening the */
 				/*  per-CPU kthreads as needed. */
-	wait_queue_head_t node_wq;
-				/* Wait queue on which to park the per-node */
-				/*  kthread. */
 	unsigned int node_kthread_status;
 				/* State of node_kthread_task for tracing. */
 } ____cacheline_internodealigned_in_smp;
@@ -284,7 +278,6 @@ struct rcu_data {
 	/* 3) dynticks interface. */
 	struct rcu_dynticks *dynticks;	/* Shared per-CPU dynticks state. */
 	int dynticks_snap;		/* Per-GP tracking for dynticks. */
-	int dynticks_nmi_snap;		/* Per-GP tracking for dynticks_nmi. */
 #endif /* #ifdef CONFIG_NO_HZ */
 
 	/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
@@ -337,6 +330,16 @@ struct rcu_data {
 					/*  scheduling clock irq */
 					/*  before ratting on them. */
 
+#define rcu_wait(cond)							\
+do {									\
+	for (;;) {							\
+		set_current_state(TASK_INTERRUPTIBLE);			\
+		if (cond)						\
+			break;						\
+		schedule();						\
+	}								\
+	__set_current_state(TASK_RUNNING);				\
+} while (0)
 
 /*
  * RCU global state, including node hierarchy. This hierarchy is
@@ -446,7 +449,6 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
 static void rcu_preempt_send_cbs_to_online(void);
 static void __init __rcu_init_preempt(void);
 static void rcu_needs_cpu_flush(void);
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
 					  cpumask_var_t cm);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3f6559a5f5cd..c8bff3099a89 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1196,8 +1196,7 @@ static int rcu_boost_kthread(void *arg)
 
 	for (;;) {
 		rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
-		wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks ||
-							rnp->exp_tasks);
+		rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
 		rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
 		more2boost = rcu_boost(rnp);
 		if (more2boost)
@@ -1275,14 +1274,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
 }
 
 /*
- * Initialize the RCU-boost waitqueue.
- */
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
-{
-	init_waitqueue_head(&rnp->boost_wq);
-}
-
-/*
  * Create an RCU-boost kthread for the specified node if one does not
  * already exist. We only create this kthread for preemptible RCU.
  * Returns zero if all is well, a negated errno otherwise.
@@ -1306,12 +1297,17 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	rnp->boost_kthread_task = t;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	wake_up_process(t);
 	sp.sched_priority = RCU_KTHREAD_PRIO;
 	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	return 0;
 }
 
+static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
+{
+	if (rnp->boost_kthread_task)
+		wake_up_process(rnp->boost_kthread_task);
+}
+
 #else /* #ifdef CONFIG_RCU_BOOST */
 
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
@@ -1328,10 +1324,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
 {
 }
 
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
-{
-}
-
 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 						 struct rcu_node *rnp,
 						 int rnp_index)
@@ -1339,6 +1331,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 	return 0;
 }
 
+static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
+{
+}
+
 #endif /* #else #ifdef CONFIG_RCU_BOOST */
 
 #ifndef CONFIG_SMP
@@ -1520,7 +1516,6 @@ int rcu_needs_cpu(int cpu)
 {
 	int c = 0;
 	int snap;
-	int snap_nmi;
 	int thatcpu;
 
 	/* Check for being in the holdoff period. */
@@ -1531,10 +1526,10 @@ int rcu_needs_cpu(int cpu)
 	for_each_online_cpu(thatcpu) {
 		if (thatcpu == cpu)
 			continue;
-		snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
-		snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
+		snap = atomic_add_return(0, &per_cpu(rcu_dynticks,
+						     thatcpu).dynticks);
 		smp_mb(); /* Order sampling of snap with end of grace period. */
-		if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) {
+		if ((snap & 0x1) != 0) {
 			per_cpu(rcu_dyntick_drain, cpu) = 0;
 			per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
 			return rcu_needs_cpu_quick_check(cpu);
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index aa0fd72b4bc7..9678cc3650f5 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -69,10 +69,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->passed_quiesc, rdp->passed_quiesc_completed,
 		   rdp->qs_pending);
 #ifdef CONFIG_NO_HZ
-	seq_printf(m, " dt=%d/%d dn=%d df=%lu",
-		   rdp->dynticks->dynticks,
+	seq_printf(m, " dt=%d/%d/%d df=%lu",
+		   atomic_read(&rdp->dynticks->dynticks),
 		   rdp->dynticks->dynticks_nesting,
-		   rdp->dynticks->dynticks_nmi,
+		   rdp->dynticks->dynticks_nmi_nesting,
 		   rdp->dynticks_fqs);
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -141,9 +141,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->qs_pending);
 #ifdef CONFIG_NO_HZ
 	seq_printf(m, ",%d,%d,%d,%lu",
-		   rdp->dynticks->dynticks,
+		   atomic_read(&rdp->dynticks->dynticks),
 		   rdp->dynticks->dynticks_nesting,
-		   rdp->dynticks->dynticks_nmi,
+		   rdp->dynticks->dynticks_nmi_nesting,
 		   rdp->dynticks_fqs);
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -167,7 +167,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
 {
 	seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
 #ifdef CONFIG_NO_HZ
-	seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
+	seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n");
 #ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/kernel/sched.c b/kernel/sched.c
index 5e43e9dc65d1..cbb3a0eee58e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2573,7 +2573,26 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu)
 	if (!next)
 		smp_send_reschedule(cpu);
 }
-#endif
+
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+static int ttwu_activate_remote(struct task_struct *p, int wake_flags)
+{
+	struct rq *rq;
+	int ret = 0;
+
+	rq = __task_rq_lock(p);
+	if (p->on_cpu) {
+		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+		ttwu_do_wakeup(rq, p, wake_flags);
+		ret = 1;
+	}
+	__task_rq_unlock(rq);
+
+	return ret;
+
+}
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
+#endif /* CONFIG_SMP */
 
 static void ttwu_queue(struct task_struct *p, int cpu)
 {
@@ -2631,17 +2650,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	while (p->on_cpu) {
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 		/*
-		 * If called from interrupt context we could have landed in the
-		 * middle of schedule(), in this case we should take care not
-		 * to spin on ->on_cpu if p is current, since that would
-		 * deadlock.
+		 * In case the architecture enables interrupts in
+		 * context_switch(), we cannot busy wait, since that
+		 * would lead to deadlocks when an interrupt hits and
+		 * tries to wake up @prev. So bail and do a complete
+		 * remote wakeup.
 		 */
-		if (p == current) {
-			ttwu_queue(p, cpu);
+		if (ttwu_activate_remote(p, wake_flags))
 			goto stat;
-		}
-#endif
+#else
 		cpu_relax();
+#endif
 	}
 	/*
 	 * Pairs with the smp_wmb() in finish_lock_switch().
@@ -5841,7 +5860,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
 
-	cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+	do_set_cpus_allowed(idle, cpumask_of(cpu));
 	/*
 	 * We're having a chicken and egg problem, even though we are
 	 * holding rq->lock, the cpu isn't yet set to this cpu so the
@@ -5929,6 +5948,16 @@ static inline void sched_init_granularity(void)
 }
 
 #ifdef CONFIG_SMP
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+	if (p->sched_class && p->sched_class->set_cpus_allowed)
+		p->sched_class->set_cpus_allowed(p, new_mask);
+	else {
+		cpumask_copy(&p->cpus_allowed, new_mask);
+		p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
+	}
+}
+
 /*
  * This is how migration works:
  *
@@ -5974,12 +6003,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 		goto out;
 	}
 
-	if (p->sched_class->set_cpus_allowed)
-		p->sched_class->set_cpus_allowed(p, new_mask);
-	else {
-		cpumask_copy(&p->cpus_allowed, new_mask);
-		p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
-	}
+	do_set_cpus_allowed(p, new_mask);
 
 	/* Can the task run on the task's current CPU? If so, we're done */
 	if (cpumask_test_cpu(task_cpu(p), new_mask))
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e32a9b70ee9c..433491c2dc8f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1076,8 +1076,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	se->on_rq = 0;
 	update_cfs_load(cfs_rq, 0);
 	account_entity_dequeue(cfs_rq, se);
-	update_min_vruntime(cfs_rq);
-	update_cfs_shares(cfs_rq);
 
 	/*
 	 * Normalize the entity after updating the min_vruntime because the
@@ -1086,6 +1084,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 */
 	if (!(flags & DEQUEUE_SLEEP))
 		se->vruntime -= cfs_rq->min_vruntime;
+
+	update_min_vruntime(cfs_rq);
+	update_cfs_shares(cfs_rq);
 }
 
 /*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 64b2a37c07d0..88725c939e0b 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1263,6 +1263,7 @@ static int find_lowest_rq(struct task_struct *task)
 	if (!cpumask_test_cpu(this_cpu, lowest_mask))
 		this_cpu = -1; /* Skip this_cpu opt if not among lowest */
 
+	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		if (sd->flags & SD_WAKE_AFFINE) {
 			int best_cpu;
@@ -1272,15 +1273,20 @@ static int find_lowest_rq(struct task_struct *task)
 			 * remote processor.
 			 */
 			if (this_cpu != -1 &&
-			    cpumask_test_cpu(this_cpu, sched_domain_span(sd)))
+			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
+				rcu_read_unlock();
 				return this_cpu;
+			}
 
 			best_cpu = cpumask_first_and(lowest_mask,
 						     sched_domain_span(sd));
-			if (best_cpu < nr_cpu_ids)
+			if (best_cpu < nr_cpu_ids) {
+				rcu_read_unlock();
 				return best_cpu;
+			}
 		}
 	}
+	rcu_read_unlock();
 
 	/*
 	 * And finally, if there were no matches within the domains
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 48ddf431db0e..331e01bcd026 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -37,7 +37,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
 
 #ifdef CONFIG_SMP
 		/* domain-specific stats */
-		preempt_disable();
+		rcu_read_lock();
 		for_each_domain(cpu, sd) {
 			enum cpu_idle_type itype;
 
@@ -64,7 +64,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
 				    sd->ttwu_wake_remote, sd->ttwu_move_affine,
 				    sd->ttwu_move_balance);
 		}
-		preempt_enable();
+		rcu_read_unlock();
 #endif
 	}
 	kfree(mask_str);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index d017c2c82c44..1ee417fcbfa5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -109,12 +109,18 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
109static void ftrace_global_list_func(unsigned long ip, 109static void ftrace_global_list_func(unsigned long ip,
110 unsigned long parent_ip) 110 unsigned long parent_ip)
111{ 111{
112 struct ftrace_ops *op = rcu_dereference_raw(ftrace_global_list); /*see above*/ 112 struct ftrace_ops *op;
113
114 if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
115 return;
113 116
117 trace_recursion_set(TRACE_GLOBAL_BIT);
118 op = rcu_dereference_raw(ftrace_global_list); /*see above*/
114 while (op != &ftrace_list_end) { 119 while (op != &ftrace_list_end) {
115 op->func(ip, parent_ip); 120 op->func(ip, parent_ip);
116 op = rcu_dereference_raw(op->next); /*see above*/ 121 op = rcu_dereference_raw(op->next); /*see above*/
117 }; 122 };
123 trace_recursion_clear(TRACE_GLOBAL_BIT);
118} 124}
119 125
120static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) 126static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
@@ -1638,12 +1644,12 @@ static void ftrace_startup_enable(int command)
1638 ftrace_run_update_code(command); 1644 ftrace_run_update_code(command);
1639} 1645}
1640 1646
1641static void ftrace_startup(struct ftrace_ops *ops, int command) 1647static int ftrace_startup(struct ftrace_ops *ops, int command)
1642{ 1648{
1643 bool hash_enable = true; 1649 bool hash_enable = true;
1644 1650
1645 if (unlikely(ftrace_disabled)) 1651 if (unlikely(ftrace_disabled))
1646 return; 1652 return -ENODEV;
1647 1653
1648 ftrace_start_up++; 1654 ftrace_start_up++;
1649 command |= FTRACE_ENABLE_CALLS; 1655 command |= FTRACE_ENABLE_CALLS;
@@ -1662,6 +1668,8 @@ static void ftrace_startup(struct ftrace_ops *ops, int command)
1662 ftrace_hash_rec_enable(ops, 1); 1668 ftrace_hash_rec_enable(ops, 1);
1663 1669
1664 ftrace_startup_enable(command); 1670 ftrace_startup_enable(command);
1671
1672 return 0;
1665} 1673}
1666 1674
1667static void ftrace_shutdown(struct ftrace_ops *ops, int command) 1675static void ftrace_shutdown(struct ftrace_ops *ops, int command)
@@ -2501,7 +2509,7 @@ static void __enable_ftrace_function_probe(void)
2501 2509
2502 ret = __register_ftrace_function(&trace_probe_ops); 2510 ret = __register_ftrace_function(&trace_probe_ops);
2503 if (!ret) 2511 if (!ret)
2504 ftrace_startup(&trace_probe_ops, 0); 2512 ret = ftrace_startup(&trace_probe_ops, 0);
2505 2513
2506 ftrace_probe_registered = 1; 2514 ftrace_probe_registered = 1;
2507} 2515}
@@ -3466,7 +3474,11 @@ device_initcall(ftrace_nodyn_init);
3466static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } 3474static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
3467static inline void ftrace_startup_enable(int command) { } 3475static inline void ftrace_startup_enable(int command) { }
3468/* Keep as macros so we do not need to define the commands */ 3476/* Keep as macros so we do not need to define the commands */
3469# define ftrace_startup(ops, command) do { } while (0) 3477# define ftrace_startup(ops, command) \
3478 ({ \
3479 (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
3480 0; \
3481 })
3470# define ftrace_shutdown(ops, command) do { } while (0) 3482# define ftrace_shutdown(ops, command) do { } while (0)
3471# define ftrace_startup_sysctl() do { } while (0) 3483# define ftrace_startup_sysctl() do { } while (0)
3472# define ftrace_shutdown_sysctl() do { } while (0) 3484# define ftrace_shutdown_sysctl() do { } while (0)
@@ -3484,6 +3496,10 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
3484{ 3496{
3485 struct ftrace_ops *op; 3497 struct ftrace_ops *op;
3486 3498
3499 if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
3500 return;
3501
3502 trace_recursion_set(TRACE_INTERNAL_BIT);
3487 /* 3503 /*
3488 * Some of the ops may be dynamically allocated, 3504 * Some of the ops may be dynamically allocated,
3489 * they must be freed after a synchronize_sched(). 3505 * they must be freed after a synchronize_sched().
@@ -3496,6 +3512,7 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
3496 op = rcu_dereference_raw(op->next); 3512 op = rcu_dereference_raw(op->next);
3497 }; 3513 };
3498 preempt_enable_notrace(); 3514 preempt_enable_notrace();
3515 trace_recursion_clear(TRACE_INTERNAL_BIT);
3499} 3516}
3500 3517
3501static void clear_ftrace_swapper(void) 3518static void clear_ftrace_swapper(void)
@@ -3799,7 +3816,7 @@ int register_ftrace_function(struct ftrace_ops *ops)
3799 3816
3800 ret = __register_ftrace_function(ops); 3817 ret = __register_ftrace_function(ops);
3801 if (!ret) 3818 if (!ret)
3802 ftrace_startup(ops, 0); 3819 ret = ftrace_startup(ops, 0);
3803 3820
3804 3821
3805 out_unlock: 3822 out_unlock:
@@ -4045,7 +4062,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
4045 ftrace_graph_return = retfunc; 4062 ftrace_graph_return = retfunc;
4046 ftrace_graph_entry = entryfunc; 4063 ftrace_graph_entry = entryfunc;
4047 4064
4048 ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); 4065 ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);
4049 4066
4050out: 4067out:
4051 mutex_unlock(&ftrace_lock); 4068 mutex_unlock(&ftrace_lock);
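
Taken together, the ftrace.c hunks above do two things: the list-walking callbacks now test and set a per-task recursion bit before iterating the ops list, so a traced function cannot re-enter the walker, and ftrace_startup() returns an int so callers such as register_ftrace_function() and register_ftrace_graph() can propagate -ENODEV when ftrace is disabled. Below is a minimal userspace sketch of the recursion-guard pattern only; the flag word, bit value and function names are stand-ins for illustration, not the kernel's actual implementation.

/* Sketch: guard a re-entrant callback walker with a single flag bit.
 * GUARD_BIT and recursion_flags stand in for TRACE_INTERNAL_BIT /
 * TRACE_GLOBAL_BIT and current->trace_recursion. */
#include <stdio.h>

#define GUARD_BIT (1 << 11)

static unsigned long recursion_flags;       /* stand-in for current->trace_recursion */

static void traced_callback(void);

static void list_func(void)
{
        if (recursion_flags & GUARD_BIT)    /* like trace_recursion_test()  */
                return;                     /* already inside the walker    */
        recursion_flags |= GUARD_BIT;       /* like trace_recursion_set()   */

        traced_callback();                  /* may call back into list_func() */

        recursion_flags &= ~GUARD_BIT;      /* like trace_recursion_clear() */
}

static void traced_callback(void)
{
        printf("callback ran\n");
        list_func();                        /* re-entry is suppressed by the guard */
}

int main(void)
{
        list_func();                        /* prints "callback ran" exactly once */
        return 0;
}

In the hunks above the flag lives in current->trace_recursion and is manipulated through the trace_recursion_set/clear/test macros introduced in the trace.h hunk further down.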
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0ef7b4b2a1f7..b0c7aa407943 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2216,7 +2216,7 @@ static noinline void trace_recursive_fail(void)
2216 2216
2217 printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" 2217 printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
2218 "HC[%lu]:SC[%lu]:NMI[%lu]\n", 2218 "HC[%lu]:SC[%lu]:NMI[%lu]\n",
2219 current->trace_recursion, 2219 trace_recursion_buffer(),
2220 hardirq_count() >> HARDIRQ_SHIFT, 2220 hardirq_count() >> HARDIRQ_SHIFT,
2221 softirq_count() >> SOFTIRQ_SHIFT, 2221 softirq_count() >> SOFTIRQ_SHIFT,
2222 in_nmi()); 2222 in_nmi());
@@ -2226,9 +2226,9 @@ static noinline void trace_recursive_fail(void)
2226 2226
2227static inline int trace_recursive_lock(void) 2227static inline int trace_recursive_lock(void)
2228{ 2228{
2229 current->trace_recursion++; 2229 trace_recursion_inc();
2230 2230
2231 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) 2231 if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
2232 return 0; 2232 return 0;
2233 2233
2234 trace_recursive_fail(); 2234 trace_recursive_fail();
@@ -2238,9 +2238,9 @@ static inline int trace_recursive_lock(void)
2238 2238
2239static inline void trace_recursive_unlock(void) 2239static inline void trace_recursive_unlock(void)
2240{ 2240{
2241 WARN_ON_ONCE(!current->trace_recursion); 2241 WARN_ON_ONCE(!trace_recursion_buffer());
2242 2242
2243 current->trace_recursion--; 2243 trace_recursion_dec();
2244} 2244}
2245 2245
2246#else 2246#else
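
The ring_buffer.c hunks stop touching current->trace_recursion directly and go through the trace_recursion_inc()/dec()/buffer() accessors instead; the depth-counted lock logic itself is unchanged. A rough standalone sketch of that depth-counted pattern follows, with an invented RECURSIVE_DEPTH limit, a plain counter, and ordinary functions standing in for the macros; it illustrates the shape of the check, not the kernel code.

/* Sketch: a recursion "lock" that admits a bounded nesting depth. */
#include <stdio.h>

#define RECURSIVE_DEPTH 4                       /* illustrative limit only */

static unsigned long recursion_count;           /* stand-in for the per-task counter */

static int recursive_lock(void)
{
        recursion_count++;                      /* like trace_recursion_inc() */
        if (recursion_count < RECURSIVE_DEPTH)
                return 0;                       /* shallow enough, proceed    */
        fprintf(stderr, "too deep: %lu\n", recursion_count);
        return -1;                              /* caller must back off       */
}

static void recursive_unlock(void)
{
        if (!recursion_count)
                fprintf(stderr, "unbalanced unlock\n");  /* WARN analogue     */
        else
                recursion_count--;              /* like trace_recursion_dec() */
}

static void reserve_event(int nesting)
{
        if (recursive_lock()) {
                recursive_unlock();             /* balance the failed attempt's increment */
                return;
        }
        printf("event recorded at depth %lu\n", recursion_count);
        if (nesting)
                reserve_event(nesting - 1);     /* nested event, e.g. from an interrupt */
        recursive_unlock();
}

int main(void)
{
        reserve_event(10);                      /* only the first few levels get through */
        return 0;
}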
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6b69c4bd306f..229f8591f61d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -784,4 +784,19 @@ extern const char *__stop___trace_bprintk_fmt[];
784 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 784 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
785#include "trace_entries.h" 785#include "trace_entries.h"
786 786
787/* Only current can touch trace_recursion */
788#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
789#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
790
791/* Ring buffer has the 10 LSB bits to count */
792#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
793
794/* for function tracing recursion */
795#define TRACE_INTERNAL_BIT (1<<11)
796#define TRACE_GLOBAL_BIT (1<<12)
797
798#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
799#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
800#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
801
787#endif /* _LINUX_KERNEL_TRACE_H */ 802#endif /* _LINUX_KERNEL_TRACE_H */
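
The new trace.h macros pack everything into the single per-task trace_recursion word: the low 10 bits (mask 0x3ff) hold the ring-buffer depth counter, while TRACE_INTERNAL_BIT (1<<11) and TRACE_GLOBAL_BIT (1<<12) are independent flag bits used by the ftrace.c guards above. A small compilable sketch of that layout, using a local struct in place of task_struct:

/* Sketch: counter and flag bits coexisting in one word. */
#include <assert.h>
#include <stdio.h>

#define RECURSION_COUNT_MASK 0x3ff      /* bits 0-9: ring-buffer depth counter  */
#define TRACE_INTERNAL_BIT   (1 << 11)  /* flag: inside ftrace_ops_list_func()  */
#define TRACE_GLOBAL_BIT     (1 << 12)  /* flag: inside the global list walker  */

struct fake_task {                      /* stands in for task_struct            */
        unsigned long trace_recursion;
};

int main(void)
{
        struct fake_task t = { 0 };

        t.trace_recursion |= TRACE_INTERNAL_BIT;    /* like trace_recursion_set() */
        t.trace_recursion++;                        /* like trace_recursion_inc() */
        t.trace_recursion++;

        /* Counter and flags occupy disjoint bits, so neither clobbers the other. */
        assert((t.trace_recursion & RECURSION_COUNT_MASK) == 2);
        assert(t.trace_recursion & TRACE_INTERNAL_BIT);
        assert(!(t.trace_recursion & TRACE_GLOBAL_BIT));

        t.trace_recursion &= ~TRACE_INTERNAL_BIT;   /* like trace_recursion_clear() */
        printf("depth=%lu, flag cleared\n",
               t.trace_recursion & RECURSION_COUNT_MASK);
        return 0;
}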
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 2fe110341359..686ec399f2a8 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1657,7 +1657,12 @@ static struct ftrace_ops trace_ops __initdata =
1657 1657
1658static __init void event_trace_self_test_with_function(void) 1658static __init void event_trace_self_test_with_function(void)
1659{ 1659{
1660 register_ftrace_function(&trace_ops); 1660 int ret;
1661 ret = register_ftrace_function(&trace_ops);
1662 if (WARN_ON(ret < 0)) {
1663 pr_info("Failed to enable function tracer for event tests\n");
1664 return;
1665 }
1661 pr_info("Running tests again, along with the function tracer\n"); 1666 pr_info("Running tests again, along with the function tracer\n");
1662 event_trace_self_tests(); 1667 event_trace_self_tests();
1663 unregister_ftrace_function(&trace_ops); 1668 unregister_ftrace_function(&trace_ops);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index cf535ccedc86..e37de492a9e1 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -353,6 +353,33 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
353} 353}
354EXPORT_SYMBOL(ftrace_print_symbols_seq); 354EXPORT_SYMBOL(ftrace_print_symbols_seq);
355 355
356#if BITS_PER_LONG == 32
357const char *
358ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
359 const struct trace_print_flags_u64 *symbol_array)
360{
361 int i;
362 const char *ret = p->buffer + p->len;
363
364 for (i = 0; symbol_array[i].name; i++) {
365
366 if (val != symbol_array[i].mask)
367 continue;
368
369 trace_seq_puts(p, symbol_array[i].name);
370 break;
371 }
372
373 if (!p->len)
374 trace_seq_printf(p, "0x%llx", val);
375
376 trace_seq_putc(p, 0);
377
378 return ret;
379}
380EXPORT_SYMBOL(ftrace_print_symbols_seq_u64);
381#endif
382
356const char * 383const char *
357ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) 384ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
358{ 385{
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7daa4b072e9f..3d0c56ad4792 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -415,15 +415,13 @@ static void watchdog_nmi_disable(int cpu) { return; }
415#endif /* CONFIG_HARDLOCKUP_DETECTOR */ 415#endif /* CONFIG_HARDLOCKUP_DETECTOR */
416 416
417/* prepare/enable/disable routines */ 417/* prepare/enable/disable routines */
418static int watchdog_prepare_cpu(int cpu) 418static void watchdog_prepare_cpu(int cpu)
419{ 419{
420 struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); 420 struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
421 421
422 WARN_ON(per_cpu(softlockup_watchdog, cpu)); 422 WARN_ON(per_cpu(softlockup_watchdog, cpu));
423 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 423 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
424 hrtimer->function = watchdog_timer_fn; 424 hrtimer->function = watchdog_timer_fn;
425
426 return 0;
427} 425}
428 426
429static int watchdog_enable(int cpu) 427static int watchdog_enable(int cpu)
@@ -542,17 +540,16 @@ static int __cpuinit
542cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 540cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
543{ 541{
544 int hotcpu = (unsigned long)hcpu; 542 int hotcpu = (unsigned long)hcpu;
545 int err = 0;
546 543
547 switch (action) { 544 switch (action) {
548 case CPU_UP_PREPARE: 545 case CPU_UP_PREPARE:
549 case CPU_UP_PREPARE_FROZEN: 546 case CPU_UP_PREPARE_FROZEN:
550 err = watchdog_prepare_cpu(hotcpu); 547 watchdog_prepare_cpu(hotcpu);
551 break; 548 break;
552 case CPU_ONLINE: 549 case CPU_ONLINE:
553 case CPU_ONLINE_FROZEN: 550 case CPU_ONLINE_FROZEN:
554 if (watchdog_enabled) 551 if (watchdog_enabled)
555 err = watchdog_enable(hotcpu); 552 watchdog_enable(hotcpu);
556 break; 553 break;
557#ifdef CONFIG_HOTPLUG_CPU 554#ifdef CONFIG_HOTPLUG_CPU
558 case CPU_UP_CANCELED: 555 case CPU_UP_CANCELED: