aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2013-09-04 12:36:54 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-09-04 12:36:54 -0400
commit 6832d9652f395f7d13003e3884942c40f52ac1fa (patch)
tree 40555ad5eda9700cb973dac4db136ad97f5e8b19 /kernel
parent 228abe73ad67665d71eacd6a8a347dd76b0115ae (diff)
parent c2e7fcf53c3cb02b4ada1c66a9bc8a4d97d58aba (diff)
Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timers/nohz changes from Ingo Molnar:
 "It mostly contains fixes and full dynticks off-case optimizations, by
  Frederic Weisbecker"

* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  nohz: Include local CPU in full dynticks global kick
  nohz: Optimize full dynticks's sched hooks with static keys
  nohz: Optimize full dynticks state checks with static keys
  nohz: Rename a few state variables
  vtime: Always debug check snapshot source _before_ updating it
  vtime: Always scale generic vtime accounting results
  vtime: Optimize full dynticks accounting off case with static keys
  vtime: Describe overriden functions in dedicated arch headers
  m68k: hardirq_count() only need preempt_mask.h
  hardirq: Split preempt count mask definitions
  context_tracking: Split low level state headers
  vtime: Fix racy cputime delta update
  vtime: Remove a few unneeded generic vtime state checks
  context_tracking: User/kernel broundary cross trace events
  context_tracking: Optimize context switch off case with static keys
  context_tracking: Optimize guest APIs off case with static key
  context_tracking: Optimize main APIs off case with static key
  context_tracking: Ground setup for static key use
  context_tracking: Remove full dynticks' hacky dependency on wide context tracking
  nohz: Only enable context tracking on full dynticks CPUs
  ...
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/context_tracking.c 125
-rw-r--r-- kernel/sched/core.c 4
-rw-r--r-- kernel/sched/cputime.c 53
-rw-r--r-- kernel/time/Kconfig 1
-rw-r--r-- kernel/time/tick-sched.c 61
5 files changed, 117 insertions, 127 deletions
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 383f8231e436..247091bf0587 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -20,22 +20,33 @@
20#include <linux/hardirq.h> 20#include <linux/hardirq.h>
21#include <linux/export.h> 21#include <linux/export.h>
22 22
23DEFINE_PER_CPU(struct context_tracking, context_tracking) = { 23#define CREATE_TRACE_POINTS
24#ifdef CONFIG_CONTEXT_TRACKING_FORCE 24#include <trace/events/context_tracking.h>
25 .active = true, 25
26#endif 26struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;
27}; 27EXPORT_SYMBOL_GPL(context_tracking_enabled);
28
29DEFINE_PER_CPU(struct context_tracking, context_tracking);
30EXPORT_SYMBOL_GPL(context_tracking);
31
32void context_tracking_cpu_set(int cpu)
33{
34 if (!per_cpu(context_tracking.active, cpu)) {
35 per_cpu(context_tracking.active, cpu) = true;
36 static_key_slow_inc(&context_tracking_enabled);
37 }
38}
28 39
29/** 40/**
30 * user_enter - Inform the context tracking that the CPU is going to 41 * context_tracking_user_enter - Inform the context tracking that the CPU is going to
31 * enter userspace mode. 42 * enter userspace mode.
32 * 43 *
33 * This function must be called right before we switch from the kernel 44 * This function must be called right before we switch from the kernel
34 * to userspace, when it's guaranteed the remaining kernel instructions 45 * to userspace, when it's guaranteed the remaining kernel instructions
35 * to execute won't use any RCU read side critical section because this 46 * to execute won't use any RCU read side critical section because this
36 * function sets RCU in extended quiescent state. 47 * function sets RCU in extended quiescent state.
37 */ 48 */
38void user_enter(void) 49void context_tracking_user_enter(void)
39{ 50{
40 unsigned long flags; 51 unsigned long flags;
41 52
@@ -54,17 +65,32 @@ void user_enter(void)
54 WARN_ON_ONCE(!current->mm); 65 WARN_ON_ONCE(!current->mm);
55 66
56 local_irq_save(flags); 67 local_irq_save(flags);
57 if (__this_cpu_read(context_tracking.active) && 68 if ( __this_cpu_read(context_tracking.state) != IN_USER) {
58 __this_cpu_read(context_tracking.state) != IN_USER) { 69 if (__this_cpu_read(context_tracking.active)) {
70 trace_user_enter(0);
71 /*
72 * At this stage, only low level arch entry code remains and
73 * then we'll run in userspace. We can assume there won't be
74 * any RCU read-side critical section until the next call to
75 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
76 * on the tick.
77 */
78 vtime_user_enter(current);
79 rcu_user_enter();
80 }
59 /* 81 /*
60 * At this stage, only low level arch entry code remains and 82 * Even if context tracking is disabled on this CPU, because it's outside
61 * then we'll run in userspace. We can assume there won't be 83 * the full dynticks mask for example, we still have to keep track of the
62 * any RCU read-side critical section until the next call to 84 * context transitions and states to prevent inconsistency on those of
63 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency 85 * other CPUs.
64 * on the tick. 86 * If a task triggers an exception in userspace, sleep on the exception
87 * handler and then migrate to another CPU, that new CPU must know where
88 * the exception returns by the time we call exception_exit().
89 * This information can only be provided by the previous CPU when it called
90 * exception_enter().
91 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
92 * is false because we know that CPU is not tickless.
65 */ 93 */
66 vtime_user_enter(current);
67 rcu_user_enter();
68 __this_cpu_write(context_tracking.state, IN_USER); 94 __this_cpu_write(context_tracking.state, IN_USER);
69 } 95 }
70 local_irq_restore(flags); 96 local_irq_restore(flags);
@@ -87,10 +113,9 @@ void user_enter(void)
87 */ 113 */
88void __sched notrace preempt_schedule_context(void) 114void __sched notrace preempt_schedule_context(void)
89{ 115{
90 struct thread_info *ti = current_thread_info();
91 enum ctx_state prev_ctx; 116 enum ctx_state prev_ctx;
92 117
93 if (likely(ti->preempt_count || irqs_disabled())) 118 if (likely(!preemptible()))
94 return; 119 return;
95 120
96 /* 121 /*
@@ -112,8 +137,8 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
112#endif /* CONFIG_PREEMPT */ 137#endif /* CONFIG_PREEMPT */
113 138
114/** 139/**
115 * user_exit - Inform the context tracking that the CPU is 140 * context_tracking_user_exit - Inform the context tracking that the CPU is
116 * exiting userspace mode and entering the kernel. 141 * exiting userspace mode and entering the kernel.
117 * 142 *
118 * This function must be called after we entered the kernel from userspace 143 * This function must be called after we entered the kernel from userspace
119 * before any use of RCU read side critical section. This potentially include 144 * before any use of RCU read side critical section. This potentially include
@@ -122,7 +147,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
122 * This call supports re-entrancy. This way it can be called from any exception 147 * This call supports re-entrancy. This way it can be called from any exception
123 * handler without needing to know if we came from userspace or not. 148 * handler without needing to know if we came from userspace or not.
124 */ 149 */
125void user_exit(void) 150void context_tracking_user_exit(void)
126{ 151{
127 unsigned long flags; 152 unsigned long flags;
128 153
@@ -131,38 +156,22 @@ void user_exit(void)
131 156
132 local_irq_save(flags); 157 local_irq_save(flags);
133 if (__this_cpu_read(context_tracking.state) == IN_USER) { 158 if (__this_cpu_read(context_tracking.state) == IN_USER) {
134 /* 159 if (__this_cpu_read(context_tracking.active)) {
135 * We are going to run code that may use RCU. Inform 160 /*
136 * RCU core about that (ie: we may need the tick again). 161 * We are going to run code that may use RCU. Inform
137 */ 162 * RCU core about that (ie: we may need the tick again).
138 rcu_user_exit(); 163 */
139 vtime_user_exit(current); 164 rcu_user_exit();
165 vtime_user_exit(current);
166 trace_user_exit(0);
167 }
140 __this_cpu_write(context_tracking.state, IN_KERNEL); 168 __this_cpu_write(context_tracking.state, IN_KERNEL);
141 } 169 }
142 local_irq_restore(flags); 170 local_irq_restore(flags);
143} 171}
144 172
145void guest_enter(void)
146{
147 if (vtime_accounting_enabled())
148 vtime_guest_enter(current);
149 else
150 __guest_enter();
151}
152EXPORT_SYMBOL_GPL(guest_enter);
153
154void guest_exit(void)
155{
156 if (vtime_accounting_enabled())
157 vtime_guest_exit(current);
158 else
159 __guest_exit();
160}
161EXPORT_SYMBOL_GPL(guest_exit);
162
163
164/** 173/**
165 * context_tracking_task_switch - context switch the syscall callbacks 174 * __context_tracking_task_switch - context switch the syscall callbacks
166 * @prev: the task that is being switched out 175 * @prev: the task that is being switched out
167 * @next: the task that is being switched in 176 * @next: the task that is being switched in
168 * 177 *
@@ -174,11 +183,19 @@ EXPORT_SYMBOL_GPL(guest_exit);
174 * migrate to some CPU that doesn't do the context tracking. As such the TIF 183 * migrate to some CPU that doesn't do the context tracking. As such the TIF
175 * flag may not be desired there. 184 * flag may not be desired there.
176 */ 185 */
177void context_tracking_task_switch(struct task_struct *prev, 186void __context_tracking_task_switch(struct task_struct *prev,
178 struct task_struct *next) 187 struct task_struct *next)
179{ 188{
180 if (__this_cpu_read(context_tracking.active)) { 189 clear_tsk_thread_flag(prev, TIF_NOHZ);
181 clear_tsk_thread_flag(prev, TIF_NOHZ); 190 set_tsk_thread_flag(next, TIF_NOHZ);
182 set_tsk_thread_flag(next, TIF_NOHZ);
183 }
184} 191}
192
193#ifdef CONFIG_CONTEXT_TRACKING_FORCE
194void __init context_tracking_init(void)
195{
196 int cpu;
197
198 for_each_possible_cpu(cpu)
199 context_tracking_cpu_set(cpu);
200}
201#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b8e2162fc803..725aa067ad63 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2527,13 +2527,11 @@ void __sched schedule_preempt_disabled(void)
2527 */ 2527 */
2528asmlinkage void __sched notrace preempt_schedule(void) 2528asmlinkage void __sched notrace preempt_schedule(void)
2529{ 2529{
2530 struct thread_info *ti = current_thread_info();
2531
2532 /* 2530 /*
2533 * If there is a non-zero preempt_count or interrupts are disabled, 2531 * If there is a non-zero preempt_count or interrupts are disabled,
2534 * we do not want to preempt the current task. Just return.. 2532 * we do not want to preempt the current task. Just return..
2535 */ 2533 */
2536 if (likely(ti->preempt_count || irqs_disabled())) 2534 if (likely(!preemptible()))
2537 return; 2535 return;
2538 2536
2539 do { 2537 do {
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index e89ccefef278..ace34f95e200 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -378,11 +378,8 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
378#ifdef CONFIG_VIRT_CPU_ACCOUNTING 378#ifdef CONFIG_VIRT_CPU_ACCOUNTING
379 379
380#ifndef __ARCH_HAS_VTIME_TASK_SWITCH 380#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
381void vtime_task_switch(struct task_struct *prev) 381void vtime_common_task_switch(struct task_struct *prev)
382{ 382{
383 if (!vtime_accounting_enabled())
384 return;
385
386 if (is_idle_task(prev)) 383 if (is_idle_task(prev))
387 vtime_account_idle(prev); 384 vtime_account_idle(prev);
388 else 385 else
@@ -404,11 +401,8 @@ void vtime_task_switch(struct task_struct *prev)
404 * vtime_account(). 401 * vtime_account().
405 */ 402 */
406#ifndef __ARCH_HAS_VTIME_ACCOUNT 403#ifndef __ARCH_HAS_VTIME_ACCOUNT
407void vtime_account_irq_enter(struct task_struct *tsk) 404void vtime_common_account_irq_enter(struct task_struct *tsk)
408{ 405{
409 if (!vtime_accounting_enabled())
410 return;
411
412 if (!in_interrupt()) { 406 if (!in_interrupt()) {
413 /* 407 /*
414 * If we interrupted user, context_tracking_in_user() 408 * If we interrupted user, context_tracking_in_user()
@@ -428,7 +422,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
428 } 422 }
429 vtime_account_system(tsk); 423 vtime_account_system(tsk);
430} 424}
431EXPORT_SYMBOL_GPL(vtime_account_irq_enter); 425EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter);
432#endif /* __ARCH_HAS_VTIME_ACCOUNT */ 426#endif /* __ARCH_HAS_VTIME_ACCOUNT */
433#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ 427#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
434 428
@@ -559,12 +553,6 @@ static void cputime_adjust(struct task_cputime *curr,
559{ 553{
560 cputime_t rtime, stime, utime, total; 554 cputime_t rtime, stime, utime, total;
561 555
562 if (vtime_accounting_enabled()) {
563 *ut = curr->utime;
564 *st = curr->stime;
565 return;
566 }
567
568 stime = curr->stime; 556 stime = curr->stime;
569 total = stime + curr->utime; 557 total = stime + curr->utime;
570 558
@@ -664,23 +652,17 @@ static void __vtime_account_system(struct task_struct *tsk)
664 652
665void vtime_account_system(struct task_struct *tsk) 653void vtime_account_system(struct task_struct *tsk)
666{ 654{
667 if (!vtime_accounting_enabled())
668 return;
669
670 write_seqlock(&tsk->vtime_seqlock); 655 write_seqlock(&tsk->vtime_seqlock);
671 __vtime_account_system(tsk); 656 __vtime_account_system(tsk);
672 write_sequnlock(&tsk->vtime_seqlock); 657 write_sequnlock(&tsk->vtime_seqlock);
673} 658}
674 659
675void vtime_account_irq_exit(struct task_struct *tsk) 660void vtime_gen_account_irq_exit(struct task_struct *tsk)
676{ 661{
677 if (!vtime_accounting_enabled())
678 return;
679
680 write_seqlock(&tsk->vtime_seqlock); 662 write_seqlock(&tsk->vtime_seqlock);
663 __vtime_account_system(tsk);
681 if (context_tracking_in_user()) 664 if (context_tracking_in_user())
682 tsk->vtime_snap_whence = VTIME_USER; 665 tsk->vtime_snap_whence = VTIME_USER;
683 __vtime_account_system(tsk);
684 write_sequnlock(&tsk->vtime_seqlock); 666 write_sequnlock(&tsk->vtime_seqlock);
685} 667}
686 668
@@ -688,12 +670,8 @@ void vtime_account_user(struct task_struct *tsk)
688{ 670{
689 cputime_t delta_cpu; 671 cputime_t delta_cpu;
690 672
691 if (!vtime_accounting_enabled())
692 return;
693
694 delta_cpu = get_vtime_delta(tsk);
695
696 write_seqlock(&tsk->vtime_seqlock); 673 write_seqlock(&tsk->vtime_seqlock);
674 delta_cpu = get_vtime_delta(tsk);
697 tsk->vtime_snap_whence = VTIME_SYS; 675 tsk->vtime_snap_whence = VTIME_SYS;
698 account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); 676 account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
699 write_sequnlock(&tsk->vtime_seqlock); 677 write_sequnlock(&tsk->vtime_seqlock);
@@ -701,22 +679,27 @@ void vtime_account_user(struct task_struct *tsk)
701 679
702void vtime_user_enter(struct task_struct *tsk) 680void vtime_user_enter(struct task_struct *tsk)
703{ 681{
704 if (!vtime_accounting_enabled())
705 return;
706
707 write_seqlock(&tsk->vtime_seqlock); 682 write_seqlock(&tsk->vtime_seqlock);
708 tsk->vtime_snap_whence = VTIME_USER;
709 __vtime_account_system(tsk); 683 __vtime_account_system(tsk);
684 tsk->vtime_snap_whence = VTIME_USER;
710 write_sequnlock(&tsk->vtime_seqlock); 685 write_sequnlock(&tsk->vtime_seqlock);
711} 686}
712 687
713void vtime_guest_enter(struct task_struct *tsk) 688void vtime_guest_enter(struct task_struct *tsk)
714{ 689{
690 /*
691 * The flags must be updated under the lock with
692 * the vtime_snap flush and update.
693 * That enforces a right ordering and update sequence
694 * synchronization against the reader (task_gtime())
695 * that can thus safely catch up with a tickless delta.
696 */
715 write_seqlock(&tsk->vtime_seqlock); 697 write_seqlock(&tsk->vtime_seqlock);
716 __vtime_account_system(tsk); 698 __vtime_account_system(tsk);
717 current->flags |= PF_VCPU; 699 current->flags |= PF_VCPU;
718 write_sequnlock(&tsk->vtime_seqlock); 700 write_sequnlock(&tsk->vtime_seqlock);
719} 701}
702EXPORT_SYMBOL_GPL(vtime_guest_enter);
720 703
721void vtime_guest_exit(struct task_struct *tsk) 704void vtime_guest_exit(struct task_struct *tsk)
722{ 705{
@@ -725,6 +708,7 @@ void vtime_guest_exit(struct task_struct *tsk)
725 current->flags &= ~PF_VCPU; 708 current->flags &= ~PF_VCPU;
726 write_sequnlock(&tsk->vtime_seqlock); 709 write_sequnlock(&tsk->vtime_seqlock);
727} 710}
711EXPORT_SYMBOL_GPL(vtime_guest_exit);
728 712
729void vtime_account_idle(struct task_struct *tsk) 713void vtime_account_idle(struct task_struct *tsk)
730{ 714{
@@ -733,11 +717,6 @@ void vtime_account_idle(struct task_struct *tsk)
733 account_idle_time(delta_cpu); 717 account_idle_time(delta_cpu);
734} 718}
735 719
736bool vtime_accounting_enabled(void)
737{
738 return context_tracking_active();
739}
740
741void arch_vtime_task_switch(struct task_struct *prev) 720void arch_vtime_task_switch(struct task_struct *prev)
742{ 721{
743 write_seqlock(&prev->vtime_seqlock); 722 write_seqlock(&prev->vtime_seqlock);
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 3381f098070f..2b62fe86f9ec 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -105,7 +105,6 @@ config NO_HZ_FULL
105 select RCU_USER_QS 105 select RCU_USER_QS
106 select RCU_NOCB_CPU 106 select RCU_NOCB_CPU
107 select VIRT_CPU_ACCOUNTING_GEN 107 select VIRT_CPU_ACCOUNTING_GEN
108 select CONTEXT_TRACKING_FORCE
109 select IRQ_WORK 108 select IRQ_WORK
110 help 109 help
111 Adaptively try to shutdown the tick whenever possible, even when 110 Adaptively try to shutdown the tick whenever possible, even when
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e8a1516cc0a3..3612fc77f834 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -23,6 +23,7 @@
23#include <linux/irq_work.h> 23#include <linux/irq_work.h>
24#include <linux/posix-timers.h> 24#include <linux/posix-timers.h>
25#include <linux/perf_event.h> 25#include <linux/perf_event.h>
26#include <linux/context_tracking.h>
26 27
27#include <asm/irq_regs.h> 28#include <asm/irq_regs.h>
28 29
@@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
148} 149}
149 150
150#ifdef CONFIG_NO_HZ_FULL 151#ifdef CONFIG_NO_HZ_FULL
151static cpumask_var_t nohz_full_mask; 152cpumask_var_t tick_nohz_full_mask;
152bool have_nohz_full_mask; 153bool tick_nohz_full_running;
153 154
154static bool can_stop_full_tick(void) 155static bool can_stop_full_tick(void)
155{ 156{
@@ -182,7 +183,7 @@ static bool can_stop_full_tick(void)
182 * Don't allow the user to think they can get 183 * Don't allow the user to think they can get
183 * full NO_HZ with this machine. 184 * full NO_HZ with this machine.
184 */ 185 */
185 WARN_ONCE(have_nohz_full_mask, 186 WARN_ONCE(tick_nohz_full_running,
186 "NO_HZ FULL will not work with unstable sched clock"); 187 "NO_HZ FULL will not work with unstable sched clock");
187 return false; 188 return false;
188 } 189 }
@@ -197,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
197 * Re-evaluate the need for the tick on the current CPU 198 * Re-evaluate the need for the tick on the current CPU
198 * and restart it if necessary. 199 * and restart it if necessary.
199 */ 200 */
200void tick_nohz_full_check(void) 201void __tick_nohz_full_check(void)
201{ 202{
202 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 203 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
203 204
@@ -211,7 +212,7 @@ void tick_nohz_full_check(void)
211 212
212static void nohz_full_kick_work_func(struct irq_work *work) 213static void nohz_full_kick_work_func(struct irq_work *work)
213{ 214{
214 tick_nohz_full_check(); 215 __tick_nohz_full_check();
215} 216}
216 217
217static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { 218static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
@@ -230,7 +231,7 @@ void tick_nohz_full_kick(void)
230 231
231static void nohz_full_kick_ipi(void *info) 232static void nohz_full_kick_ipi(void *info)
232{ 233{
233 tick_nohz_full_check(); 234 __tick_nohz_full_check();
234} 235}
235 236
236/* 237/*
@@ -239,12 +240,13 @@ static void nohz_full_kick_ipi(void *info)
239 */ 240 */
240void tick_nohz_full_kick_all(void) 241void tick_nohz_full_kick_all(void)
241{ 242{
242 if (!have_nohz_full_mask) 243 if (!tick_nohz_full_running)
243 return; 244 return;
244 245
245 preempt_disable(); 246 preempt_disable();
246 smp_call_function_many(nohz_full_mask, 247 smp_call_function_many(tick_nohz_full_mask,
247 nohz_full_kick_ipi, NULL, false); 248 nohz_full_kick_ipi, NULL, false);
249 tick_nohz_full_kick();
248 preempt_enable(); 250 preempt_enable();
249} 251}
250 252
@@ -253,7 +255,7 @@ void tick_nohz_full_kick_all(void)
253 * It might need the tick due to per task/process properties: 255 * It might need the tick due to per task/process properties:
254 * perf events, posix cpu timers, ... 256 * perf events, posix cpu timers, ...
255 */ 257 */
256void tick_nohz_task_switch(struct task_struct *tsk) 258void __tick_nohz_task_switch(struct task_struct *tsk)
257{ 259{
258 unsigned long flags; 260 unsigned long flags;
259 261
@@ -269,31 +271,23 @@ out:
269 local_irq_restore(flags); 271 local_irq_restore(flags);
270} 272}
271 273
272int tick_nohz_full_cpu(int cpu)
273{
274 if (!have_nohz_full_mask)
275 return 0;
276
277 return cpumask_test_cpu(cpu, nohz_full_mask);
278}
279
280/* Parse the boot-time nohz CPU list from the kernel parameters. */ 274/* Parse the boot-time nohz CPU list from the kernel parameters. */
281static int __init tick_nohz_full_setup(char *str) 275static int __init tick_nohz_full_setup(char *str)
282{ 276{
283 int cpu; 277 int cpu;
284 278
285 alloc_bootmem_cpumask_var(&nohz_full_mask); 279 alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
286 if (cpulist_parse(str, nohz_full_mask) < 0) { 280 if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
287 pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); 281 pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
288 return 1; 282 return 1;
289 } 283 }
290 284
291 cpu = smp_processor_id(); 285 cpu = smp_processor_id();
292 if (cpumask_test_cpu(cpu, nohz_full_mask)) { 286 if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
293 pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); 287 pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
294 cpumask_clear_cpu(cpu, nohz_full_mask); 288 cpumask_clear_cpu(cpu, tick_nohz_full_mask);
295 } 289 }
296 have_nohz_full_mask = true; 290 tick_nohz_full_running = true;
297 291
298 return 1; 292 return 1;
299} 293}
@@ -311,7 +305,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
311 * If we handle the timekeeping duty for full dynticks CPUs, 305 * If we handle the timekeeping duty for full dynticks CPUs,
312 * we can't safely shutdown that CPU. 306 * we can't safely shutdown that CPU.
313 */ 307 */
314 if (have_nohz_full_mask && tick_do_timer_cpu == cpu) 308 if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
315 return NOTIFY_BAD; 309 return NOTIFY_BAD;
316 break; 310 break;
317 } 311 }
@@ -330,31 +324,34 @@ static int tick_nohz_init_all(void)
330 int err = -1; 324 int err = -1;
331 325
332#ifdef CONFIG_NO_HZ_FULL_ALL 326#ifdef CONFIG_NO_HZ_FULL_ALL
333 if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { 327 if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
334 pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); 328 pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
335 return err; 329 return err;
336 } 330 }
337 err = 0; 331 err = 0;
338 cpumask_setall(nohz_full_mask); 332 cpumask_setall(tick_nohz_full_mask);
339 cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); 333 cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask);
340 have_nohz_full_mask = true; 334 tick_nohz_full_running = true;
341#endif 335#endif
342 return err; 336 return err;
343} 337}
344 338
345void __init tick_nohz_init(void) 339void __init tick_nohz_init(void)
346{ 340{
347 if (!have_nohz_full_mask) { 341 int cpu;
342
343 if (!tick_nohz_full_running) {
348 if (tick_nohz_init_all() < 0) 344 if (tick_nohz_init_all() < 0)
349 return; 345 return;
350 } 346 }
351 347
348 for_each_cpu(cpu, tick_nohz_full_mask)
349 context_tracking_cpu_set(cpu);
350
352 cpu_notifier(tick_nohz_cpu_down_callback, 0); 351 cpu_notifier(tick_nohz_cpu_down_callback, 0);
353 cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); 352 cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask);
354 pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); 353 pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
355} 354}
356#else
357#define have_nohz_full_mask (0)
358#endif 355#endif
359 356
360/* 357/*
@@ -732,7 +729,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
732 return false; 729 return false;
733 } 730 }
734 731
735 if (have_nohz_full_mask) { 732 if (tick_nohz_full_enabled()) {
736 /* 733 /*
737 * Keep the tick alive to guarantee timekeeping progression 734 * Keep the tick alive to guarantee timekeeping progression
738 * if there are full dynticks CPUs around 735 * if there are full dynticks CPUs around