path: root/kernel
author		Ingo Molnar <mingo@kernel.org>	2013-08-14 11:58:56 -0400
committer	Ingo Molnar <mingo@kernel.org>	2013-08-14 11:58:56 -0400
commit		6f1d657668ac3041b65265d3653d7e9172a0d603 (patch)
tree		6e837c683783708637cc4caf9de759166c7469b7 /kernel
parent		d4e4ab86bcba5a72779c43dc1459f71fea3d89c8 (diff)
parent		d13508f9440e46dccac6a2dd48d51a73b2207482 (diff)
Merge branch 'timers/nohz-v3' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz
Pull nohz improvements from Frederic Weisbecker:

 " It mostly contains fixes and full dynticks off-case optimizations. I believe
   that distros want to enable this feature so it seems important to optimize
   the case where the "nohz_full=" parameter is empty. ie: I'm trying to remove
   any performance regression that comes with NO_HZ_FULL=y when the feature is
   not used.

   This patchset improves the current situation a lot (off-case appears to be
   around 11% faster with hackbench, although I guess it may vary depending on
   the configuration but it should be significantly faster in any case) now
   there is still some work to do: I can still observe a remaining loss of 1.6%
   throughput seen with hackbench compared to CONFIG_NO_HZ_FULL=n. "

Signed-off-by: Ingo Molnar <mingo@kernel.org>
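The off-case optimization described above rests on the jump-label machinery visible in the kernel/context_tracking.c diff below: the tracking hooks are guarded by a static key, so with an empty "nohz_full=" the guarded branch is patched out and callers pay almost nothing. A minimal sketch of that pattern follows; the wrapper name is illustrative and not part of this merge.

#include <linux/jump_label.h>

/*
 * Defaults to false: the guarded branch stays patched out until a CPU opts in
 * via context_tracking_cpu_set(), which does static_key_slow_inc().
 */
struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;

void context_tracking_user_enter(void);		/* defined in the diff below */

/* Illustrative caller-side hook: near-zero cost while the key is false. */
static inline void example_user_enter_hook(void)
{
	if (static_key_false(&context_tracking_enabled))
		context_tracking_user_enter();
}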
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/context_tracking.c	125
-rw-r--r--	kernel/sched/core.c		4
-rw-r--r--	kernel/sched/cputime.c		53
-rw-r--r--	kernel/time/Kconfig		1
-rw-r--r--	kernel/time/sched_clock.c	2
-rw-r--r--	kernel/time/tick-sched.c	59
6 files changed, 116 insertions(+), 128 deletions(-)
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 383f8231e436..247091bf0587 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -20,22 +20,33 @@
 #include <linux/hardirq.h>
 #include <linux/export.h>
 
-DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
-#ifdef CONFIG_CONTEXT_TRACKING_FORCE
-	.active = true,
-#endif
-};
+#define CREATE_TRACE_POINTS
+#include <trace/events/context_tracking.h>
+
+struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(context_tracking_enabled);
+
+DEFINE_PER_CPU(struct context_tracking, context_tracking);
+EXPORT_SYMBOL_GPL(context_tracking);
+
+void context_tracking_cpu_set(int cpu)
+{
+	if (!per_cpu(context_tracking.active, cpu)) {
+		per_cpu(context_tracking.active, cpu) = true;
+		static_key_slow_inc(&context_tracking_enabled);
+	}
+}
 
 /**
- * user_enter - Inform the context tracking that the CPU is going to
- *              enter userspace mode.
+ * context_tracking_user_enter - Inform the context tracking that the CPU is going to
+ *                               enter userspace mode.
  *
  * This function must be called right before we switch from the kernel
  * to userspace, when it's guaranteed the remaining kernel instructions
  * to execute won't use any RCU read side critical section because this
  * function sets RCU in extended quiescent state.
  */
-void user_enter(void)
+void context_tracking_user_enter(void)
 {
 	unsigned long flags;
 
@@ -54,17 +65,32 @@ void user_enter(void)
 	WARN_ON_ONCE(!current->mm);
 
 	local_irq_save(flags);
-	if (__this_cpu_read(context_tracking.active) &&
-	    __this_cpu_read(context_tracking.state) != IN_USER) {
+	if ( __this_cpu_read(context_tracking.state) != IN_USER) {
+		if (__this_cpu_read(context_tracking.active)) {
+			trace_user_enter(0);
+			/*
+			 * At this stage, only low level arch entry code remains and
+			 * then we'll run in userspace. We can assume there won't be
+			 * any RCU read-side critical section until the next call to
+			 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
+			 * on the tick.
+			 */
+			vtime_user_enter(current);
+			rcu_user_enter();
+		}
 		/*
-		 * At this stage, only low level arch entry code remains and
-		 * then we'll run in userspace. We can assume there won't be
-		 * any RCU read-side critical section until the next call to
-		 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
-		 * on the tick.
+		 * Even if context tracking is disabled on this CPU, because it's outside
+		 * the full dynticks mask for example, we still have to keep track of the
+		 * context transitions and states to prevent inconsistency on those of
+		 * other CPUs.
+		 * If a task triggers an exception in userspace, sleep on the exception
+		 * handler and then migrate to another CPU, that new CPU must know where
+		 * the exception returns by the time we call exception_exit().
+		 * This information can only be provided by the previous CPU when it called
+		 * exception_enter().
+		 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
+		 * is false because we know that CPU is not tickless.
 		 */
-		vtime_user_enter(current);
-		rcu_user_enter();
 		__this_cpu_write(context_tracking.state, IN_USER);
 	}
 	local_irq_restore(flags);
@@ -87,10 +113,9 @@ void user_enter(void)
  */
 void __sched notrace preempt_schedule_context(void)
 {
-	struct thread_info *ti = current_thread_info();
 	enum ctx_state prev_ctx;
 
-	if (likely(ti->preempt_count || irqs_disabled()))
+	if (likely(!preemptible()))
 		return;
 
 	/*
@@ -112,8 +137,8 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
 #endif /* CONFIG_PREEMPT */
 
 /**
- * user_exit - Inform the context tracking that the CPU is
- *             exiting userspace mode and entering the kernel.
+ * context_tracking_user_exit - Inform the context tracking that the CPU is
+ *                              exiting userspace mode and entering the kernel.
  *
  * This function must be called after we entered the kernel from userspace
  * before any use of RCU read side critical section. This potentially include
@@ -122,7 +147,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
  * This call supports re-entrancy. This way it can be called from any exception
  * handler without needing to know if we came from userspace or not.
  */
-void user_exit(void)
+void context_tracking_user_exit(void)
 {
 	unsigned long flags;
 
@@ -131,38 +156,22 @@ void user_exit(void)
 
 	local_irq_save(flags);
 	if (__this_cpu_read(context_tracking.state) == IN_USER) {
-		/*
-		 * We are going to run code that may use RCU. Inform
-		 * RCU core about that (ie: we may need the tick again).
-		 */
-		rcu_user_exit();
-		vtime_user_exit(current);
+		if (__this_cpu_read(context_tracking.active)) {
+			/*
+			 * We are going to run code that may use RCU. Inform
+			 * RCU core about that (ie: we may need the tick again).
+			 */
+			rcu_user_exit();
+			vtime_user_exit(current);
+			trace_user_exit(0);
+		}
 		__this_cpu_write(context_tracking.state, IN_KERNEL);
 	}
 	local_irq_restore(flags);
 }
 
-void guest_enter(void)
-{
-	if (vtime_accounting_enabled())
-		vtime_guest_enter(current);
-	else
-		__guest_enter();
-}
-EXPORT_SYMBOL_GPL(guest_enter);
-
-void guest_exit(void)
-{
-	if (vtime_accounting_enabled())
-		vtime_guest_exit(current);
-	else
-		__guest_exit();
-}
-EXPORT_SYMBOL_GPL(guest_exit);
-
-
 /**
- * context_tracking_task_switch - context switch the syscall callbacks
+ * __context_tracking_task_switch - context switch the syscall callbacks
  * @prev: the task that is being switched out
  * @next: the task that is being switched in
  *
@@ -174,11 +183,19 @@ EXPORT_SYMBOL_GPL(guest_exit);
  * migrate to some CPU that doesn't do the context tracking. As such the TIF
  * flag may not be desired there.
  */
-void context_tracking_task_switch(struct task_struct *prev,
-				  struct task_struct *next)
+void __context_tracking_task_switch(struct task_struct *prev,
+				    struct task_struct *next)
 {
-	if (__this_cpu_read(context_tracking.active)) {
-		clear_tsk_thread_flag(prev, TIF_NOHZ);
-		set_tsk_thread_flag(next, TIF_NOHZ);
-	}
+	clear_tsk_thread_flag(prev, TIF_NOHZ);
+	set_tsk_thread_flag(next, TIF_NOHZ);
 }
+
+#ifdef CONFIG_CONTEXT_TRACKING_FORCE
+void __init context_tracking_init(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		context_tracking_cpu_set(cpu);
+}
+#endif
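The long comment added to context_tracking_user_enter() refers to the exception helpers that consume this per-CPU state. A rough sketch of their shape, for orientation only (the real helpers live in include/linux/context_tracking.h; the example_ names and details here are ours):

static inline enum ctx_state example_exception_enter(void)
{
	enum ctx_state prev_ctx;

	/* Record whether the exception interrupted userspace or the kernel. */
	prev_ctx = this_cpu_read(context_tracking.state);
	context_tracking_user_exit();
	return prev_ctx;
}

static inline void example_exception_exit(enum ctx_state prev_ctx)
{
	/* May run on a different CPU than example_exception_enter() did. */
	if (prev_ctx == IN_USER)
		context_tracking_user_enter();
}

This is why the state transition is recorded even when context_tracking.active is false: the return path may land on a CPU that does have tracking enabled.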
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b7c32cb7bfeb..3fb7acee7326 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2510,13 +2510,11 @@ void __sched schedule_preempt_disabled(void)
  */
 asmlinkage void __sched notrace preempt_schedule(void)
 {
-	struct thread_info *ti = current_thread_info();
-
 	/*
 	 * If there is a non-zero preempt_count or interrupts are disabled,
 	 * we do not want to preempt the current task. Just return..
 	 */
-	if (likely(ti->preempt_count || irqs_disabled()))
+	if (likely(!preemptible()))
 		return;
 
 	do {
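Both preempt_schedule() and preempt_schedule_context() now use preemptible() instead of poking at thread_info directly. For reference, preemptible() (include/linux/preempt.h, when preempt counting is configured) amounts to the same test without the explicit thread_info access, roughly:

/* Roughly equivalent to the removed open-coded check. */
#define preemptible()	(preempt_count() == 0 && !irqs_disabled())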
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index a7959e05a9d5..c1d7493825ae 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -378,11 +378,8 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 
 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
-void vtime_task_switch(struct task_struct *prev)
+void vtime_common_task_switch(struct task_struct *prev)
 {
-	if (!vtime_accounting_enabled())
-		return;
-
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
 	else
@@ -404,11 +401,8 @@ void vtime_task_switch(struct task_struct *prev)
  * vtime_account().
  */
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
-void vtime_account_irq_enter(struct task_struct *tsk)
+void vtime_common_account_irq_enter(struct task_struct *tsk)
 {
-	if (!vtime_accounting_enabled())
-		return;
-
 	if (!in_interrupt()) {
 		/*
 		 * If we interrupted user, context_tracking_in_user()
@@ -428,7 +422,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
 	}
 	vtime_account_system(tsk);
 }
-EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
+EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
@@ -559,12 +553,6 @@ static void cputime_adjust(struct task_cputime *curr,
 {
 	cputime_t rtime, stime, utime, total;
 
-	if (vtime_accounting_enabled()) {
-		*ut = curr->utime;
-		*st = curr->stime;
-		return;
-	}
-
 	stime = curr->stime;
 	total = stime + curr->utime;
 
@@ -664,23 +652,17 @@ static void __vtime_account_system(struct task_struct *tsk)
 
 void vtime_account_system(struct task_struct *tsk)
 {
-	if (!vtime_accounting_enabled())
-		return;
-
 	write_seqlock(&tsk->vtime_seqlock);
 	__vtime_account_system(tsk);
 	write_sequnlock(&tsk->vtime_seqlock);
 }
 
-void vtime_account_irq_exit(struct task_struct *tsk)
+void vtime_gen_account_irq_exit(struct task_struct *tsk)
 {
-	if (!vtime_accounting_enabled())
-		return;
-
 	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
 	if (context_tracking_in_user())
 		tsk->vtime_snap_whence = VTIME_USER;
-	__vtime_account_system(tsk);
 	write_sequnlock(&tsk->vtime_seqlock);
 }
 
@@ -688,12 +670,8 @@ void vtime_account_user(struct task_struct *tsk)
 {
 	cputime_t delta_cpu;
 
-	if (!vtime_accounting_enabled())
-		return;
-
-	delta_cpu = get_vtime_delta(tsk);
-
 	write_seqlock(&tsk->vtime_seqlock);
+	delta_cpu = get_vtime_delta(tsk);
 	tsk->vtime_snap_whence = VTIME_SYS;
 	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
 	write_sequnlock(&tsk->vtime_seqlock);
@@ -701,22 +679,27 @@ void vtime_account_user(struct task_struct *tsk)
 
 void vtime_user_enter(struct task_struct *tsk)
 {
-	if (!vtime_accounting_enabled())
-		return;
-
 	write_seqlock(&tsk->vtime_seqlock);
-	tsk->vtime_snap_whence = VTIME_USER;
 	__vtime_account_system(tsk);
+	tsk->vtime_snap_whence = VTIME_USER;
 	write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_guest_enter(struct task_struct *tsk)
 {
+	/*
+	 * The flags must be updated under the lock with
+	 * the vtime_snap flush and update.
+	 * That enforces a right ordering and update sequence
+	 * synchronization against the reader (task_gtime())
+	 * that can thus safely catch up with a tickless delta.
+	 */
 	write_seqlock(&tsk->vtime_seqlock);
 	__vtime_account_system(tsk);
 	current->flags |= PF_VCPU;
 	write_sequnlock(&tsk->vtime_seqlock);
 }
+EXPORT_SYMBOL_GPL(vtime_guest_enter);
 
 void vtime_guest_exit(struct task_struct *tsk)
 {
@@ -725,6 +708,7 @@ void vtime_guest_exit(struct task_struct *tsk)
 	current->flags &= ~PF_VCPU;
 	write_sequnlock(&tsk->vtime_seqlock);
 }
+EXPORT_SYMBOL_GPL(vtime_guest_exit);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
@@ -733,11 +717,6 @@ void vtime_account_idle(struct task_struct *tsk)
 	account_idle_time(delta_cpu);
 }
 
-bool vtime_accounting_enabled(void)
-{
-	return context_tracking_active();
-}
-
 void arch_vtime_task_switch(struct task_struct *prev)
 {
 	write_seqlock(&prev->vtime_seqlock);
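The vtime_accounting_enabled() early returns removed above are not lost: the renames (vtime_task_switch() to vtime_common_task_switch(), vtime_account_irq_enter() to vtime_common_account_irq_enter(), vtime_account_irq_exit() to vtime_gen_account_irq_exit()) indicate the check has moved to caller-side inline wrappers, so the disabled case no longer takes a function call into cputime.c. A hedged sketch of that wrapper shape (the real wrappers live in include/linux/vtime.h and may differ in detail):

/* Sketch of a header-side wrapper; not the verbatim kernel code. */
static inline void example_vtime_task_switch(struct task_struct *prev)
{
	if (vtime_accounting_enabled())
		vtime_common_task_switch(prev);
}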
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 70f27e89012b..747bbc70f53b 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -105,7 +105,6 @@ config NO_HZ_FULL
 	select RCU_USER_QS
 	select RCU_NOCB_CPU
 	select VIRT_CPU_ACCOUNTING_GEN
-	select CONTEXT_TRACKING_FORCE
 	select IRQ_WORK
 	help
 	 Adaptively try to shutdown the tick whenever possible, even when
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index a326f27d7f09..0b479a6a22bb 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -121,7 +121,7 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	BUG_ON(bits > 32);
 	WARN_ON(!irqs_disabled());
 	read_sched_clock = read;
-	sched_clock_mask = (1 << bits) - 1;
+	sched_clock_mask = (1ULL << bits) - 1;
 	cd.rate = rate;
 
 	/* calculate the mult/shift to convert counter ticks to ns. */
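The sched_clock.c one-liner fixes a shift overflow: with a 32-bit clocksource, (1 << bits) shifts a plain int by its full width, which is undefined behaviour in C and does not produce the intended all-ones mask; (1ULL << bits) - 1 does. A standalone userspace demo of the corrected expression:

#include <stdio.h>

int main(void)
{
	int bits = 32;

	/*
	 * The removed expression, (1 << bits) - 1, is undefined for bits == 32
	 * because the shift count equals the width of int.  Widening the
	 * constant makes the shift well defined and yields the full mask.
	 */
	unsigned long long mask = (1ULL << bits) - 1;

	printf("mask for %d bits: 0x%llx\n", bits, mask);	/* 0xffffffff */
	return 0;
}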
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e77edc97e036..adea6fc3ba2a 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -23,6 +23,7 @@
 #include <linux/irq_work.h>
 #include <linux/posix-timers.h>
 #include <linux/perf_event.h>
+#include <linux/context_tracking.h>
 
 #include <asm/irq_regs.h>
 
@@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 }
 
 #ifdef CONFIG_NO_HZ_FULL
-static cpumask_var_t nohz_full_mask;
-bool have_nohz_full_mask;
+cpumask_var_t tick_nohz_full_mask;
+bool tick_nohz_full_running;
 
 static bool can_stop_full_tick(void)
 {
@@ -182,7 +183,8 @@ static bool can_stop_full_tick(void)
 		 * Don't allow the user to think they can get
 		 * full NO_HZ with this machine.
 		 */
-		WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock");
+		WARN_ONCE(tick_nohz_full_running,
+			  "NO_HZ FULL will not work with unstable sched clock");
 		return false;
 	}
 #endif
@@ -196,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
  * Re-evaluate the need for the tick on the current CPU
  * and restart it if necessary.
  */
-void tick_nohz_full_check(void)
+void __tick_nohz_full_check(void)
 {
 	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 
@@ -210,7 +212,7 @@ void tick_nohz_full_check(void)
 
 static void nohz_full_kick_work_func(struct irq_work *work)
 {
-	tick_nohz_full_check();
+	__tick_nohz_full_check();
 }
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
@@ -229,7 +231,7 @@ void tick_nohz_full_kick(void)
 
 static void nohz_full_kick_ipi(void *info)
 {
-	tick_nohz_full_check();
+	__tick_nohz_full_check();
 }
 
 /*
@@ -238,11 +240,11 @@ static void nohz_full_kick_ipi(void *info)
  */
 void tick_nohz_full_kick_all(void)
 {
-	if (!have_nohz_full_mask)
+	if (!tick_nohz_full_running)
 		return;
 
 	preempt_disable();
-	smp_call_function_many(nohz_full_mask,
+	smp_call_function_many(tick_nohz_full_mask,
 			       nohz_full_kick_ipi, NULL, false);
 	preempt_enable();
 }
@@ -252,7 +254,7 @@ void tick_nohz_full_kick_all(void)
  * It might need the tick due to per task/process properties:
  * perf events, posix cpu timers, ...
  */
-void tick_nohz_task_switch(struct task_struct *tsk)
+void __tick_nohz_task_switch(struct task_struct *tsk)
 {
 	unsigned long flags;
 
@@ -268,31 +270,23 @@ out:
 	local_irq_restore(flags);
 }
 
-int tick_nohz_full_cpu(int cpu)
-{
-	if (!have_nohz_full_mask)
-		return 0;
-
-	return cpumask_test_cpu(cpu, nohz_full_mask);
-}
-
 /* Parse the boot-time nohz CPU list from the kernel parameters. */
 static int __init tick_nohz_full_setup(char *str)
 {
 	int cpu;
 
-	alloc_bootmem_cpumask_var(&nohz_full_mask);
-	if (cpulist_parse(str, nohz_full_mask) < 0) {
+	alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
+	if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
 		pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
 		return 1;
 	}
 
 	cpu = smp_processor_id();
-	if (cpumask_test_cpu(cpu, nohz_full_mask)) {
+	if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
 		pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
-		cpumask_clear_cpu(cpu, nohz_full_mask);
+		cpumask_clear_cpu(cpu, tick_nohz_full_mask);
 	}
-	have_nohz_full_mask = true;
+	tick_nohz_full_running = true;
 
 	return 1;
 }
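tick_nohz_full_cpu() disappears from this file while tick_nohz_full_mask and tick_nohz_full_running are given global linkage, and can_stop_idle_tick() further down switches to tick_nohz_full_enabled(): the tests become header-side inlines that can short-circuit through the context tracking static key. A hedged sketch of what such inlines could look like (the real ones live in include/linux/tick.h; the example_ names and exact bodies are ours):

extern cpumask_var_t tick_nohz_full_mask;
extern bool tick_nohz_full_running;

static inline bool example_tick_nohz_full_enabled(void)
{
	if (!static_key_false(&context_tracking_enabled))
		return false;
	return tick_nohz_full_running;
}

static inline bool example_tick_nohz_full_cpu(int cpu)
{
	if (!example_tick_nohz_full_enabled())
		return false;
	return cpumask_test_cpu(cpu, tick_nohz_full_mask);
}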
@@ -310,7 +304,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
 		 * If we handle the timekeeping duty for full dynticks CPUs,
 		 * we can't safely shutdown that CPU.
 		 */
-		if (have_nohz_full_mask && tick_do_timer_cpu == cpu)
+		if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
 			return NOTIFY_BAD;
 		break;
 	}
@@ -329,14 +323,14 @@ static int tick_nohz_init_all(void)
 	int err = -1;
 
 #ifdef CONFIG_NO_HZ_FULL_ALL
-	if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) {
+	if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
 		pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
 		return err;
 	}
 	err = 0;
-	cpumask_setall(nohz_full_mask);
-	cpumask_clear_cpu(smp_processor_id(), nohz_full_mask);
-	have_nohz_full_mask = true;
+	cpumask_setall(tick_nohz_full_mask);
+	cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask);
+	tick_nohz_full_running = true;
 #endif
 	return err;
 }
@@ -345,17 +339,18 @@ void __init tick_nohz_init(void)
 {
 	int cpu;
 
-	if (!have_nohz_full_mask) {
+	if (!tick_nohz_full_running) {
 		if (tick_nohz_init_all() < 0)
 			return;
 	}
 
+	for_each_cpu(cpu, tick_nohz_full_mask)
+		context_tracking_cpu_set(cpu);
+
 	cpu_notifier(tick_nohz_cpu_down_callback, 0);
-	cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
+	cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask);
 	pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
 }
-#else
-#define have_nohz_full_mask (0)
 #endif
 
 /*
@@ -733,7 +728,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 		return false;
 	}
 
-	if (have_nohz_full_mask) {
+	if (tick_nohz_full_enabled()) {
 		/*
 		 * Keep the tick alive to guarantee timekeeping progression
 		 * if there are full dynticks CPUs around