author     Linus Torvalds <torvalds@linux-foundation.org>  2013-09-04 12:36:54 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-09-04 12:36:54 -0400
commit     6832d9652f395f7d13003e3884942c40f52ac1fa
tree       40555ad5eda9700cb973dac4db136ad97f5e8b19 /kernel
parent     228abe73ad67665d71eacd6a8a347dd76b0115ae
parent     c2e7fcf53c3cb02b4ada1c66a9bc8a4d97d58aba
Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timers/nohz changes from Ingo Molnar:
"It mostly contains fixes and full dynticks off-case optimizations, by
Frederic Weisbecker"
* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
nohz: Include local CPU in full dynticks global kick
nohz: Optimize full dynticks's sched hooks with static keys
nohz: Optimize full dynticks state checks with static keys
nohz: Rename a few state variables
vtime: Always debug check snapshot source _before_ updating it
vtime: Always scale generic vtime accounting results
vtime: Optimize full dynticks accounting off case with static keys
vtime: Describe overriden functions in dedicated arch headers
m68k: hardirq_count() only need preempt_mask.h
hardirq: Split preempt count mask definitions
context_tracking: Split low level state headers
vtime: Fix racy cputime delta update
vtime: Remove a few unneeded generic vtime state checks
context_tracking: User/kernel broundary cross trace events
context_tracking: Optimize context switch off case with static keys
context_tracking: Optimize guest APIs off case with static key
context_tracking: Optimize main APIs off case with static key
context_tracking: Ground setup for static key use
context_tracking: Remove full dynticks' hacky dependency on wide context tracking
nohz: Only enable context tracking on full dynticks CPUs
...
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/context_tracking.c | 125
-rw-r--r--  kernel/sched/core.c       |   4
-rw-r--r--  kernel/sched/cputime.c    |  53
-rw-r--r--  kernel/time/Kconfig       |   1
-rw-r--r--  kernel/time/tick-sched.c  |  61
5 files changed, 117 insertions, 127 deletions

diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 383f8231e436..247091bf0587 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -20,22 +20,33 @@
 #include <linux/hardirq.h>
 #include <linux/export.h>
 
-DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
-#ifdef CONFIG_CONTEXT_TRACKING_FORCE
-        .active = true,
-#endif
-};
+#define CREATE_TRACE_POINTS
+#include <trace/events/context_tracking.h>
+
+struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(context_tracking_enabled);
+
+DEFINE_PER_CPU(struct context_tracking, context_tracking);
+EXPORT_SYMBOL_GPL(context_tracking);
+
+void context_tracking_cpu_set(int cpu)
+{
+        if (!per_cpu(context_tracking.active, cpu)) {
+                per_cpu(context_tracking.active, cpu) = true;
+                static_key_slow_inc(&context_tracking_enabled);
+        }
+}
 
 /**
- * user_enter - Inform the context tracking that the CPU is going to
+ * context_tracking_user_enter - Inform the context tracking that the CPU is going to
  * enter userspace mode.
  *
  * This function must be called right before we switch from the kernel
  * to userspace, when it's guaranteed the remaining kernel instructions
  * to execute won't use any RCU read side critical section because this
  * function sets RCU in extended quiescent state.
  */
-void user_enter(void)
+void context_tracking_user_enter(void)
 {
         unsigned long flags;
 
@@ -54,17 +65,32 @@ void user_enter(void)
         WARN_ON_ONCE(!current->mm);
 
         local_irq_save(flags);
-        if (__this_cpu_read(context_tracking.active) &&
-            __this_cpu_read(context_tracking.state) != IN_USER) {
+        if ( __this_cpu_read(context_tracking.state) != IN_USER) {
+                if (__this_cpu_read(context_tracking.active)) {
+                        trace_user_enter(0);
+                        /*
+                         * At this stage, only low level arch entry code remains and
+                         * then we'll run in userspace. We can assume there won't be
+                         * any RCU read-side critical section until the next call to
+                         * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
+                         * on the tick.
+                         */
+                        vtime_user_enter(current);
+                        rcu_user_enter();
+                }
                 /*
-                 * At this stage, only low level arch entry code remains and
-                 * then we'll run in userspace. We can assume there won't be
-                 * any RCU read-side critical section until the next call to
-                 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
-                 * on the tick.
+                 * Even if context tracking is disabled on this CPU, because it's outside
+                 * the full dynticks mask for example, we still have to keep track of the
+                 * context transitions and states to prevent inconsistency on those of
+                 * other CPUs.
+                 * If a task triggers an exception in userspace, sleep on the exception
+                 * handler and then migrate to another CPU, that new CPU must know where
+                 * the exception returns by the time we call exception_exit().
+                 * This information can only be provided by the previous CPU when it called
+                 * exception_enter().
+                 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
+                 * is false because we know that CPU is not tickless.
                  */
-                vtime_user_enter(current);
-                rcu_user_enter();
                 __this_cpu_write(context_tracking.state, IN_USER);
         }
         local_irq_restore(flags);
@@ -87,10 +113,9 @@ void user_enter(void)
  */
 void __sched notrace preempt_schedule_context(void)
 {
-        struct thread_info *ti = current_thread_info();
         enum ctx_state prev_ctx;
 
-        if (likely(ti->preempt_count || irqs_disabled()))
+        if (likely(!preemptible()))
                 return;
 
         /*
@@ -112,8 +137,8 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
 #endif /* CONFIG_PREEMPT */
 
 /**
- * user_exit - Inform the context tracking that the CPU is
+ * context_tracking_user_exit - Inform the context tracking that the CPU is
  * exiting userspace mode and entering the kernel.
  *
  * This function must be called after we entered the kernel from userspace
  * before any use of RCU read side critical section. This potentially include
@@ -122,7 +147,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
  * This call supports re-entrancy. This way it can be called from any exception
  * handler without needing to know if we came from userspace or not.
  */
-void user_exit(void)
+void context_tracking_user_exit(void)
 {
         unsigned long flags;
 
@@ -131,38 +156,22 @@ void user_exit(void)
 
         local_irq_save(flags);
         if (__this_cpu_read(context_tracking.state) == IN_USER) {
-                /*
-                 * We are going to run code that may use RCU. Inform
-                 * RCU core about that (ie: we may need the tick again).
-                 */
-                rcu_user_exit();
-                vtime_user_exit(current);
+                if (__this_cpu_read(context_tracking.active)) {
+                        /*
+                         * We are going to run code that may use RCU. Inform
+                         * RCU core about that (ie: we may need the tick again).
+                         */
+                        rcu_user_exit();
+                        vtime_user_exit(current);
+                        trace_user_exit(0);
+                }
                 __this_cpu_write(context_tracking.state, IN_KERNEL);
         }
         local_irq_restore(flags);
 }
 
-void guest_enter(void)
-{
-        if (vtime_accounting_enabled())
-                vtime_guest_enter(current);
-        else
-                __guest_enter();
-}
-EXPORT_SYMBOL_GPL(guest_enter);
-
-void guest_exit(void)
-{
-        if (vtime_accounting_enabled())
-                vtime_guest_exit(current);
-        else
-                __guest_exit();
-}
-EXPORT_SYMBOL_GPL(guest_exit);
-
-
 /**
- * context_tracking_task_switch - context switch the syscall callbacks
+ * __context_tracking_task_switch - context switch the syscall callbacks
  * @prev: the task that is being switched out
  * @next: the task that is being switched in
  *
@@ -174,11 +183,19 @@ EXPORT_SYMBOL_GPL(guest_exit);
  * migrate to some CPU that doesn't do the context tracking. As such the TIF
  * flag may not be desired there.
  */
-void context_tracking_task_switch(struct task_struct *prev,
-                                  struct task_struct *next)
+void __context_tracking_task_switch(struct task_struct *prev,
+                                    struct task_struct *next)
 {
-        if (__this_cpu_read(context_tracking.active)) {
-                clear_tsk_thread_flag(prev, TIF_NOHZ);
-                set_tsk_thread_flag(next, TIF_NOHZ);
-        }
+        clear_tsk_thread_flag(prev, TIF_NOHZ);
+        set_tsk_thread_flag(next, TIF_NOHZ);
 }
+
+#ifdef CONFIG_CONTEXT_TRACKING_FORCE
+void __init context_tracking_init(void)
+{
+        int cpu;
+
+        for_each_possible_cpu(cpu)
+                context_tracking_cpu_set(cpu);
+}
+#endif
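
The core of the off-case optimization in this file is the `context_tracking_enabled` static key: the old `user_enter()`/`user_exit()` entry points become the out-of-line `context_tracking_user_enter()`/`context_tracking_user_exit()`, and callers are expected to reach them through a static-key-gated inline wrapper, so a kernel that never enables context tracking pays only for a patched-out jump. A minimal sketch of that wrapper pattern follows; the exported symbols are taken from the diff above, but the wrapper bodies themselves are an assumption, not a quote of include/linux/context_tracking.h:

```c
/* Sketch only: gating the out-of-line context tracking calls with a static
 * key. context_tracking_enabled, context_tracking_user_enter() and
 * context_tracking_user_exit() come from the diff; the wrapper shape is
 * assumed. */
#include <linux/static_key.h>

extern struct static_key context_tracking_enabled;
extern void context_tracking_user_enter(void);
extern void context_tracking_user_exit(void);

static inline void user_enter(void)
{
        /* static_key_false() compiles to a straight-line no-op until
         * context_tracking_cpu_set() bumps the key, so the disabled case
         * costs no conditional branch on the syscall/exception path. */
        if (static_key_false(&context_tracking_enabled))
                context_tracking_user_enter();
}

static inline void user_exit(void)
{
        if (static_key_false(&context_tracking_enabled))
                context_tracking_user_exit();
}
```
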
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b8e2162fc803..725aa067ad63 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2527,13 +2527,11 @@ void __sched schedule_preempt_disabled(void)
  */
 asmlinkage void __sched notrace preempt_schedule(void)
 {
-        struct thread_info *ti = current_thread_info();
-
         /*
          * If there is a non-zero preempt_count or interrupts are disabled,
          * we do not want to preempt the current task. Just return..
          */
-        if (likely(ti->preempt_count || irqs_disabled()))
+        if (likely(!preemptible()))
                 return;
 
         do {
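
The `preemptible()` helper used here replaces the open-coded thread_info check with the same test: the preempt count must be zero and interrupts enabled. Roughly, simplified from include/linux/preempt.h for the CONFIG_PREEMPT_COUNT case:

```c
/* Simplified view of the check preempt_schedule() and
 * preempt_schedule_context() now rely on (CONFIG_PREEMPT_COUNT case);
 * see include/linux/preempt.h. */
#define preemptible()   (preempt_count() == 0 && !irqs_disabled())
```
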
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index e89ccefef278..ace34f95e200 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -378,11 +378,8 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 
 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
-void vtime_task_switch(struct task_struct *prev)
+void vtime_common_task_switch(struct task_struct *prev)
 {
-        if (!vtime_accounting_enabled())
-                return;
-
         if (is_idle_task(prev))
                 vtime_account_idle(prev);
         else
@@ -404,11 +401,8 @@ void vtime_task_switch(struct task_struct *prev)
  * vtime_account().
  */
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
-void vtime_account_irq_enter(struct task_struct *tsk)
+void vtime_common_account_irq_enter(struct task_struct *tsk)
 {
-        if (!vtime_accounting_enabled())
-                return;
-
         if (!in_interrupt()) {
                 /*
                  * If we interrupted user, context_tracking_in_user()
@@ -428,7 +422,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
         }
         vtime_account_system(tsk);
 }
-EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
+EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
@@ -559,12 +553,6 @@ static void cputime_adjust(struct task_cputime *curr,
 {
         cputime_t rtime, stime, utime, total;
 
-        if (vtime_accounting_enabled()) {
-                *ut = curr->utime;
-                *st = curr->stime;
-                return;
-        }
-
         stime = curr->stime;
         total = stime + curr->utime;
 
@@ -664,23 +652,17 @@ static void __vtime_account_system(struct task_struct *tsk)
 
 void vtime_account_system(struct task_struct *tsk)
 {
-        if (!vtime_accounting_enabled())
-                return;
-
         write_seqlock(&tsk->vtime_seqlock);
         __vtime_account_system(tsk);
         write_sequnlock(&tsk->vtime_seqlock);
 }
 
-void vtime_account_irq_exit(struct task_struct *tsk)
+void vtime_gen_account_irq_exit(struct task_struct *tsk)
 {
-        if (!vtime_accounting_enabled())
-                return;
-
         write_seqlock(&tsk->vtime_seqlock);
+        __vtime_account_system(tsk);
         if (context_tracking_in_user())
                 tsk->vtime_snap_whence = VTIME_USER;
-        __vtime_account_system(tsk);
         write_sequnlock(&tsk->vtime_seqlock);
 }
 
@@ -688,12 +670,8 @@ void vtime_account_user(struct task_struct *tsk)
 {
         cputime_t delta_cpu;
 
-        if (!vtime_accounting_enabled())
-                return;
-
-        delta_cpu = get_vtime_delta(tsk);
-
         write_seqlock(&tsk->vtime_seqlock);
+        delta_cpu = get_vtime_delta(tsk);
         tsk->vtime_snap_whence = VTIME_SYS;
         account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
         write_sequnlock(&tsk->vtime_seqlock);
@@ -701,22 +679,27 @@ void vtime_account_user(struct task_struct *tsk)
 
 void vtime_user_enter(struct task_struct *tsk)
 {
-        if (!vtime_accounting_enabled())
-                return;
-
         write_seqlock(&tsk->vtime_seqlock);
-        tsk->vtime_snap_whence = VTIME_USER;
         __vtime_account_system(tsk);
+        tsk->vtime_snap_whence = VTIME_USER;
         write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_guest_enter(struct task_struct *tsk)
 {
+        /*
+         * The flags must be updated under the lock with
+         * the vtime_snap flush and update.
+         * That enforces a right ordering and update sequence
+         * synchronization against the reader (task_gtime())
+         * that can thus safely catch up with a tickless delta.
+         */
         write_seqlock(&tsk->vtime_seqlock);
         __vtime_account_system(tsk);
         current->flags |= PF_VCPU;
         write_sequnlock(&tsk->vtime_seqlock);
 }
+EXPORT_SYMBOL_GPL(vtime_guest_enter);
 
 void vtime_guest_exit(struct task_struct *tsk)
 {
@@ -725,6 +708,7 @@ void vtime_guest_exit(struct task_struct *tsk)
         current->flags &= ~PF_VCPU;
         write_sequnlock(&tsk->vtime_seqlock);
 }
+EXPORT_SYMBOL_GPL(vtime_guest_exit);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
@@ -733,11 +717,6 @@ void vtime_account_idle(struct task_struct *tsk)
         account_idle_time(delta_cpu);
 }
 
-bool vtime_accounting_enabled(void)
-{
-        return context_tracking_active();
-}
-
 void arch_vtime_task_switch(struct task_struct *prev)
 {
         write_seqlock(&prev->vtime_seqlock);
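
The comment added in vtime_guest_enter() describes the seqlock protocol between the vtime writers, which now flip PF_VCPU and flush the vtime snapshot inside the same write_seqlock section, and lockless readers such as task_gtime(), which retry until they observe a consistent flags/snapshot pair. A sketch of that reader side, loosely modeled on task_gtime() and with the internal vtime_delta() helper assumed, looks like this:

```c
/* Sketch of the reader side of the vtime seqlock protocol referred to by the
 * vtime_guest_enter() comment; loosely modeled on task_gtime(), not a quote
 * of it. vtime_delta() stands in for cputime.c's internal tickless-delta
 * helper. */
cputime_t task_gtime_sketch(struct task_struct *t)
{
        unsigned int seq;
        cputime_t gtime;

        do {
                seq = read_seqbegin(&t->vtime_seqlock);

                gtime = t->gtime;
                /* PF_VCPU and the vtime snapshot are updated together under
                 * write_seqlock(), so this pairing is always consistent. */
                if (t->flags & PF_VCPU)
                        gtime += vtime_delta(t);

        } while (read_seqretry(&t->vtime_seqlock, seq));

        return gtime;
}
```
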
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 3381f098070f..2b62fe86f9ec 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -105,7 +105,6 @@ config NO_HZ_FULL
         select RCU_USER_QS
         select RCU_NOCB_CPU
         select VIRT_CPU_ACCOUNTING_GEN
-        select CONTEXT_TRACKING_FORCE
         select IRQ_WORK
         help
          Adaptively try to shutdown the tick whenever possible, even when
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e8a1516cc0a3..3612fc77f834 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -23,6 +23,7 @@
 #include <linux/irq_work.h>
 #include <linux/posix-timers.h>
 #include <linux/perf_event.h>
+#include <linux/context_tracking.h>
 
 #include <asm/irq_regs.h>
 
@@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 }
 
 #ifdef CONFIG_NO_HZ_FULL
-static cpumask_var_t nohz_full_mask;
-bool have_nohz_full_mask;
+cpumask_var_t tick_nohz_full_mask;
+bool tick_nohz_full_running;
 
 static bool can_stop_full_tick(void)
 {
@@ -182,7 +183,7 @@ static bool can_stop_full_tick(void)
                  * Don't allow the user to think they can get
                  * full NO_HZ with this machine.
                  */
-                WARN_ONCE(have_nohz_full_mask,
+                WARN_ONCE(tick_nohz_full_running,
                           "NO_HZ FULL will not work with unstable sched clock");
                 return false;
         }
@@ -197,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
  * Re-evaluate the need for the tick on the current CPU
  * and restart it if necessary.
  */
-void tick_nohz_full_check(void)
+void __tick_nohz_full_check(void)
 {
         struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 
@@ -211,7 +212,7 @@ void tick_nohz_full_check(void)
 
 static void nohz_full_kick_work_func(struct irq_work *work)
 {
-        tick_nohz_full_check();
+        __tick_nohz_full_check();
 }
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
@@ -230,7 +231,7 @@ void tick_nohz_full_kick(void)
 
 static void nohz_full_kick_ipi(void *info)
 {
-        tick_nohz_full_check();
+        __tick_nohz_full_check();
 }
 
 /*
@@ -239,12 +240,13 @@ static void nohz_full_kick_ipi(void *info)
  */
 void tick_nohz_full_kick_all(void)
 {
-        if (!have_nohz_full_mask)
+        if (!tick_nohz_full_running)
                 return;
 
         preempt_disable();
-        smp_call_function_many(nohz_full_mask,
+        smp_call_function_many(tick_nohz_full_mask,
                                nohz_full_kick_ipi, NULL, false);
+        tick_nohz_full_kick();
         preempt_enable();
 }
 
@@ -253,7 +255,7 @@ void tick_nohz_full_kick_all(void)
  * It might need the tick due to per task/process properties:
  * perf events, posix cpu timers, ...
  */
-void tick_nohz_task_switch(struct task_struct *tsk)
+void __tick_nohz_task_switch(struct task_struct *tsk)
 {
         unsigned long flags;
 
@@ -269,31 +271,23 @@ out:
         local_irq_restore(flags);
 }
 
-int tick_nohz_full_cpu(int cpu)
-{
-        if (!have_nohz_full_mask)
-                return 0;
-
-        return cpumask_test_cpu(cpu, nohz_full_mask);
-}
-
 /* Parse the boot-time nohz CPU list from the kernel parameters. */
 static int __init tick_nohz_full_setup(char *str)
 {
         int cpu;
 
-        alloc_bootmem_cpumask_var(&nohz_full_mask);
-        if (cpulist_parse(str, nohz_full_mask) < 0) {
+        alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
+        if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
                 pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
                 return 1;
         }
 
         cpu = smp_processor_id();
-        if (cpumask_test_cpu(cpu, nohz_full_mask)) {
+        if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
                 pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
-                cpumask_clear_cpu(cpu, nohz_full_mask);
+                cpumask_clear_cpu(cpu, tick_nohz_full_mask);
         }
-        have_nohz_full_mask = true;
+        tick_nohz_full_running = true;
 
         return 1;
 }
@@ -311,7 +305,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
                  * If we handle the timekeeping duty for full dynticks CPUs,
                  * we can't safely shutdown that CPU.
                  */
-                if (have_nohz_full_mask && tick_do_timer_cpu == cpu)
+                if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
                         return NOTIFY_BAD;
                 break;
         }
@@ -330,31 +324,34 @@ static int tick_nohz_init_all(void)
         int err = -1;
 
 #ifdef CONFIG_NO_HZ_FULL_ALL
-        if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) {
+        if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
                 pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
                 return err;
         }
         err = 0;
-        cpumask_setall(nohz_full_mask);
-        cpumask_clear_cpu(smp_processor_id(), nohz_full_mask);
-        have_nohz_full_mask = true;
+        cpumask_setall(tick_nohz_full_mask);
+        cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask);
+        tick_nohz_full_running = true;
 #endif
         return err;
 }
 
 void __init tick_nohz_init(void)
 {
-        if (!have_nohz_full_mask) {
+        int cpu;
+
+        if (!tick_nohz_full_running) {
                 if (tick_nohz_init_all() < 0)
                         return;
         }
 
+        for_each_cpu(cpu, tick_nohz_full_mask)
+                context_tracking_cpu_set(cpu);
+
         cpu_notifier(tick_nohz_cpu_down_callback, 0);
-        cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
+        cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask);
         pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
 }
-#else
-#define have_nohz_full_mask (0)
 #endif
 
 /*
@@ -732,7 +729,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
                 return false;
         }
 
-        if (have_nohz_full_mask) {
+        if (tick_nohz_full_enabled()) {
                 /*
                  * Keep the tick alive to guarantee timekeeping progression
                  * if there are full dynticks CPUs around
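
With tick_nohz_full_mask and tick_nohz_full_running now non-static, the old out-of-line tick_nohz_full_cpu() can move to the tick header as an inline helper, and the last hunk switches can_stop_idle_tick() to a tick_nohz_full_enabled() check. A sketch of what those inline helpers presumably look like; the symbol names come from the diff above, but the wrapper bodies are assumed rather than quoted from include/linux/tick.h:

```c
/* Sketch only: inline replacements for the removed out-of-line
 * tick_nohz_full_cpu(), gated on the now-global state exported above. */
extern cpumask_var_t tick_nohz_full_mask;
extern bool tick_nohz_full_running;

static inline bool tick_nohz_full_enabled(void)
{
        /* True once a nohz_full= set has been parsed or NO_HZ_FULL_ALL
         * populated the mask during tick_nohz_init_all(). */
        return tick_nohz_full_running;
}

static inline bool tick_nohz_full_cpu(int cpu)
{
        if (!tick_nohz_full_enabled())
                return false;

        return cpumask_test_cpu(cpu, tick_nohz_full_mask);
}
```
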