diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-04 12:36:54 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-04 12:36:54 -0400 |
| commit | 6832d9652f395f7d13003e3884942c40f52ac1fa (patch) | |
| tree | 40555ad5eda9700cb973dac4db136ad97f5e8b19 /kernel | |
| parent | 228abe73ad67665d71eacd6a8a347dd76b0115ae (diff) | |
| parent | c2e7fcf53c3cb02b4ada1c66a9bc8a4d97d58aba (diff) | |
Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timers/nohz changes from Ingo Molnar:
"It mostly contains fixes and full dynticks off-case optimizations, by
Frederic Weisbecker"
* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
nohz: Include local CPU in full dynticks global kick
nohz: Optimize full dynticks's sched hooks with static keys
nohz: Optimize full dynticks state checks with static keys
nohz: Rename a few state variables
vtime: Always debug check snapshot source _before_ updating it
vtime: Always scale generic vtime accounting results
vtime: Optimize full dynticks accounting off case with static keys
vtime: Describe overridden functions in dedicated arch headers
m68k: hardirq_count() only need preempt_mask.h
hardirq: Split preempt count mask definitions
context_tracking: Split low level state headers
vtime: Fix racy cputime delta update
vtime: Remove a few unneeded generic vtime state checks
context_tracking: User/kernel boundary cross trace events
context_tracking: Optimize context switch off case with static keys
context_tracking: Optimize guest APIs off case with static key
context_tracking: Optimize main APIs off case with static key
context_tracking: Ground setup for static key use
context_tracking: Remove full dynticks' hacky dependency on wide context tracking
nohz: Only enable context tracking on full dynticks CPUs
...
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/context_tracking.c | 125 | ||||
| -rw-r--r-- | kernel/sched/core.c | 4 | ||||
| -rw-r--r-- | kernel/sched/cputime.c | 53 | ||||
| -rw-r--r-- | kernel/time/Kconfig | 1 | ||||
| -rw-r--r-- | kernel/time/tick-sched.c | 61 |
5 files changed, 117 insertions, 127 deletions
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 383f8231e436..247091bf0587 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c | |||
| @@ -20,22 +20,33 @@ | |||
| 20 | #include <linux/hardirq.h> | 20 | #include <linux/hardirq.h> |
| 21 | #include <linux/export.h> | 21 | #include <linux/export.h> |
| 22 | 22 | ||
| 23 | DEFINE_PER_CPU(struct context_tracking, context_tracking) = { | 23 | #define CREATE_TRACE_POINTS |
| 24 | #ifdef CONFIG_CONTEXT_TRACKING_FORCE | 24 | #include <trace/events/context_tracking.h> |
| 25 | .active = true, | 25 | |
| 26 | #endif | 26 | struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE; |
| 27 | }; | 27 | EXPORT_SYMBOL_GPL(context_tracking_enabled); |
| 28 | |||
| 29 | DEFINE_PER_CPU(struct context_tracking, context_tracking); | ||
| 30 | EXPORT_SYMBOL_GPL(context_tracking); | ||
| 31 | |||
| 32 | void context_tracking_cpu_set(int cpu) | ||
| 33 | { | ||
| 34 | if (!per_cpu(context_tracking.active, cpu)) { | ||
| 35 | per_cpu(context_tracking.active, cpu) = true; | ||
| 36 | static_key_slow_inc(&context_tracking_enabled); | ||
| 37 | } | ||
| 38 | } | ||
| 28 | 39 | ||
| 29 | /** | 40 | /** |
| 30 | * user_enter - Inform the context tracking that the CPU is going to | 41 | * context_tracking_user_enter - Inform the context tracking that the CPU is going to |
| 31 | * enter userspace mode. | 42 | * enter userspace mode. |
| 32 | * | 43 | * |
| 33 | * This function must be called right before we switch from the kernel | 44 | * This function must be called right before we switch from the kernel |
| 34 | * to userspace, when it's guaranteed the remaining kernel instructions | 45 | * to userspace, when it's guaranteed the remaining kernel instructions |
| 35 | * to execute won't use any RCU read side critical section because this | 46 | * to execute won't use any RCU read side critical section because this |
| 36 | * function sets RCU in extended quiescent state. | 47 | * function sets RCU in extended quiescent state. |
| 37 | */ | 48 | */ |
| 38 | void user_enter(void) | 49 | void context_tracking_user_enter(void) |
| 39 | { | 50 | { |
| 40 | unsigned long flags; | 51 | unsigned long flags; |
| 41 | 52 | ||
| @@ -54,17 +65,32 @@ void user_enter(void) | |||
| 54 | WARN_ON_ONCE(!current->mm); | 65 | WARN_ON_ONCE(!current->mm); |
| 55 | 66 | ||
| 56 | local_irq_save(flags); | 67 | local_irq_save(flags); |
| 57 | if (__this_cpu_read(context_tracking.active) && | 68 | if ( __this_cpu_read(context_tracking.state) != IN_USER) { |
| 58 | __this_cpu_read(context_tracking.state) != IN_USER) { | 69 | if (__this_cpu_read(context_tracking.active)) { |
| 70 | trace_user_enter(0); | ||
| 71 | /* | ||
| 72 | * At this stage, only low level arch entry code remains and | ||
| 73 | * then we'll run in userspace. We can assume there won't be | ||
| 74 | * any RCU read-side critical section until the next call to | ||
| 75 | * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency | ||
| 76 | * on the tick. | ||
| 77 | */ | ||
| 78 | vtime_user_enter(current); | ||
| 79 | rcu_user_enter(); | ||
| 80 | } | ||
| 59 | /* | 81 | /* |
| 60 | * At this stage, only low level arch entry code remains and | 82 | * Even if context tracking is disabled on this CPU, because it's outside |
| 61 | * then we'll run in userspace. We can assume there won't be | 83 | * the full dynticks mask for example, we still have to keep track of the |
| 62 | * any RCU read-side critical section until the next call to | 84 | * context transitions and states to prevent inconsistency on those of |
| 63 | * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency | 85 | * other CPUs. |
| 64 | * on the tick. | 86 | * If a task triggers an exception in userspace, sleep on the exception |
| 87 | * handler and then migrate to another CPU, that new CPU must know where | ||
| 88 | * the exception returns by the time we call exception_exit(). | ||
| 89 | * This information can only be provided by the previous CPU when it called | ||
| 90 | * exception_enter(). | ||
| 91 | * OTOH we can spare the calls to vtime and RCU when context_tracking.active | ||
| 92 | * is false because we know that CPU is not tickless. | ||
| 65 | */ | 93 | */ |
| 66 | vtime_user_enter(current); | ||
| 67 | rcu_user_enter(); | ||
| 68 | __this_cpu_write(context_tracking.state, IN_USER); | 94 | __this_cpu_write(context_tracking.state, IN_USER); |
| 69 | } | 95 | } |
| 70 | local_irq_restore(flags); | 96 | local_irq_restore(flags); |
| @@ -87,10 +113,9 @@ void user_enter(void) | |||
| 87 | */ | 113 | */ |
| 88 | void __sched notrace preempt_schedule_context(void) | 114 | void __sched notrace preempt_schedule_context(void) |
| 89 | { | 115 | { |
| 90 | struct thread_info *ti = current_thread_info(); | ||
| 91 | enum ctx_state prev_ctx; | 116 | enum ctx_state prev_ctx; |
| 92 | 117 | ||
| 93 | if (likely(ti->preempt_count || irqs_disabled())) | 118 | if (likely(!preemptible())) |
| 94 | return; | 119 | return; |
| 95 | 120 | ||
| 96 | /* | 121 | /* |
| @@ -112,8 +137,8 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context); | |||
| 112 | #endif /* CONFIG_PREEMPT */ | 137 | #endif /* CONFIG_PREEMPT */ |
| 113 | 138 | ||
| 114 | /** | 139 | /** |
| 115 | * user_exit - Inform the context tracking that the CPU is | 140 | * context_tracking_user_exit - Inform the context tracking that the CPU is |
| 116 | * exiting userspace mode and entering the kernel. | 141 | * exiting userspace mode and entering the kernel. |
| 117 | * | 142 | * |
| 118 | * This function must be called after we entered the kernel from userspace | 143 | * This function must be called after we entered the kernel from userspace |
| 119 | * before any use of RCU read side critical section. This potentially include | 144 | * before any use of RCU read side critical section. This potentially include |
| @@ -122,7 +147,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context); | |||
| 122 | * This call supports re-entrancy. This way it can be called from any exception | 147 | * This call supports re-entrancy. This way it can be called from any exception |
| 123 | * handler without needing to know if we came from userspace or not. | 148 | * handler without needing to know if we came from userspace or not. |
| 124 | */ | 149 | */ |
| 125 | void user_exit(void) | 150 | void context_tracking_user_exit(void) |
| 126 | { | 151 | { |
| 127 | unsigned long flags; | 152 | unsigned long flags; |
| 128 | 153 | ||
| @@ -131,38 +156,22 @@ void user_exit(void) | |||
| 131 | 156 | ||
| 132 | local_irq_save(flags); | 157 | local_irq_save(flags); |
| 133 | if (__this_cpu_read(context_tracking.state) == IN_USER) { | 158 | if (__this_cpu_read(context_tracking.state) == IN_USER) { |
| 134 | /* | 159 | if (__this_cpu_read(context_tracking.active)) { |
| 135 | * We are going to run code that may use RCU. Inform | 160 | /* |
| 136 | * RCU core about that (ie: we may need the tick again). | 161 | * We are going to run code that may use RCU. Inform |
| 137 | */ | 162 | * RCU core about that (ie: we may need the tick again). |
| 138 | rcu_user_exit(); | 163 | */ |
| 139 | vtime_user_exit(current); | 164 | rcu_user_exit(); |
| 165 | vtime_user_exit(current); | ||
| 166 | trace_user_exit(0); | ||
| 167 | } | ||
| 140 | __this_cpu_write(context_tracking.state, IN_KERNEL); | 168 | __this_cpu_write(context_tracking.state, IN_KERNEL); |
| 141 | } | 169 | } |
| 142 | local_irq_restore(flags); | 170 | local_irq_restore(flags); |
| 143 | } | 171 | } |
| 144 | 172 | ||
| 145 | void guest_enter(void) | ||
| 146 | { | ||
| 147 | if (vtime_accounting_enabled()) | ||
| 148 | vtime_guest_enter(current); | ||
| 149 | else | ||
| 150 | __guest_enter(); | ||
| 151 | } | ||
| 152 | EXPORT_SYMBOL_GPL(guest_enter); | ||
| 153 | |||
| 154 | void guest_exit(void) | ||
| 155 | { | ||
| 156 | if (vtime_accounting_enabled()) | ||
| 157 | vtime_guest_exit(current); | ||
| 158 | else | ||
| 159 | __guest_exit(); | ||
| 160 | } | ||
| 161 | EXPORT_SYMBOL_GPL(guest_exit); | ||
| 162 | |||
| 163 | |||
| 164 | /** | 173 | /** |
| 165 | * context_tracking_task_switch - context switch the syscall callbacks | 174 | * __context_tracking_task_switch - context switch the syscall callbacks |
| 166 | * @prev: the task that is being switched out | 175 | * @prev: the task that is being switched out |
| 167 | * @next: the task that is being switched in | 176 | * @next: the task that is being switched in |
| 168 | * | 177 | * |
| @@ -174,11 +183,19 @@ EXPORT_SYMBOL_GPL(guest_exit); | |||
| 174 | * migrate to some CPU that doesn't do the context tracking. As such the TIF | 183 | * migrate to some CPU that doesn't do the context tracking. As such the TIF |
| 175 | * flag may not be desired there. | 184 | * flag may not be desired there. |
| 176 | */ | 185 | */ |
| 177 | void context_tracking_task_switch(struct task_struct *prev, | 186 | void __context_tracking_task_switch(struct task_struct *prev, |
| 178 | struct task_struct *next) | 187 | struct task_struct *next) |
| 179 | { | 188 | { |
| 180 | if (__this_cpu_read(context_tracking.active)) { | 189 | clear_tsk_thread_flag(prev, TIF_NOHZ); |
| 181 | clear_tsk_thread_flag(prev, TIF_NOHZ); | 190 | set_tsk_thread_flag(next, TIF_NOHZ); |
| 182 | set_tsk_thread_flag(next, TIF_NOHZ); | ||
| 183 | } | ||
| 184 | } | 191 | } |
| 192 | |||
| 193 | #ifdef CONFIG_CONTEXT_TRACKING_FORCE | ||
| 194 | void __init context_tracking_init(void) | ||
| 195 | { | ||
| 196 | int cpu; | ||
| 197 | |||
| 198 | for_each_possible_cpu(cpu) | ||
| 199 | context_tracking_cpu_set(cpu); | ||
| 200 | } | ||
| 201 | #endif | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b8e2162fc803..725aa067ad63 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -2527,13 +2527,11 @@ void __sched schedule_preempt_disabled(void) | |||
| 2527 | */ | 2527 | */ |
| 2528 | asmlinkage void __sched notrace preempt_schedule(void) | 2528 | asmlinkage void __sched notrace preempt_schedule(void) |
| 2529 | { | 2529 | { |
| 2530 | struct thread_info *ti = current_thread_info(); | ||
| 2531 | |||
| 2532 | /* | 2530 | /* |
| 2533 | * If there is a non-zero preempt_count or interrupts are disabled, | 2531 | * If there is a non-zero preempt_count or interrupts are disabled, |
| 2534 | * we do not want to preempt the current task. Just return.. | 2532 | * we do not want to preempt the current task. Just return.. |
| 2535 | */ | 2533 | */ |
| 2536 | if (likely(ti->preempt_count || irqs_disabled())) | 2534 | if (likely(!preemptible())) |
| 2537 | return; | 2535 | return; |
| 2538 | 2536 | ||
| 2539 | do { | 2537 | do { |
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index e89ccefef278..ace34f95e200 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c | |||
| @@ -378,11 +378,8 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_ | |||
| 378 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | 378 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING |
| 379 | 379 | ||
| 380 | #ifndef __ARCH_HAS_VTIME_TASK_SWITCH | 380 | #ifndef __ARCH_HAS_VTIME_TASK_SWITCH |
| 381 | void vtime_task_switch(struct task_struct *prev) | 381 | void vtime_common_task_switch(struct task_struct *prev) |
| 382 | { | 382 | { |
| 383 | if (!vtime_accounting_enabled()) | ||
| 384 | return; | ||
| 385 | |||
| 386 | if (is_idle_task(prev)) | 383 | if (is_idle_task(prev)) |
| 387 | vtime_account_idle(prev); | 384 | vtime_account_idle(prev); |
| 388 | else | 385 | else |
| @@ -404,11 +401,8 @@ void vtime_task_switch(struct task_struct *prev) | |||
| 404 | * vtime_account(). | 401 | * vtime_account(). |
| 405 | */ | 402 | */ |
| 406 | #ifndef __ARCH_HAS_VTIME_ACCOUNT | 403 | #ifndef __ARCH_HAS_VTIME_ACCOUNT |
| 407 | void vtime_account_irq_enter(struct task_struct *tsk) | 404 | void vtime_common_account_irq_enter(struct task_struct *tsk) |
| 408 | { | 405 | { |
| 409 | if (!vtime_accounting_enabled()) | ||
| 410 | return; | ||
| 411 | |||
| 412 | if (!in_interrupt()) { | 406 | if (!in_interrupt()) { |
| 413 | /* | 407 | /* |
| 414 | * If we interrupted user, context_tracking_in_user() | 408 | * If we interrupted user, context_tracking_in_user() |
| @@ -428,7 +422,7 @@ void vtime_account_irq_enter(struct task_struct *tsk) | |||
| 428 | } | 422 | } |
| 429 | vtime_account_system(tsk); | 423 | vtime_account_system(tsk); |
| 430 | } | 424 | } |
| 431 | EXPORT_SYMBOL_GPL(vtime_account_irq_enter); | 425 | EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter); |
| 432 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ | 426 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ |
| 433 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ | 427 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ |
| 434 | 428 | ||
| @@ -559,12 +553,6 @@ static void cputime_adjust(struct task_cputime *curr, | |||
| 559 | { | 553 | { |
| 560 | cputime_t rtime, stime, utime, total; | 554 | cputime_t rtime, stime, utime, total; |
| 561 | 555 | ||
| 562 | if (vtime_accounting_enabled()) { | ||
| 563 | *ut = curr->utime; | ||
| 564 | *st = curr->stime; | ||
| 565 | return; | ||
| 566 | } | ||
| 567 | |||
| 568 | stime = curr->stime; | 556 | stime = curr->stime; |
| 569 | total = stime + curr->utime; | 557 | total = stime + curr->utime; |
| 570 | 558 | ||
| @@ -664,23 +652,17 @@ static void __vtime_account_system(struct task_struct *tsk) | |||
| 664 | 652 | ||
| 665 | void vtime_account_system(struct task_struct *tsk) | 653 | void vtime_account_system(struct task_struct *tsk) |
| 666 | { | 654 | { |
| 667 | if (!vtime_accounting_enabled()) | ||
| 668 | return; | ||
| 669 | |||
| 670 | write_seqlock(&tsk->vtime_seqlock); | 655 | write_seqlock(&tsk->vtime_seqlock); |
| 671 | __vtime_account_system(tsk); | 656 | __vtime_account_system(tsk); |
| 672 | write_sequnlock(&tsk->vtime_seqlock); | 657 | write_sequnlock(&tsk->vtime_seqlock); |
| 673 | } | 658 | } |
| 674 | 659 | ||
| 675 | void vtime_account_irq_exit(struct task_struct *tsk) | 660 | void vtime_gen_account_irq_exit(struct task_struct *tsk) |
| 676 | { | 661 | { |
| 677 | if (!vtime_accounting_enabled()) | ||
| 678 | return; | ||
| 679 | |||
| 680 | write_seqlock(&tsk->vtime_seqlock); | 662 | write_seqlock(&tsk->vtime_seqlock); |
| 663 | __vtime_account_system(tsk); | ||
| 681 | if (context_tracking_in_user()) | 664 | if (context_tracking_in_user()) |
| 682 | tsk->vtime_snap_whence = VTIME_USER; | 665 | tsk->vtime_snap_whence = VTIME_USER; |
| 683 | __vtime_account_system(tsk); | ||
| 684 | write_sequnlock(&tsk->vtime_seqlock); | 666 | write_sequnlock(&tsk->vtime_seqlock); |
| 685 | } | 667 | } |
| 686 | 668 | ||
| @@ -688,12 +670,8 @@ void vtime_account_user(struct task_struct *tsk) | |||
| 688 | { | 670 | { |
| 689 | cputime_t delta_cpu; | 671 | cputime_t delta_cpu; |
| 690 | 672 | ||
| 691 | if (!vtime_accounting_enabled()) | ||
| 692 | return; | ||
| 693 | |||
| 694 | delta_cpu = get_vtime_delta(tsk); | ||
| 695 | |||
| 696 | write_seqlock(&tsk->vtime_seqlock); | 673 | write_seqlock(&tsk->vtime_seqlock); |
| 674 | delta_cpu = get_vtime_delta(tsk); | ||
| 697 | tsk->vtime_snap_whence = VTIME_SYS; | 675 | tsk->vtime_snap_whence = VTIME_SYS; |
| 698 | account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); | 676 | account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); |
| 699 | write_sequnlock(&tsk->vtime_seqlock); | 677 | write_sequnlock(&tsk->vtime_seqlock); |
| @@ -701,22 +679,27 @@ void vtime_account_user(struct task_struct *tsk) | |||
| 701 | 679 | ||
| 702 | void vtime_user_enter(struct task_struct *tsk) | 680 | void vtime_user_enter(struct task_struct *tsk) |
| 703 | { | 681 | { |
| 704 | if (!vtime_accounting_enabled()) | ||
| 705 | return; | ||
| 706 | |||
| 707 | write_seqlock(&tsk->vtime_seqlock); | 682 | write_seqlock(&tsk->vtime_seqlock); |
| 708 | tsk->vtime_snap_whence = VTIME_USER; | ||
| 709 | __vtime_account_system(tsk); | 683 | __vtime_account_system(tsk); |
| 684 | tsk->vtime_snap_whence = VTIME_USER; | ||
| 710 | write_sequnlock(&tsk->vtime_seqlock); | 685 | write_sequnlock(&tsk->vtime_seqlock); |
| 711 | } | 686 | } |
| 712 | 687 | ||
| 713 | void vtime_guest_enter(struct task_struct *tsk) | 688 | void vtime_guest_enter(struct task_struct *tsk) |
| 714 | { | 689 | { |
| 690 | /* | ||
| 691 | * The flags must be updated under the lock with | ||
| 692 | * the vtime_snap flush and update. | ||
| 693 | * That enforces a right ordering and update sequence | ||
| 694 | * synchronization against the reader (task_gtime()) | ||
| 695 | * that can thus safely catch up with a tickless delta. | ||
| 696 | */ | ||
| 715 | write_seqlock(&tsk->vtime_seqlock); | 697 | write_seqlock(&tsk->vtime_seqlock); |
| 716 | __vtime_account_system(tsk); | 698 | __vtime_account_system(tsk); |
| 717 | current->flags |= PF_VCPU; | 699 | current->flags |= PF_VCPU; |
| 718 | write_sequnlock(&tsk->vtime_seqlock); | 700 | write_sequnlock(&tsk->vtime_seqlock); |
| 719 | } | 701 | } |
| 702 | EXPORT_SYMBOL_GPL(vtime_guest_enter); | ||
| 720 | 703 | ||
| 721 | void vtime_guest_exit(struct task_struct *tsk) | 704 | void vtime_guest_exit(struct task_struct *tsk) |
| 722 | { | 705 | { |
| @@ -725,6 +708,7 @@ void vtime_guest_exit(struct task_struct *tsk) | |||
| 725 | current->flags &= ~PF_VCPU; | 708 | current->flags &= ~PF_VCPU; |
| 726 | write_sequnlock(&tsk->vtime_seqlock); | 709 | write_sequnlock(&tsk->vtime_seqlock); |
| 727 | } | 710 | } |
| 711 | EXPORT_SYMBOL_GPL(vtime_guest_exit); | ||
| 728 | 712 | ||
| 729 | void vtime_account_idle(struct task_struct *tsk) | 713 | void vtime_account_idle(struct task_struct *tsk) |
| 730 | { | 714 | { |
| @@ -733,11 +717,6 @@ void vtime_account_idle(struct task_struct *tsk) | |||
| 733 | account_idle_time(delta_cpu); | 717 | account_idle_time(delta_cpu); |
| 734 | } | 718 | } |
| 735 | 719 | ||
| 736 | bool vtime_accounting_enabled(void) | ||
| 737 | { | ||
| 738 | return context_tracking_active(); | ||
| 739 | } | ||
| 740 | |||
| 741 | void arch_vtime_task_switch(struct task_struct *prev) | 720 | void arch_vtime_task_switch(struct task_struct *prev) |
| 742 | { | 721 | { |
| 743 | write_seqlock(&prev->vtime_seqlock); | 722 | write_seqlock(&prev->vtime_seqlock); |
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 3381f098070f..2b62fe86f9ec 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig | |||
| @@ -105,7 +105,6 @@ config NO_HZ_FULL | |||
| 105 | select RCU_USER_QS | 105 | select RCU_USER_QS |
| 106 | select RCU_NOCB_CPU | 106 | select RCU_NOCB_CPU |
| 107 | select VIRT_CPU_ACCOUNTING_GEN | 107 | select VIRT_CPU_ACCOUNTING_GEN |
| 108 | select CONTEXT_TRACKING_FORCE | ||
| 109 | select IRQ_WORK | 108 | select IRQ_WORK |
| 110 | help | 109 | help |
| 111 | Adaptively try to shutdown the tick whenever possible, even when | 110 | Adaptively try to shutdown the tick whenever possible, even when |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e8a1516cc0a3..3612fc77f834 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include <linux/irq_work.h> | 23 | #include <linux/irq_work.h> |
| 24 | #include <linux/posix-timers.h> | 24 | #include <linux/posix-timers.h> |
| 25 | #include <linux/perf_event.h> | 25 | #include <linux/perf_event.h> |
| 26 | #include <linux/context_tracking.h> | ||
| 26 | 27 | ||
| 27 | #include <asm/irq_regs.h> | 28 | #include <asm/irq_regs.h> |
| 28 | 29 | ||
| @@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) | |||
| 148 | } | 149 | } |
| 149 | 150 | ||
| 150 | #ifdef CONFIG_NO_HZ_FULL | 151 | #ifdef CONFIG_NO_HZ_FULL |
| 151 | static cpumask_var_t nohz_full_mask; | 152 | cpumask_var_t tick_nohz_full_mask; |
| 152 | bool have_nohz_full_mask; | 153 | bool tick_nohz_full_running; |
| 153 | 154 | ||
| 154 | static bool can_stop_full_tick(void) | 155 | static bool can_stop_full_tick(void) |
| 155 | { | 156 | { |
| @@ -182,7 +183,7 @@ static bool can_stop_full_tick(void) | |||
| 182 | * Don't allow the user to think they can get | 183 | * Don't allow the user to think they can get |
| 183 | * full NO_HZ with this machine. | 184 | * full NO_HZ with this machine. |
| 184 | */ | 185 | */ |
| 185 | WARN_ONCE(have_nohz_full_mask, | 186 | WARN_ONCE(tick_nohz_full_running, |
| 186 | "NO_HZ FULL will not work with unstable sched clock"); | 187 | "NO_HZ FULL will not work with unstable sched clock"); |
| 187 | return false; | 188 | return false; |
| 188 | } | 189 | } |
| @@ -197,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); | |||
| 197 | * Re-evaluate the need for the tick on the current CPU | 198 | * Re-evaluate the need for the tick on the current CPU |
| 198 | * and restart it if necessary. | 199 | * and restart it if necessary. |
| 199 | */ | 200 | */ |
| 200 | void tick_nohz_full_check(void) | 201 | void __tick_nohz_full_check(void) |
| 201 | { | 202 | { |
| 202 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 203 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
| 203 | 204 | ||
| @@ -211,7 +212,7 @@ void tick_nohz_full_check(void) | |||
| 211 | 212 | ||
| 212 | static void nohz_full_kick_work_func(struct irq_work *work) | 213 | static void nohz_full_kick_work_func(struct irq_work *work) |
| 213 | { | 214 | { |
| 214 | tick_nohz_full_check(); | 215 | __tick_nohz_full_check(); |
| 215 | } | 216 | } |
| 216 | 217 | ||
| 217 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { | 218 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { |
| @@ -230,7 +231,7 @@ void tick_nohz_full_kick(void) | |||
| 230 | 231 | ||
| 231 | static void nohz_full_kick_ipi(void *info) | 232 | static void nohz_full_kick_ipi(void *info) |
| 232 | { | 233 | { |
| 233 | tick_nohz_full_check(); | 234 | __tick_nohz_full_check(); |
| 234 | } | 235 | } |
| 235 | 236 | ||
| 236 | /* | 237 | /* |
| @@ -239,12 +240,13 @@ static void nohz_full_kick_ipi(void *info) | |||
| 239 | */ | 240 | */ |
| 240 | void tick_nohz_full_kick_all(void) | 241 | void tick_nohz_full_kick_all(void) |
| 241 | { | 242 | { |
| 242 | if (!have_nohz_full_mask) | 243 | if (!tick_nohz_full_running) |
| 243 | return; | 244 | return; |
| 244 | 245 | ||
| 245 | preempt_disable(); | 246 | preempt_disable(); |
| 246 | smp_call_function_many(nohz_full_mask, | 247 | smp_call_function_many(tick_nohz_full_mask, |
| 247 | nohz_full_kick_ipi, NULL, false); | 248 | nohz_full_kick_ipi, NULL, false); |
| 249 | tick_nohz_full_kick(); | ||
| 248 | preempt_enable(); | 250 | preempt_enable(); |
| 249 | } | 251 | } |
| 250 | 252 | ||
| @@ -253,7 +255,7 @@ void tick_nohz_full_kick_all(void) | |||
| 253 | * It might need the tick due to per task/process properties: | 255 | * It might need the tick due to per task/process properties: |
| 254 | * perf events, posix cpu timers, ... | 256 | * perf events, posix cpu timers, ... |
| 255 | */ | 257 | */ |
| 256 | void tick_nohz_task_switch(struct task_struct *tsk) | 258 | void __tick_nohz_task_switch(struct task_struct *tsk) |
| 257 | { | 259 | { |
| 258 | unsigned long flags; | 260 | unsigned long flags; |
| 259 | 261 | ||
| @@ -269,31 +271,23 @@ out: | |||
| 269 | local_irq_restore(flags); | 271 | local_irq_restore(flags); |
| 270 | } | 272 | } |
| 271 | 273 | ||
| 272 | int tick_nohz_full_cpu(int cpu) | ||
| 273 | { | ||
| 274 | if (!have_nohz_full_mask) | ||
| 275 | return 0; | ||
| 276 | |||
| 277 | return cpumask_test_cpu(cpu, nohz_full_mask); | ||
| 278 | } | ||
| 279 | |||
| 280 | /* Parse the boot-time nohz CPU list from the kernel parameters. */ | 274 | /* Parse the boot-time nohz CPU list from the kernel parameters. */ |
| 281 | static int __init tick_nohz_full_setup(char *str) | 275 | static int __init tick_nohz_full_setup(char *str) |
| 282 | { | 276 | { |
| 283 | int cpu; | 277 | int cpu; |
| 284 | 278 | ||
| 285 | alloc_bootmem_cpumask_var(&nohz_full_mask); | 279 | alloc_bootmem_cpumask_var(&tick_nohz_full_mask); |
| 286 | if (cpulist_parse(str, nohz_full_mask) < 0) { | 280 | if (cpulist_parse(str, tick_nohz_full_mask) < 0) { |
| 287 | pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); | 281 | pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); |
| 288 | return 1; | 282 | return 1; |
| 289 | } | 283 | } |
| 290 | 284 | ||
| 291 | cpu = smp_processor_id(); | 285 | cpu = smp_processor_id(); |
| 292 | if (cpumask_test_cpu(cpu, nohz_full_mask)) { | 286 | if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { |
| 293 | pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); | 287 | pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); |
| 294 | cpumask_clear_cpu(cpu, nohz_full_mask); | 288 | cpumask_clear_cpu(cpu, tick_nohz_full_mask); |
| 295 | } | 289 | } |
| 296 | have_nohz_full_mask = true; | 290 | tick_nohz_full_running = true; |
| 297 | 291 | ||
| 298 | return 1; | 292 | return 1; |
| 299 | } | 293 | } |
| @@ -311,7 +305,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, | |||
| 311 | * If we handle the timekeeping duty for full dynticks CPUs, | 305 | * If we handle the timekeeping duty for full dynticks CPUs, |
| 312 | * we can't safely shutdown that CPU. | 306 | * we can't safely shutdown that CPU. |
| 313 | */ | 307 | */ |
| 314 | if (have_nohz_full_mask && tick_do_timer_cpu == cpu) | 308 | if (tick_nohz_full_running && tick_do_timer_cpu == cpu) |
| 315 | return NOTIFY_BAD; | 309 | return NOTIFY_BAD; |
| 316 | break; | 310 | break; |
| 317 | } | 311 | } |
| @@ -330,31 +324,34 @@ static int tick_nohz_init_all(void) | |||
| 330 | int err = -1; | 324 | int err = -1; |
| 331 | 325 | ||
| 332 | #ifdef CONFIG_NO_HZ_FULL_ALL | 326 | #ifdef CONFIG_NO_HZ_FULL_ALL |
| 333 | if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { | 327 | if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) { |
| 334 | pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); | 328 | pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); |
| 335 | return err; | 329 | return err; |
| 336 | } | 330 | } |
| 337 | err = 0; | 331 | err = 0; |
| 338 | cpumask_setall(nohz_full_mask); | 332 | cpumask_setall(tick_nohz_full_mask); |
| 339 | cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); | 333 | cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask); |
| 340 | have_nohz_full_mask = true; | 334 | tick_nohz_full_running = true; |
| 341 | #endif | 335 | #endif |
| 342 | return err; | 336 | return err; |
| 343 | } | 337 | } |
| 344 | 338 | ||
| 345 | void __init tick_nohz_init(void) | 339 | void __init tick_nohz_init(void) |
| 346 | { | 340 | { |
| 347 | if (!have_nohz_full_mask) { | 341 | int cpu; |
| 342 | |||
| 343 | if (!tick_nohz_full_running) { | ||
| 348 | if (tick_nohz_init_all() < 0) | 344 | if (tick_nohz_init_all() < 0) |
| 349 | return; | 345 | return; |
| 350 | } | 346 | } |
| 351 | 347 | ||
| 348 | for_each_cpu(cpu, tick_nohz_full_mask) | ||
| 349 | context_tracking_cpu_set(cpu); | ||
| 350 | |||
| 352 | cpu_notifier(tick_nohz_cpu_down_callback, 0); | 351 | cpu_notifier(tick_nohz_cpu_down_callback, 0); |
| 353 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); | 352 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask); |
| 354 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); | 353 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); |
| 355 | } | 354 | } |
| 356 | #else | ||
| 357 | #define have_nohz_full_mask (0) | ||
| 358 | #endif | 355 | #endif |
| 359 | 356 | ||
| 360 | /* | 357 | /* |
| @@ -732,7 +729,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | |||
| 732 | return false; | 729 | return false; |
| 733 | } | 730 | } |
| 734 | 731 | ||
| 735 | if (have_nohz_full_mask) { | 732 | if (tick_nohz_full_enabled()) { |
| 736 | /* | 733 | /* |
| 737 | * Keep the tick alive to guarantee timekeeping progression | 734 | * Keep the tick alive to guarantee timekeeping progression |
| 738 | * if there are full dynticks CPUs around | 735 | * if there are full dynticks CPUs around |
