author     Linus Torvalds <torvalds@linux-foundation.org>   2016-03-14 22:44:38 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-03-14 22:44:38 -0400
commit     e23604edac2a7be6a8808a5d13fac6b9df4eb9a8 (patch)
tree       7a7fa87796fb1dc6dd355675816e2e59546d4edd
parent     d4e796152a049f6a675f8b6dcf7080a9d80014e5 (diff)
parent     1f25184656a00a59e3a953189070d42a749f6aee (diff)
Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull NOHZ updates from Ingo Molnar:
"NOHZ enhancements, by Frederic Weisbecker, which reorganizes/refactors
the NOHZ 'can the tick be stopped?' infrastructure and related code to
be data driven, and harmonizes the naming and handling of all the
various properties"
[ This makes the ugly "fetch_or()" macro that the scheduler used
internally a new generic helper, and does a bad job at it.
I'm pulling it, but I've asked Ingo and Frederic to get this
fixed up ]
* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched-clock: Migrate to use new tick dependency mask model
posix-cpu-timers: Migrate to use new tick dependency mask model
sched: Migrate sched to use new tick dependency mask model
sched: Account rr tasks
perf: Migrate perf to use new tick dependency mask model
nohz: Use enum code for tick stop failure tracing message
nohz: New tick dependency mask
nohz: Implement wide kick on top of irq work
atomic: Export fetch_or()
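
The shape of the new model, reduced to a sketch (illustrative only, not part of this pull; the real call sites are in the diffs below): instead of being polled through *_can_stop_tick() callbacks, a subsystem that needs the tick sets a dependency bit for a CPU, a task, a signal struct, or globally, and clears it again when the need goes away. For example, a per-CPU dependency, as perf uses for throttling:

	/* Illustrative use of the new API; see kernel/events/core.c below. */
	#include <linux/tick.h>

	static void example_need_tick(int cpu)
	{
		/* Keep the tick running on @cpu until the bit is cleared. */
		tick_dep_set_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
	}

	static void example_tick_not_needed(int cpu)
	{
		tick_dep_clear_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
	}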
-rw-r--r--  include/linux/atomic.h          |  21
-rw-r--r--  include/linux/perf_event.h      |   6
-rw-r--r--  include/linux/posix-timers.h    |   3
-rw-r--r--  include/linux/sched.h           |  11
-rw-r--r--  include/linux/tick.h            |  97
-rw-r--r--  include/trace/events/timer.h    |  36
-rw-r--r--  kernel/events/core.c            |  65
-rw-r--r--  kernel/sched/clock.c            |   5
-rw-r--r--  kernel/sched/core.c             |  49
-rw-r--r--  kernel/sched/rt.c               |  16
-rw-r--r--  kernel/sched/sched.h            |  48
-rw-r--r--  kernel/time/posix-cpu-timers.c  |  52
-rw-r--r--  kernel/time/tick-sched.c        | 175
-rw-r--r--  kernel/time/tick-sched.h        |   1
14 files changed, 424 insertions(+), 161 deletions(-)
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 301de78d65f7..6c502cb13c95 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
+/**
+ * fetch_or - perform *ptr |= mask and return old value of *ptr
+ * @ptr: pointer to value
+ * @mask: mask to OR on the value
+ *
+ * cmpxchg based fetch_or, macro so it works for different integer types
+ */
+#ifndef fetch_or
+#define fetch_or(ptr, mask)						\
+({	typeof(*(ptr)) __old, __val = *(ptr);				\
+	for (;;) {							\
+		__old = cmpxchg((ptr), __val, __val | (mask));		\
+		if (__old == __val)					\
+			break;						\
+		__val = __old;						\
+	}								\
+	__old;								\
+})
+#endif
+
+
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
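
fetch_or() returns the value the word held before the OR, so a caller can set a bit and learn in the same atomic step whether any bit was already set. A minimal sketch of that idiom, modeled on tick_nohz_dep_set_all() further down in this series (the flag word and the kick are placeholders, not part of the patch):

	static unsigned long example_dep_mask;	/* placeholder flag word */

	static void example_set_dep(int bit)
	{
		unsigned long prev;

		/* Atomically OR the bit in and fetch the previous value. */
		prev = fetch_or(&example_dep_mask, BIT_MASK(bit));
		if (!prev)
			example_kick_all_cpus();	/* placeholder: first dependency just appeared */
	}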
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a9d8cab18b00..79ec7bbf0155 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1110,12 +1110,6 @@ static inline void perf_event_task_tick(void) { }
 static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
 #endif
 
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
-extern bool perf_event_can_stop_tick(void);
-#else
-static inline bool perf_event_can_stop_tick(void) { return true; }
-#endif
-
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
 extern void perf_restore_debug_store(void);
 #else
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 907f3fd191ac..62d44c176071 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer);
 void run_posix_cpu_timers(struct task_struct *task);
 void posix_cpu_timers_exit(struct task_struct *task);
 void posix_cpu_timers_exit_group(struct task_struct *task);
-
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);
-
 void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 838a89a78332..c617ea12c6b7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -717,6 +717,10 @@ struct signal_struct {
 	/* Earliest-expiration cache. */
 	struct task_cputime cputime_expires;
 
+#ifdef CONFIG_NO_HZ_FULL
+	unsigned long tick_dep_mask;
+#endif
+
 	struct list_head cpu_timers[3];
 
 	struct pid *tty_old_pgrp;
@@ -1542,6 +1546,10 @@ struct task_struct {
 		VTIME_SYS,
 	} vtime_snap_whence;
 #endif
+
+#ifdef CONFIG_NO_HZ_FULL
+	unsigned long tick_dep_mask;
+#endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	u64 start_time;		/* monotonic time in nsec */
 	u64 real_start_time;	/* boot based time in nsec */
@@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
-extern bool sched_can_stop_tick(void);
 extern u64 scheduler_tick_max_deferment(void);
-#else
-static inline bool sched_can_stop_tick(void) { return false; }
 #endif
 
 #ifdef CONFIG_SCHED_AUTOGROUP
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 97fd4e543846..21f73649a4dc 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void)
 	tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
 }
 
+enum tick_dep_bits {
+	TICK_DEP_BIT_POSIX_TIMER	= 0,
+	TICK_DEP_BIT_PERF_EVENTS	= 1,
+	TICK_DEP_BIT_SCHED		= 2,
+	TICK_DEP_BIT_CLOCK_UNSTABLE	= 3
+};
+
+#define TICK_DEP_MASK_NONE		0
+#define TICK_DEP_MASK_POSIX_TIMER	(1 << TICK_DEP_BIT_POSIX_TIMER)
+#define TICK_DEP_MASK_PERF_EVENTS	(1 << TICK_DEP_BIT_PERF_EVENTS)
+#define TICK_DEP_MASK_SCHED		(1 << TICK_DEP_BIT_SCHED)
+#define TICK_DEP_MASK_CLOCK_UNSTABLE	(1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
+
 #ifdef CONFIG_NO_HZ_COMMON
 extern int tick_nohz_enabled;
 extern int tick_nohz_tick_stopped(void);
@@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void)
 	return cpumask_any_and(housekeeping_mask, cpu_online_mask);
 }
 
-extern void tick_nohz_full_kick(void);
+extern void tick_nohz_dep_set(enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_task(struct task_struct *tsk,
+				   enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
+				     enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit);
+
+/*
+ * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
+ * on top of static keys.
+ */
+static inline void tick_dep_set(enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set(bit);
+}
+
+static inline void tick_dep_clear(enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear(bit);
+}
+
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_cpu(cpu))
+		tick_nohz_dep_set_cpu(cpu, bit);
+}
+
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_cpu(cpu))
+		tick_nohz_dep_clear_cpu(cpu, bit);
+}
+
+static inline void tick_dep_set_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set_task(tsk, bit);
+}
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+				       enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear_task(tsk, bit);
+}
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set_signal(signal, bit);
+}
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+					 enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear_signal(signal, bit);
+}
+
 extern void tick_nohz_full_kick_cpu(int cpu);
-extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(void);
 #else
 static inline int housekeeping_any_cpu(void)
@@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void)
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
 static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
+
+static inline void tick_dep_set(enum tick_dep_bits bit) { }
+static inline void tick_dep_clear(enum tick_dep_bits bit) { }
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_set_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+				       enum tick_dep_bits bit) { }
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+					 enum tick_dep_bits bit) { }
+
 static inline void tick_nohz_full_kick_cpu(int cpu) { }
-static inline void tick_nohz_full_kick(void) { }
-static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(void) { }
 #endif
 
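
These wrappers keep the off-case cheap: without CONFIG_NO_HZ_FULL they are empty stubs, and even when the option is built in they reduce to a tick_nohz_full_enabled()/tick_nohz_full_cpu() test unless nohz_full is actually in use. A hedged sketch of a per-task/per-process caller, mirroring what arm_timer() in kernel/time/posix-cpu-timers.c does later in this diff:

	/* Sketch only; the real call site is arm_timer() below. */
	static void example_arm_cpu_timer(struct task_struct *p, bool per_thread)
	{
		if (per_thread)
			tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
		else
			tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
	}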
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 073b9ac245ba..51440131d337 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire,
 );
 
 #ifdef CONFIG_NO_HZ_COMMON
+
+#define TICK_DEP_NAMES					\
+		tick_dep_name(NONE)			\
+		tick_dep_name(POSIX_TIMER)		\
+		tick_dep_name(PERF_EVENTS)		\
+		tick_dep_name(SCHED)			\
+		tick_dep_name_end(CLOCK_UNSTABLE)
+
+#undef tick_dep_name
+#undef tick_dep_name_end
+
+#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+#define tick_dep_name_end(sdep)  TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+
+TICK_DEP_NAMES
+
+#undef tick_dep_name
+#undef tick_dep_name_end
+
+#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
+#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
+
+#define show_tick_dep_name(val)				\
+	__print_symbolic(val, TICK_DEP_NAMES)
+
 TRACE_EVENT(tick_stop,
 
-	TP_PROTO(int success, char *error_msg),
+	TP_PROTO(int success, int dependency),
 
-	TP_ARGS(success, error_msg),
+	TP_ARGS(success, dependency),
 
 	TP_STRUCT__entry(
 		__field( int ,		success	)
-		__string( msg, 		error_msg )
+		__field( int ,		dependency )
 	),
 
 	TP_fast_assign(
 		__entry->success	= success;
-		__assign_str(msg, error_msg);
+		__entry->dependency	= dependency;
 	),
 
-	TP_printk("success=%s msg=%s",  __entry->success ? "yes" : "no", __get_str(msg))
+	TP_printk("success=%d dependency=%s",  __entry->success, \
+			show_tick_dep_name(__entry->dependency))
 );
 #endif
 
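
With the free-form string replaced by a dependency mask, the tick_stop event now records an integer and prints it symbolically through __print_symbolic(); a successfully stopped tick is logged with TICK_DEP_MASK_NONE. Roughly, the resulting trace lines should look like this (illustrative; exact formatting depends on the tracer):

	tick_stop: success=1 dependency=NONE
	tick_stop: success=0 dependency=SCHED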
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b7231498de47..712570dddacd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3112,17 +3112,6 @@ done:
 	return rotate;
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-bool perf_event_can_stop_tick(void)
-{
-	if (atomic_read(&nr_freq_events) ||
-	    __this_cpu_read(perf_throttled_count))
-		return false;
-	else
-		return true;
-}
-#endif
-
 void perf_event_task_tick(void)
 {
 	struct list_head *head = this_cpu_ptr(&active_ctx_list);
@@ -3133,6 +3122,7 @@ void perf_event_task_tick(void)
 
 	__this_cpu_inc(perf_throttled_seq);
 	throttled = __this_cpu_xchg(perf_throttled_count, 0);
+	tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
 
 	list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
 		perf_adjust_freq_unthr_context(ctx, throttled);
@@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
 		atomic_dec(&per_cpu(perf_cgroup_events, cpu));
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static DEFINE_SPINLOCK(nr_freq_lock);
+#endif
+
+static void unaccount_freq_event_nohz(void)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	spin_lock(&nr_freq_lock);
+	if (atomic_dec_and_test(&nr_freq_events))
+		tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
+	spin_unlock(&nr_freq_lock);
+#endif
+}
+
+static void unaccount_freq_event(void)
+{
+	if (tick_nohz_full_enabled())
+		unaccount_freq_event_nohz();
+	else
+		atomic_dec(&nr_freq_events);
+}
+
 static void unaccount_event(struct perf_event *event)
 {
 	bool dec = false;
@@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event)
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
-		atomic_dec(&nr_freq_events);
+		unaccount_freq_event();
 	if (event->attr.context_switch) {
 		dec = true;
 		atomic_dec(&nr_switch_events);
@@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event,
 		if (unlikely(throttle
 			     && hwc->interrupts >= max_samples_per_tick)) {
 			__this_cpu_inc(perf_throttled_count);
+			tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
 			hwc->interrupts = MAX_INTERRUPTS;
 			perf_log_throttle(event, 0);
-			tick_nohz_full_kick();
 			ret = 1;
 		}
 	}
@@ -7815,6 +7827,27 @@ static void account_event_cpu(struct perf_event *event, int cpu)
 		atomic_inc(&per_cpu(perf_cgroup_events, cpu));
 }
 
+/* Freq events need the tick to stay alive (see perf_event_task_tick). */
+static void account_freq_event_nohz(void)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	/* Lock so we don't race with concurrent unaccount */
+	spin_lock(&nr_freq_lock);
+	if (atomic_inc_return(&nr_freq_events) == 1)
+		tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
+	spin_unlock(&nr_freq_lock);
+#endif
+}
+
+static void account_freq_event(void)
+{
+	if (tick_nohz_full_enabled())
+		account_freq_event_nohz();
+	else
+		atomic_inc(&nr_freq_events);
+}
+
+
 static void account_event(struct perf_event *event)
 {
 	bool inc = false;
@@ -7830,10 +7863,8 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_comm_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
-	if (event->attr.freq) {
-		if (atomic_inc_return(&nr_freq_events) == 1)
-			tick_nohz_full_kick_all();
-	}
+	if (event->attr.freq)
+		account_freq_event();
 	if (event->attr.context_switch) {
 		atomic_inc(&nr_switch_events);
 		inc = true;
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index bc54e84675da..fedb967a9841 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -61,6 +61,7 @@
 #include <linux/static_key.h>
 #include <linux/workqueue.h>
 #include <linux/compiler.h>
+#include <linux/tick.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -89,6 +90,8 @@ static void __set_sched_clock_stable(void)
 {
 	if (!sched_clock_stable())
 		static_key_slow_inc(&__sched_clock_stable);
+
+	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
 void set_sched_clock_stable(void)
@@ -108,6 +111,8 @@ static void __clear_sched_clock_stable(struct work_struct *work)
 	/* XXX worry about clock continuity */
 	if (sched_clock_stable())
 		static_key_slow_dec(&__sched_clock_stable);
+
+	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
 static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 05114b15b6d1..e5725b931bee 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -320,20 +320,6 @@ static inline void init_hrtick(void)
 }
 #endif	/* CONFIG_SCHED_HRTICK */
 
-/*
- * cmpxchg based fetch_or, macro so it works for different integer types
- */
-#define fetch_or(ptr, val)						\
-({	typeof(*(ptr)) __old, __val = *(ptr);				\
-	for (;;) {							\
-		__old = cmpxchg((ptr), __val, __val | (val));		\
-		if (__old == __val)					\
-			break;						\
-		__val = __old;						\
-	}								\
-	__old;								\
-})
-
 #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
 /*
  * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
@@ -582,31 +568,36 @@ static inline bool got_nohz_idle_kick(void)
 #endif /* CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
-bool sched_can_stop_tick(void)
+bool sched_can_stop_tick(struct rq *rq)
 {
+	int fifo_nr_running;
+
+	/* Deadline tasks, even if single, need the tick */
+	if (rq->dl.dl_nr_running)
+		return false;
+
 	/*
-	 * FIFO realtime policy runs the highest priority task. Other runnable
-	 * tasks are of a lower priority. The scheduler tick does nothing.
+	 * FIFO realtime policy runs the highest priority task (after DEADLINE).
+	 * Other runnable tasks are of a lower priority. The scheduler tick
+	 * isn't needed.
 	 */
-	if (current->policy == SCHED_FIFO)
+	fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
+	if (fifo_nr_running)
 		return true;
 
 	/*
 	 * Round-robin realtime tasks time slice with other tasks at the same
-	 * realtime priority. Is this task the only one at this priority?
+	 * realtime priority.
 	 */
-	if (current->policy == SCHED_RR) {
-		struct sched_rt_entity *rt_se = &current->rt;
-
-		return list_is_singular(&rt_se->run_list);
+	if (rq->rt.rr_nr_running) {
+		if (rq->rt.rr_nr_running == 1)
+			return true;
+		else
+			return false;
 	}
 
-	/*
-	 * More than one running task need preemption.
-	 * nr_running update is assumed to be visible
-	 * after IPI is sent from wakers.
-	 */
-	if (this_rq()->nr_running > 1)
+	/* Normal multitasking need periodic preemption checks */
+	if (rq->cfs.nr_running > 1)
 		return false;
 
 	return true;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a774b4dbf291..562471329487 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1150,12 +1150,27 @@ unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
 }
 
 static inline
+unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
+{
+	struct rt_rq *group_rq = group_rt_rq(rt_se);
+	struct task_struct *tsk;
+
+	if (group_rq)
+		return group_rq->rr_nr_running;
+
+	tsk = rt_task_of(rt_se);
+
+	return (tsk->policy == SCHED_RR) ? 1 : 0;
+}
+
+static inline
 void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
 	int prio = rt_se_prio(rt_se);
 
 	WARN_ON(!rt_prio(prio));
 	rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
+	rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
 
 	inc_rt_prio(rt_rq, prio);
 	inc_rt_migration(rt_se, rt_rq);
@@ -1168,6 +1183,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	WARN_ON(!rt_rq->rt_nr_running);
 	rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
+	rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
 
 	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
 	dec_rt_migration(rt_se, rt_rq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ef5875fff5b7..b2ff5a2bd6df 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -450,6 +450,7 @@ static inline int rt_bandwidth_enabled(void)
 struct rt_rq {
 	struct rt_prio_array active;
 	unsigned int rt_nr_running;
+	unsigned int rr_nr_running;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	struct {
 		int curr; /* highest queued rt task prio */
@@ -1313,6 +1314,35 @@ unsigned long to_ratio(u64 period, u64 runtime);
 
 extern void init_entity_runnable_average(struct sched_entity *se);
 
+#ifdef CONFIG_NO_HZ_FULL
+extern bool sched_can_stop_tick(struct rq *rq);
+
+/*
+ * Tick may be needed by tasks in the runqueue depending on their policy and
+ * requirements. If tick is needed, lets send the target an IPI to kick it out of
+ * nohz mode if necessary.
+ */
+static inline void sched_update_tick_dependency(struct rq *rq)
+{
+	int cpu;
+
+	if (!tick_nohz_full_enabled())
+		return;
+
+	cpu = cpu_of(rq);
+
+	if (!tick_nohz_full_cpu(cpu))
+		return;
+
+	if (sched_can_stop_tick(rq))
+		tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
+	else
+		tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
+}
+#else
+static inline void sched_update_tick_dependency(struct rq *rq) { }
+#endif
+
 static inline void add_nr_running(struct rq *rq, unsigned count)
 {
 	unsigned prev_nr = rq->nr_running;
@@ -1324,26 +1354,16 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
 		if (!rq->rd->overload)
 			rq->rd->overload = true;
 #endif
-
-#ifdef CONFIG_NO_HZ_FULL
-		if (tick_nohz_full_cpu(rq->cpu)) {
-			/*
-			 * Tick is needed if more than one task runs on a CPU.
-			 * Send the target an IPI to kick it out of nohz mode.
-			 *
-			 * We assume that IPI implies full memory barrier and the
-			 * new value of rq->nr_running is visible on reception
-			 * from the target.
-			 */
-			tick_nohz_full_kick_cpu(rq->cpu);
-		}
-#endif
 	}
+
+	sched_update_tick_dependency(rq);
 }
 
 static inline void sub_nr_running(struct rq *rq, unsigned count)
 {
 	rq->nr_running -= count;
+	/* Check if we still need preemption */
+	sched_update_tick_dependency(rq);
 }
 
 static inline void rq_last_tick_reset(struct rq *rq)
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index f5e86d282d52..1cafba860b08 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -333,7 +333,6 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 	return err;
 }
 
-
 /*
  * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
  * This is called from sys_timer_create() and do_cpu_nanosleep() with the
@@ -517,6 +516,10 @@ static void arm_timer(struct k_itimer *timer)
 			cputime_expires->sched_exp = exp;
 			break;
 		}
+		if (CPUCLOCK_PERTHREAD(timer->it_clock))
+			tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
+		else
+			tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
 	}
 }
 
@@ -582,39 +585,6 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
 	return 0;
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-static void nohz_kick_work_fn(struct work_struct *work)
-{
-	tick_nohz_full_kick_all();
-}
-
-static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
-
-/*
- * We need the IPIs to be sent from sane process context.
- * The posix cpu timers are always set with irqs disabled.
- */
-static void posix_cpu_timer_kick_nohz(void)
-{
-	if (context_tracking_is_enabled())
-		schedule_work(&nohz_kick_work);
-}
-
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
-{
-	if (!task_cputime_zero(&tsk->cputime_expires))
-		return false;
-
-	/* Check if cputimer is running. This is accessed without locking. */
-	if (READ_ONCE(tsk->signal->cputimer.running))
-		return false;
-
-	return true;
-}
-#else
-static inline void posix_cpu_timer_kick_nohz(void) { }
-#endif
-
 /*
  * Guts of sys_timer_settime for CPU timers.
  * This is called with the timer locked and interrupts disabled.
@@ -761,8 +731,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 			sample_to_timespec(timer->it_clock,
 					   old_incr, &old->it_interval);
 	}
-	if (!ret)
-		posix_cpu_timer_kick_nohz();
+
 	return ret;
 }
 
@@ -911,6 +880,8 @@ static void check_thread_timers(struct task_struct *tsk,
 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 		}
 	}
+	if (task_cputime_zero(tsk_expires))
+		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static inline void stop_process_timers(struct signal_struct *sig)
@@ -919,6 +890,7 @@ static inline void stop_process_timers(struct signal_struct *sig)
 
 	/* Turn off cputimer->running. This is done without locking. */
 	WRITE_ONCE(cputimer->running, false);
+	tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static u32 onecputick;
@@ -1095,8 +1067,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	arm_timer(timer);
 	unlock_task_sighand(p, &flags);
 
-	/* Kick full dynticks CPUs in case they need to tick on the new timer */
-	posix_cpu_timer_kick_nohz();
 out:
 	timer->it_overrun_last = timer->it_overrun;
 	timer->it_overrun = -1;
@@ -1270,7 +1240,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 		}
 
 		if (!*newval)
-			goto out;
+			return;
 		*newval += now;
 	}
 
@@ -1288,8 +1258,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 		tsk->signal->cputime_expires.virt_exp = *newval;
 		break;
 	}
-out:
-	posix_cpu_timer_kick_nohz();
+
+	tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 0b17424349eb..969e6704c3c9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -22,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/irq_work.h>
 #include <linux/posix-timers.h>
-#include <linux/perf_event.h>
 #include <linux/context_tracking.h>
 
 #include <asm/irq_regs.h>
@@ -158,54 +157,63 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 cpumask_var_t tick_nohz_full_mask;
 cpumask_var_t housekeeping_mask;
 bool tick_nohz_full_running;
+static unsigned long tick_dep_mask;
 
-static bool can_stop_full_tick(void)
+static void trace_tick_dependency(unsigned long dep)
+{
+	if (dep & TICK_DEP_MASK_POSIX_TIMER) {
+		trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_PERF_EVENTS) {
+		trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_SCHED) {
+		trace_tick_stop(0, TICK_DEP_MASK_SCHED);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
+		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
+}
+
+static bool can_stop_full_tick(struct tick_sched *ts)
 {
 	WARN_ON_ONCE(!irqs_disabled());
 
-	if (!sched_can_stop_tick()) {
-		trace_tick_stop(0, "more than 1 task in runqueue\n");
+	if (tick_dep_mask) {
+		trace_tick_dependency(tick_dep_mask);
 		return false;
 	}
 
-	if (!posix_cpu_timers_can_stop_tick(current)) {
-		trace_tick_stop(0, "posix timers running\n");
+	if (ts->tick_dep_mask) {
+		trace_tick_dependency(ts->tick_dep_mask);
 		return false;
 	}
 
-	if (!perf_event_can_stop_tick()) {
-		trace_tick_stop(0, "perf events running\n");
+	if (current->tick_dep_mask) {
+		trace_tick_dependency(current->tick_dep_mask);
 		return false;
 	}
 
-	/* sched_clock_tick() needs us? */
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-	/*
-	 * TODO: kick full dynticks CPUs when
-	 * sched_clock_stable is set.
-	 */
-	if (!sched_clock_stable()) {
-		trace_tick_stop(0, "unstable sched clock\n");
-		/*
-		 * Don't allow the user to think they can get
-		 * full NO_HZ with this machine.
-		 */
-		WARN_ONCE(tick_nohz_full_running,
-			  "NO_HZ FULL will not work with unstable sched clock");
+	if (current->signal->tick_dep_mask) {
+		trace_tick_dependency(current->signal->tick_dep_mask);
 		return false;
 	}
-#endif
 
 	return true;
 }
 
-static void nohz_full_kick_work_func(struct irq_work *work)
+static void nohz_full_kick_func(struct irq_work *work)
 {
 	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
 }
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
-	.func = nohz_full_kick_work_func,
+	.func = nohz_full_kick_func,
 };
 
 /*
@@ -214,7 +222,7 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
  * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
  * is NMI safe.
  */
-void tick_nohz_full_kick(void)
+static void tick_nohz_full_kick(void)
 {
 	if (!tick_nohz_full_cpu(smp_processor_id()))
 		return;
@@ -234,27 +242,112 @@ void tick_nohz_full_kick_cpu(int cpu)
 	irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
 }
 
-static void nohz_full_kick_ipi(void *info)
-{
-	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
-}
-
 /*
  * Kick all full dynticks CPUs in order to force these to re-evaluate
  * their dependency on the tick and restart it if necessary.
  */
-void tick_nohz_full_kick_all(void)
+static void tick_nohz_full_kick_all(void)
 {
+	int cpu;
+
 	if (!tick_nohz_full_running)
 		return;
 
 	preempt_disable();
-	smp_call_function_many(tick_nohz_full_mask,
-			       nohz_full_kick_ipi, NULL, false);
-	tick_nohz_full_kick();
+	for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
+		tick_nohz_full_kick_cpu(cpu);
 	preempt_enable();
 }
 
+static void tick_nohz_dep_set_all(unsigned long *dep,
+				  enum tick_dep_bits bit)
+{
+	unsigned long prev;
+
+	prev = fetch_or(dep, BIT_MASK(bit));
+	if (!prev)
+		tick_nohz_full_kick_all();
+}
+
+/*
+ * Set a global tick dependency. Used by perf events that rely on freq and
+ * by unstable clock.
+ */
+void tick_nohz_dep_set(enum tick_dep_bits bit)
+{
+	tick_nohz_dep_set_all(&tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear(enum tick_dep_bits bit)
+{
+	clear_bit(bit, &tick_dep_mask);
+}
+
+/*
+ * Set per-CPU tick dependency. Used by scheduler and perf events in order to
+ * manage events throttling.
+ */
+void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+	unsigned long prev;
+	struct tick_sched *ts;
+
+	ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+	prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
+	if (!prev) {
+		preempt_disable();
+		/* Perf needs local kick that is NMI safe */
+		if (cpu == smp_processor_id()) {
+			tick_nohz_full_kick();
+		} else {
+			/* Remote irq work not NMI-safe */
+			if (!WARN_ON_ONCE(in_nmi()))
+				tick_nohz_full_kick_cpu(cpu);
+		}
+		preempt_enable();
+	}
+}
+
+void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+	clear_bit(bit, &ts->tick_dep_mask);
+}
+
+/*
+ * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
+ * per task timers.
+ */
+void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+	/*
+	 * We could optimize this with just kicking the target running the task
+	 * if that noise matters for nohz full users.
+	 */
+	tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+	clear_bit(bit, &tsk->tick_dep_mask);
+}
+
+/*
+ * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
+ * per process timers.
+ */
+void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+	tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+	clear_bit(bit, &sig->tick_dep_mask);
+}
+
 /*
  * Re-evaluate the need for the tick as we switch the current task.
  * It might need the tick due to per task/process properties:
@@ -263,15 +356,19 @@ void tick_nohz_full_kick_all(void)
 void __tick_nohz_task_switch(void)
 {
 	unsigned long flags;
+	struct tick_sched *ts;
 
 	local_irq_save(flags);
 
 	if (!tick_nohz_full_cpu(smp_processor_id()))
 		goto out;
 
-	if (tick_nohz_tick_stopped() && !can_stop_full_tick())
-		tick_nohz_full_kick();
+	ts = this_cpu_ptr(&tick_cpu_sched);
 
+	if (ts->tick_stopped) {
+		if (current->tick_dep_mask || current->signal->tick_dep_mask)
+			tick_nohz_full_kick();
+	}
 out:
 	local_irq_restore(flags);
 }
@@ -689,7 +786,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 
 	ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
 	ts->tick_stopped = 1;
-	trace_tick_stop(1, " ");
+	trace_tick_stop(1, TICK_DEP_MASK_NONE);
 }
 
 /*
@@ -740,7 +837,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
 	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
 		return;
 
-	if (can_stop_full_tick())
+	if (can_stop_full_tick(ts))
 		tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
 	else if (ts->tick_stopped)
 		tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index a4a8d4e9baa1..eb4e32566a83 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -60,6 +60,7 @@ struct tick_sched {
 	u64				next_timer;
 	ktime_t				idle_expires;
 	int				do_timer_last;
+	unsigned long			tick_dep_mask;
 };
 
 extern struct tick_sched *tick_get_tick_sched(int cpu);