author		Linus Torvalds <torvalds@linux-foundation.org>	2016-03-14 22:44:38 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-03-14 22:44:38 -0400
commit		e23604edac2a7be6a8808a5d13fac6b9df4eb9a8 (patch)
tree		7a7fa87796fb1dc6dd355675816e2e59546d4edd
parent		d4e796152a049f6a675f8b6dcf7080a9d80014e5 (diff)
parent		1f25184656a00a59e3a953189070d42a749f6aee (diff)
Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull NOHZ updates from Ingo Molnar:
 "NOHZ enhancements, by Frederic Weisbecker, which reorganizes/refactors
  the NOHZ 'can the tick be stopped?' infrastructure and related code to
  be data driven, and harmonizes the naming and handling of all the
  various properties"

[ This makes the ugly "fetch_or()" macro that the scheduler used
  internally a new generic helper, and does a bad job at it.

  I'm pulling it, but I've asked Ingo and Frederic to get this
  fixed up ]

* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched-clock: Migrate to use new tick dependency mask model
  posix-cpu-timers: Migrate to use new tick dependency mask model
  sched: Migrate sched to use new tick dependency mask model
  sched: Account rr tasks
  perf: Migrate perf to use new tick dependency mask model
  nohz: Use enum code for tick stop failure tracing message
  nohz: New tick dependency mask
  nohz: Implement wide kick on top of irq work
  atomic: Export fetch_or()
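For readers unfamiliar with the helper being discussed: fetch_or(ptr, mask) atomically ORs mask into *ptr and returns the value *ptr held beforehand. A minimal sketch of a caller follows; my_flags, MY_FLAG_PENDING and set_pending_flag are illustrative names only and are not part of this patch:

	/* Atomically set a flag bit and learn whether it was already set. */
	static unsigned long my_flags;			/* illustrative flag word */
	#define MY_FLAG_PENDING	(1UL << 0)		/* illustrative bit */

	static bool set_pending_flag(void)
	{
		/* fetch_or() returns the value before the OR, so a clear bit
		 * in the return value means this caller set the flag first. */
		unsigned long prev = fetch_or(&my_flags, MY_FLAG_PENDING);

		return !(prev & MY_FLAG_PENDING);
	}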
-rw-r--r--	include/linux/atomic.h			 21
-rw-r--r--	include/linux/perf_event.h		  6
-rw-r--r--	include/linux/posix-timers.h		  3
-rw-r--r--	include/linux/sched.h			 11
-rw-r--r--	include/linux/tick.h			 97
-rw-r--r--	include/trace/events/timer.h		 36
-rw-r--r--	kernel/events/core.c			 65
-rw-r--r--	kernel/sched/clock.c			  5
-rw-r--r--	kernel/sched/core.c			 49
-rw-r--r--	kernel/sched/rt.c			 16
-rw-r--r--	kernel/sched/sched.h			 48
-rw-r--r--	kernel/time/posix-cpu-timers.c		 52
-rw-r--r--	kernel/time/tick-sched.c		175
-rw-r--r--	kernel/time/tick-sched.h		  1
14 files changed, 424 insertions, 161 deletions
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 301de78d65f7..6c502cb13c95 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
+/**
+ * fetch_or - perform *ptr |= mask and return old value of *ptr
+ * @ptr: pointer to value
+ * @mask: mask to OR on the value
+ *
+ * cmpxchg based fetch_or, macro so it works for different integer types
+ */
+#ifndef fetch_or
+#define fetch_or(ptr, mask)						\
+({	typeof(*(ptr)) __old, __val = *(ptr);				\
+	for (;;) {							\
+		__old = cmpxchg((ptr), __val, __val | (mask));		\
+		if (__old == __val)					\
+			break;						\
+		__val = __old;						\
+	}								\
+	__old;								\
+})
+#endif
+
+
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a9d8cab18b00..79ec7bbf0155 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1110,12 +1110,6 @@ static inline void perf_event_task_tick(void) { }
 static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
 #endif
 
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
-extern bool perf_event_can_stop_tick(void);
-#else
-static inline bool perf_event_can_stop_tick(void) { return true; }
-#endif
-
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
 extern void perf_restore_debug_store(void);
 #else
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 907f3fd191ac..62d44c176071 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer);
 void run_posix_cpu_timers(struct task_struct *task);
 void posix_cpu_timers_exit(struct task_struct *task);
 void posix_cpu_timers_exit_group(struct task_struct *task);
-
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);
-
 void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 838a89a78332..c617ea12c6b7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -717,6 +717,10 @@ struct signal_struct {
 	/* Earliest-expiration cache. */
 	struct task_cputime cputime_expires;
 
+#ifdef CONFIG_NO_HZ_FULL
+	unsigned long tick_dep_mask;
+#endif
+
 	struct list_head cpu_timers[3];
 
 	struct pid *tty_old_pgrp;
@@ -1542,6 +1546,10 @@ struct task_struct {
 		VTIME_SYS,
 	} vtime_snap_whence;
 #endif
+
+#ifdef CONFIG_NO_HZ_FULL
+	unsigned long tick_dep_mask;
+#endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	u64 start_time;		/* monotonic time in nsec */
 	u64 real_start_time;	/* boot based time in nsec */
@@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
-extern bool sched_can_stop_tick(void);
 extern u64 scheduler_tick_max_deferment(void);
-#else
-static inline bool sched_can_stop_tick(void) { return false; }
 #endif
 
 #ifdef CONFIG_SCHED_AUTOGROUP
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 97fd4e543846..21f73649a4dc 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void)
 	tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
 }
 
+enum tick_dep_bits {
+	TICK_DEP_BIT_POSIX_TIMER	= 0,
+	TICK_DEP_BIT_PERF_EVENTS	= 1,
+	TICK_DEP_BIT_SCHED		= 2,
+	TICK_DEP_BIT_CLOCK_UNSTABLE	= 3
+};
+
+#define TICK_DEP_MASK_NONE		0
+#define TICK_DEP_MASK_POSIX_TIMER	(1 << TICK_DEP_BIT_POSIX_TIMER)
+#define TICK_DEP_MASK_PERF_EVENTS	(1 << TICK_DEP_BIT_PERF_EVENTS)
+#define TICK_DEP_MASK_SCHED		(1 << TICK_DEP_BIT_SCHED)
+#define TICK_DEP_MASK_CLOCK_UNSTABLE	(1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
+
 #ifdef CONFIG_NO_HZ_COMMON
 extern int tick_nohz_enabled;
 extern int tick_nohz_tick_stopped(void);
@@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void)
 	return cpumask_any_and(housekeeping_mask, cpu_online_mask);
 }
 
-extern void tick_nohz_full_kick(void);
+extern void tick_nohz_dep_set(enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_task(struct task_struct *tsk,
+				   enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
+				     enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit);
+
+/*
+ * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
+ * on top of static keys.
+ */
+static inline void tick_dep_set(enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set(bit);
+}
+
+static inline void tick_dep_clear(enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear(bit);
+}
+
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_cpu(cpu))
+		tick_nohz_dep_set_cpu(cpu, bit);
+}
+
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_cpu(cpu))
+		tick_nohz_dep_clear_cpu(cpu, bit);
+}
+
+static inline void tick_dep_set_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set_task(tsk, bit);
+}
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+				       enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear_task(tsk, bit);
+}
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set_signal(signal, bit);
+}
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+					 enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear_signal(signal, bit);
+}
+
 extern void tick_nohz_full_kick_cpu(int cpu);
-extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(void);
 #else
 static inline int housekeeping_any_cpu(void)
@@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void)
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
 static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
+
+static inline void tick_dep_set(enum tick_dep_bits bit) { }
+static inline void tick_dep_clear(enum tick_dep_bits bit) { }
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_set_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+				       enum tick_dep_bits bit) { }
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+					 enum tick_dep_bits bit) { }
+
 static inline void tick_nohz_full_kick_cpu(int cpu) { }
-static inline void tick_nohz_full_kick(void) { }
-static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(void) { }
 #endif
 
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 073b9ac245ba..51440131d337 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire,
 );
 
 #ifdef CONFIG_NO_HZ_COMMON
+
+#define TICK_DEP_NAMES					\
+		tick_dep_name(NONE)			\
+		tick_dep_name(POSIX_TIMER)		\
+		tick_dep_name(PERF_EVENTS)		\
+		tick_dep_name(SCHED)			\
+		tick_dep_name_end(CLOCK_UNSTABLE)
+
+#undef tick_dep_name
+#undef tick_dep_name_end
+
+#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+
+TICK_DEP_NAMES
+
+#undef tick_dep_name
+#undef tick_dep_name_end
+
+#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
+#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
+
+#define show_tick_dep_name(val)				\
+	__print_symbolic(val, TICK_DEP_NAMES)
+
 TRACE_EVENT(tick_stop,
 
-	TP_PROTO(int success, char *error_msg),
+	TP_PROTO(int success, int dependency),
 
-	TP_ARGS(success, error_msg),
+	TP_ARGS(success, dependency),
 
 	TP_STRUCT__entry(
 		__field( int ,		success	)
-		__string( msg, 		error_msg )
+		__field( int ,		dependency )
 	),
 
 	TP_fast_assign(
 		__entry->success	= success;
-		__assign_str(msg, error_msg);
+		__entry->dependency	= dependency;
 	),
 
-	TP_printk("success=%s msg=%s",  __entry->success ? "yes" : "no", __get_str(msg))
+	TP_printk("success=%d dependency=%s",  __entry->success, \
+			show_tick_dep_name(__entry->dependency))
 );
 #endif
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b7231498de47..712570dddacd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3112,17 +3112,6 @@ done:
 	return rotate;
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-bool perf_event_can_stop_tick(void)
-{
-	if (atomic_read(&nr_freq_events) ||
-	    __this_cpu_read(perf_throttled_count))
-		return false;
-	else
-		return true;
-}
-#endif
-
 void perf_event_task_tick(void)
 {
 	struct list_head *head = this_cpu_ptr(&active_ctx_list);
@@ -3133,6 +3122,7 @@ void perf_event_task_tick(void)
 
 	__this_cpu_inc(perf_throttled_seq);
 	throttled = __this_cpu_xchg(perf_throttled_count, 0);
+	tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
 
 	list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
 		perf_adjust_freq_unthr_context(ctx, throttled);
@@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
 	atomic_dec(&per_cpu(perf_cgroup_events, cpu));
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static DEFINE_SPINLOCK(nr_freq_lock);
+#endif
+
+static void unaccount_freq_event_nohz(void)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	spin_lock(&nr_freq_lock);
+	if (atomic_dec_and_test(&nr_freq_events))
+		tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
+	spin_unlock(&nr_freq_lock);
+#endif
+}
+
+static void unaccount_freq_event(void)
+{
+	if (tick_nohz_full_enabled())
+		unaccount_freq_event_nohz();
+	else
+		atomic_dec(&nr_freq_events);
+}
+
 static void unaccount_event(struct perf_event *event)
 {
 	bool dec = false;
@@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event)
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
-		atomic_dec(&nr_freq_events);
+		unaccount_freq_event();
 	if (event->attr.context_switch) {
 		dec = true;
 		atomic_dec(&nr_switch_events);
@@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event,
 		if (unlikely(throttle
 			     && hwc->interrupts >= max_samples_per_tick)) {
 			__this_cpu_inc(perf_throttled_count);
+			tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
 			hwc->interrupts = MAX_INTERRUPTS;
 			perf_log_throttle(event, 0);
-			tick_nohz_full_kick();
 			ret = 1;
 		}
 	}
@@ -7815,6 +7827,27 @@ static void account_event_cpu(struct perf_event *event, int cpu)
 	atomic_inc(&per_cpu(perf_cgroup_events, cpu));
 }
 
+/* Freq events need the tick to stay alive (see perf_event_task_tick). */
+static void account_freq_event_nohz(void)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	/* Lock so we don't race with concurrent unaccount */
+	spin_lock(&nr_freq_lock);
+	if (atomic_inc_return(&nr_freq_events) == 1)
+		tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
+	spin_unlock(&nr_freq_lock);
+#endif
+}
+
+static void account_freq_event(void)
+{
+	if (tick_nohz_full_enabled())
+		account_freq_event_nohz();
+	else
+		atomic_inc(&nr_freq_events);
+}
+
+
 static void account_event(struct perf_event *event)
 {
 	bool inc = false;
@@ -7830,10 +7863,8 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_comm_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
-	if (event->attr.freq) {
-		if (atomic_inc_return(&nr_freq_events) == 1)
-			tick_nohz_full_kick_all();
-	}
+	if (event->attr.freq)
+		account_freq_event();
 	if (event->attr.context_switch) {
 		atomic_inc(&nr_switch_events);
 		inc = true;
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index bc54e84675da..fedb967a9841 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -61,6 +61,7 @@
 #include <linux/static_key.h>
 #include <linux/workqueue.h>
 #include <linux/compiler.h>
+#include <linux/tick.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -89,6 +90,8 @@ static void __set_sched_clock_stable(void)
 {
 	if (!sched_clock_stable())
 		static_key_slow_inc(&__sched_clock_stable);
+
+	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
 void set_sched_clock_stable(void)
@@ -108,6 +111,8 @@ static void __clear_sched_clock_stable(struct work_struct *work)
 	/* XXX worry about clock continuity */
 	if (sched_clock_stable())
 		static_key_slow_dec(&__sched_clock_stable);
+
+	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
 static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 05114b15b6d1..e5725b931bee 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -320,20 +320,6 @@ static inline void init_hrtick(void)
 }
 #endif	/* CONFIG_SCHED_HRTICK */
 
-/*
- * cmpxchg based fetch_or, macro so it works for different integer types
- */
-#define fetch_or(ptr, val)						\
-({	typeof(*(ptr)) __old, __val = *(ptr);				\
-	for (;;) {							\
-		__old = cmpxchg((ptr), __val, __val | (val));		\
-		if (__old == __val)					\
-			break;						\
-		__val = __old;						\
-	}								\
-	__old;								\
-})
-
 #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
 /*
  * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
@@ -582,31 +568,36 @@ static inline bool got_nohz_idle_kick(void)
 #endif /* CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
-bool sched_can_stop_tick(void)
+bool sched_can_stop_tick(struct rq *rq)
 {
+	int fifo_nr_running;
+
+	/* Deadline tasks, even if single, need the tick */
+	if (rq->dl.dl_nr_running)
+		return false;
+
 	/*
-	 * FIFO realtime policy runs the highest priority task. Other runnable
-	 * tasks are of a lower priority. The scheduler tick does nothing.
+	 * FIFO realtime policy runs the highest priority task (after DEADLINE).
+	 * Other runnable tasks are of a lower priority. The scheduler tick
+	 * isn't needed.
 	 */
-	if (current->policy == SCHED_FIFO)
+	fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
+	if (fifo_nr_running)
 		return true;
 
 	/*
 	 * Round-robin realtime tasks time slice with other tasks at the same
-	 * realtime priority. Is this task the only one at this priority?
+	 * realtime priority.
 	 */
-	if (current->policy == SCHED_RR) {
-		struct sched_rt_entity *rt_se = &current->rt;
-
-		return list_is_singular(&rt_se->run_list);
+	if (rq->rt.rr_nr_running) {
+		if (rq->rt.rr_nr_running == 1)
+			return true;
+		else
+			return false;
 	}
 
-	/*
-	 * More than one running task need preemption.
-	 * nr_running update is assumed to be visible
-	 * after IPI is sent from wakers.
-	 */
-	if (this_rq()->nr_running > 1)
+	/* Normal multitasking need periodic preemption checks */
+	if (rq->cfs.nr_running > 1)
 		return false;
 
 	return true;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a774b4dbf291..562471329487 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1150,12 +1150,27 @@ unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
 }
 
 static inline
+unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
+{
+	struct rt_rq *group_rq = group_rt_rq(rt_se);
+	struct task_struct *tsk;
+
+	if (group_rq)
+		return group_rq->rr_nr_running;
+
+	tsk = rt_task_of(rt_se);
+
+	return (tsk->policy == SCHED_RR) ? 1 : 0;
+}
+
+static inline
 void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
 	int prio = rt_se_prio(rt_se);
 
 	WARN_ON(!rt_prio(prio));
 	rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
+	rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
 
 	inc_rt_prio(rt_rq, prio);
 	inc_rt_migration(rt_se, rt_rq);
@@ -1168,6 +1183,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	WARN_ON(!rt_rq->rt_nr_running);
 	rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
+	rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
 
 	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
 	dec_rt_migration(rt_se, rt_rq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ef5875fff5b7..b2ff5a2bd6df 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -450,6 +450,7 @@ static inline int rt_bandwidth_enabled(void)
 struct rt_rq {
 	struct rt_prio_array active;
 	unsigned int rt_nr_running;
+	unsigned int rr_nr_running;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	struct {
 		int curr; /* highest queued rt task prio */
@@ -1313,6 +1314,35 @@ unsigned long to_ratio(u64 period, u64 runtime);
 
 extern void init_entity_runnable_average(struct sched_entity *se);
 
+#ifdef CONFIG_NO_HZ_FULL
+extern bool sched_can_stop_tick(struct rq *rq);
+
+/*
+ * Tick may be needed by tasks in the runqueue depending on their policy and
+ * requirements. If tick is needed, lets send the target an IPI to kick it out of
+ * nohz mode if necessary.
+ */
+static inline void sched_update_tick_dependency(struct rq *rq)
+{
+	int cpu;
+
+	if (!tick_nohz_full_enabled())
+		return;
+
+	cpu = cpu_of(rq);
+
+	if (!tick_nohz_full_cpu(cpu))
+		return;
+
+	if (sched_can_stop_tick(rq))
+		tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
+	else
+		tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
+}
+#else
+static inline void sched_update_tick_dependency(struct rq *rq) { }
+#endif
+
 static inline void add_nr_running(struct rq *rq, unsigned count)
 {
 	unsigned prev_nr = rq->nr_running;
@@ -1324,26 +1354,16 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
 		if (!rq->rd->overload)
 			rq->rd->overload = true;
 #endif
-
-#ifdef CONFIG_NO_HZ_FULL
-		if (tick_nohz_full_cpu(rq->cpu)) {
-			/*
-			 * Tick is needed if more than one task runs on a CPU.
-			 * Send the target an IPI to kick it out of nohz mode.
-			 *
-			 * We assume that IPI implies full memory barrier and the
-			 * new value of rq->nr_running is visible on reception
-			 * from the target.
-			 */
-			tick_nohz_full_kick_cpu(rq->cpu);
-		}
-#endif
 	}
+
+	sched_update_tick_dependency(rq);
 }
 
 static inline void sub_nr_running(struct rq *rq, unsigned count)
 {
 	rq->nr_running -= count;
+	/* Check if we still need preemption */
+	sched_update_tick_dependency(rq);
 }
 
 static inline void rq_last_tick_reset(struct rq *rq)
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index f5e86d282d52..1cafba860b08 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -333,7 +333,6 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 	return err;
 }
 
-
 /*
  * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
  * This is called from sys_timer_create() and do_cpu_nanosleep() with the
@@ -517,6 +516,10 @@ static void arm_timer(struct k_itimer *timer)
 			cputime_expires->sched_exp = exp;
 			break;
 		}
+		if (CPUCLOCK_PERTHREAD(timer->it_clock))
+			tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
+		else
+			tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
 	}
 }
 
@@ -582,39 +585,6 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
 	return 0;
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-static void nohz_kick_work_fn(struct work_struct *work)
-{
-	tick_nohz_full_kick_all();
-}
-
-static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
-
-/*
- * We need the IPIs to be sent from sane process context.
- * The posix cpu timers are always set with irqs disabled.
- */
-static void posix_cpu_timer_kick_nohz(void)
-{
-	if (context_tracking_is_enabled())
-		schedule_work(&nohz_kick_work);
-}
-
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
-{
-	if (!task_cputime_zero(&tsk->cputime_expires))
-		return false;
-
-	/* Check if cputimer is running. This is accessed without locking. */
-	if (READ_ONCE(tsk->signal->cputimer.running))
-		return false;
-
-	return true;
-}
-#else
-static inline void posix_cpu_timer_kick_nohz(void) { }
-#endif
-
 /*
  * Guts of sys_timer_settime for CPU timers.
  * This is called with the timer locked and interrupts disabled.
@@ -761,8 +731,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 		sample_to_timespec(timer->it_clock,
 				   old_incr, &old->it_interval);
 	}
-	if (!ret)
-		posix_cpu_timer_kick_nohz();
+
 	return ret;
 }
 
@@ -911,6 +880,8 @@ static void check_thread_timers(struct task_struct *tsk,
 				__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 		}
 	}
+	if (task_cputime_zero(tsk_expires))
+		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static inline void stop_process_timers(struct signal_struct *sig)
@@ -919,6 +890,7 @@ static inline void stop_process_timers(struct signal_struct *sig)
 
 	/* Turn off cputimer->running. This is done without locking. */
 	WRITE_ONCE(cputimer->running, false);
+	tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static u32 onecputick;
@@ -1095,8 +1067,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	arm_timer(timer);
 	unlock_task_sighand(p, &flags);
 
-	/* Kick full dynticks CPUs in case they need to tick on the new timer */
-	posix_cpu_timer_kick_nohz();
 out:
 	timer->it_overrun_last = timer->it_overrun;
 	timer->it_overrun = -1;
@@ -1270,7 +1240,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 		}
 
 		if (!*newval)
-			goto out;
+			return;
 		*newval += now;
 	}
 
@@ -1288,8 +1258,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 			tsk->signal->cputime_expires.virt_exp = *newval;
 		break;
 	}
-out:
-	posix_cpu_timer_kick_nohz();
+
+	tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 0b17424349eb..969e6704c3c9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -22,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/irq_work.h>
 #include <linux/posix-timers.h>
-#include <linux/perf_event.h>
 #include <linux/context_tracking.h>
 
 #include <asm/irq_regs.h>
@@ -158,54 +157,63 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 cpumask_var_t tick_nohz_full_mask;
 cpumask_var_t housekeeping_mask;
 bool tick_nohz_full_running;
+static unsigned long tick_dep_mask;
 
-static bool can_stop_full_tick(void)
+static void trace_tick_dependency(unsigned long dep)
+{
+	if (dep & TICK_DEP_MASK_POSIX_TIMER) {
+		trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_PERF_EVENTS) {
+		trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_SCHED) {
+		trace_tick_stop(0, TICK_DEP_MASK_SCHED);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
+		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
+}
+
+static bool can_stop_full_tick(struct tick_sched *ts)
 {
 	WARN_ON_ONCE(!irqs_disabled());
 
-	if (!sched_can_stop_tick()) {
-		trace_tick_stop(0, "more than 1 task in runqueue\n");
+	if (tick_dep_mask) {
+		trace_tick_dependency(tick_dep_mask);
 		return false;
 	}
 
-	if (!posix_cpu_timers_can_stop_tick(current)) {
-		trace_tick_stop(0, "posix timers running\n");
+	if (ts->tick_dep_mask) {
+		trace_tick_dependency(ts->tick_dep_mask);
 		return false;
 	}
 
-	if (!perf_event_can_stop_tick()) {
-		trace_tick_stop(0, "perf events running\n");
+	if (current->tick_dep_mask) {
+		trace_tick_dependency(current->tick_dep_mask);
 		return false;
 	}
 
-	/* sched_clock_tick() needs us? */
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-	/*
-	 * TODO: kick full dynticks CPUs when
-	 * sched_clock_stable is set.
-	 */
-	if (!sched_clock_stable()) {
-		trace_tick_stop(0, "unstable sched clock\n");
-		/*
-		 * Don't allow the user to think they can get
-		 * full NO_HZ with this machine.
-		 */
-		WARN_ONCE(tick_nohz_full_running,
-			  "NO_HZ FULL will not work with unstable sched clock");
+	if (current->signal->tick_dep_mask) {
+		trace_tick_dependency(current->signal->tick_dep_mask);
 		return false;
 	}
-#endif
 
 	return true;
 }
 
-static void nohz_full_kick_work_func(struct irq_work *work)
+static void nohz_full_kick_func(struct irq_work *work)
 {
 	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
 }
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
-	.func = nohz_full_kick_work_func,
+	.func = nohz_full_kick_func,
 };
 
 /*
@@ -214,7 +222,7 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
  * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
  * is NMI safe.
  */
-void tick_nohz_full_kick(void)
+static void tick_nohz_full_kick(void)
 {
 	if (!tick_nohz_full_cpu(smp_processor_id()))
 		return;
@@ -234,27 +242,112 @@ void tick_nohz_full_kick_cpu(int cpu)
 	irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
 }
 
-static void nohz_full_kick_ipi(void *info)
-{
-	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
-}
-
 /*
  * Kick all full dynticks CPUs in order to force these to re-evaluate
  * their dependency on the tick and restart it if necessary.
  */
-void tick_nohz_full_kick_all(void)
+static void tick_nohz_full_kick_all(void)
 {
+	int cpu;
+
 	if (!tick_nohz_full_running)
 		return;
 
 	preempt_disable();
-	smp_call_function_many(tick_nohz_full_mask,
-			       nohz_full_kick_ipi, NULL, false);
-	tick_nohz_full_kick();
+	for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
+		tick_nohz_full_kick_cpu(cpu);
 	preempt_enable();
 }
 
+static void tick_nohz_dep_set_all(unsigned long *dep,
+				  enum tick_dep_bits bit)
+{
+	unsigned long prev;
+
+	prev = fetch_or(dep, BIT_MASK(bit));
+	if (!prev)
+		tick_nohz_full_kick_all();
+}
+
+/*
+ * Set a global tick dependency. Used by perf events that rely on freq and
+ * by unstable clock.
+ */
+void tick_nohz_dep_set(enum tick_dep_bits bit)
+{
+	tick_nohz_dep_set_all(&tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear(enum tick_dep_bits bit)
+{
+	clear_bit(bit, &tick_dep_mask);
+}
+
+/*
+ * Set per-CPU tick dependency. Used by scheduler and perf events in order to
+ * manage events throttling.
+ */
+void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+	unsigned long prev;
+	struct tick_sched *ts;
+
+	ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+	prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
+	if (!prev) {
+		preempt_disable();
+		/* Perf needs local kick that is NMI safe */
+		if (cpu == smp_processor_id()) {
+			tick_nohz_full_kick();
+		} else {
+			/* Remote irq work not NMI-safe */
+			if (!WARN_ON_ONCE(in_nmi()))
+				tick_nohz_full_kick_cpu(cpu);
+		}
+		preempt_enable();
+	}
+}
+
+void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+	clear_bit(bit, &ts->tick_dep_mask);
+}
+
+/*
+ * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
+ * per task timers.
+ */
+void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+	/*
+	 * We could optimize this with just kicking the target running the task
+	 * if that noise matters for nohz full users.
+	 */
+	tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+	clear_bit(bit, &tsk->tick_dep_mask);
+}
+
+/*
+ * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
+ * per process timers.
+ */
+void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+	tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+	clear_bit(bit, &sig->tick_dep_mask);
+}
+
 /*
  * Re-evaluate the need for the tick as we switch the current task.
  * It might need the tick due to per task/process properties:
@@ -263,15 +356,19 @@ void tick_nohz_full_kick_all(void)
 void __tick_nohz_task_switch(void)
 {
 	unsigned long flags;
+	struct tick_sched *ts;
 
 	local_irq_save(flags);
 
 	if (!tick_nohz_full_cpu(smp_processor_id()))
 		goto out;
 
-	if (tick_nohz_tick_stopped() && !can_stop_full_tick())
-		tick_nohz_full_kick();
+	ts = this_cpu_ptr(&tick_cpu_sched);
 
+	if (ts->tick_stopped) {
+		if (current->tick_dep_mask || current->signal->tick_dep_mask)
+			tick_nohz_full_kick();
+	}
 out:
 	local_irq_restore(flags);
 }
@@ -689,7 +786,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 
 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
 		ts->tick_stopped = 1;
-		trace_tick_stop(1, " ");
+		trace_tick_stop(1, TICK_DEP_MASK_NONE);
 	}
 
 	/*
@@ -740,7 +837,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
 	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
 		return;
 
-	if (can_stop_full_tick())
+	if (can_stop_full_tick(ts))
 		tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
 	else if (ts->tick_stopped)
 		tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index a4a8d4e9baa1..eb4e32566a83 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -60,6 +60,7 @@ struct tick_sched {
 	u64			next_timer;
 	ktime_t			idle_expires;
 	int			do_timer_last;
+	unsigned long		tick_dep_mask;
 };
 
 extern struct tick_sched *tick_get_tick_sched(int cpu);
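
As a closing illustration of the new interface (not part of the patch itself): a subsystem that temporarily needs the tick on the current CPU sets its dependency bit through the tick_dep_*() wrappers added to <linux/tick.h> above, and clears it once the requirement goes away. The helper names below are made up for the example; the pattern mirrors what the perf changes in this merge do with TICK_DEP_BIT_PERF_EVENTS:

	#include <linux/tick.h>
	#include <linux/smp.h>

	static void my_need_tick_here(void)		/* hypothetical helper */
	{
		/* Keep the tick running on this CPU until further notice. */
		tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
	}

	static void my_done_with_tick(void)		/* hypothetical helper */
	{
		/* Dependency gone: this CPU may stop its tick again. */
		tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
	}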