author    Ingo Molnar <mingo@kernel.org>  2016-03-08 07:17:54 -0500
committer Ingo Molnar <mingo@kernel.org>  2016-03-08 07:17:54 -0500
commit    1f25184656a00a59e3a953189070d42a749f6aee (patch)
tree      c69dbf4f09a6aaaa54f8c962e3a029eb3715d0c9
parent    e2857b8f11a289ed2b61d18d0665e05c1053c446 (diff)
parent    4f49b90abb4aca6fe677c95fc352fd0674d489bd (diff)
Merge branch 'timers/core-v9' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz
Pull nohz enhancements from Frederic Weisbecker:

 "Currently in nohz full configs, the tick dependency is checked
  asynchronously by the nohz code from interrupt and context switch for
  each concerned subsystem, with a set of functions provided by these.
  Such functions are made of many conditions and details that can be
  heavyweight as they are called on the fast path:
  sched_can_stop_tick(), posix_cpu_timer_can_stop_tick(),
  perf_event_can_stop_tick()...

  Thomas suggested a few months ago to make that tick dependency check
  synchronous. Instead of checking subsystem details from each interrupt
  to guess if the tick can be stopped, every subsystem that may have a
  tick dependency should set itself a flag specifying the state of that
  dependency. This way we can verify whether we can stop the tick with a
  single lightweight mask check on the fast path.

  This conversion from a pull to a push model to implement tick
  dependency is the core feature of this patchset, which is split into:

  * Nohz wide kick simplification
  * Improve nohz tracing
  * Introduce tick dependency mask
  * Migrate scheduler, posix timers, perf events and sched clock tick
    dependencies to the tick dependency mask."

Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  include/linux/atomic.h            21
-rw-r--r--  include/linux/perf_event.h         6
-rw-r--r--  include/linux/posix-timers.h       3
-rw-r--r--  include/linux/sched.h             11
-rw-r--r--  include/linux/tick.h              97
-rw-r--r--  include/trace/events/timer.h      36
-rw-r--r--  kernel/events/core.c              65
-rw-r--r--  kernel/sched/clock.c               5
-rw-r--r--  kernel/sched/core.c               49
-rw-r--r--  kernel/sched/rt.c                 16
-rw-r--r--  kernel/sched/sched.h              48
-rw-r--r--  kernel/time/posix-cpu-timers.c    52
-rw-r--r--  kernel/time/tick-sched.c         175
-rw-r--r--  kernel/time/tick-sched.h           1
14 files changed, 424 insertions, 161 deletions
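
To make the pull-to-push conversion described in the merge message concrete, here is a minimal userspace C sketch of the idea. All identifiers below are invented for illustration; only the concept of a tick dependency mask comes from the patchset. The old model asks every subsystem whether the tick may stop; the new model has each subsystem set or clear a bit in a shared mask, so the check collapses to a single comparison.

/* Illustrative userspace model of the pull -> push conversion.
 * Identifiers are invented for the example; this is not kernel code. */
#include <stdio.h>
#include <stdbool.h>

/* Old pull model: interrogate every subsystem on each interrupt. */
static bool sched_needs_tick;
static bool posix_timers_need_tick;

static bool can_stop_tick_pull(void)
{
	/* Each of these could hide heavyweight per-subsystem checks. */
	return !sched_needs_tick && !posix_timers_need_tick;
}

/* New push model: subsystems maintain bits in a dependency mask. */
enum { DEP_BIT_SCHED, DEP_BIT_POSIX_TIMER };
static unsigned long tick_dep_mask;

static void dep_set(int bit)   { tick_dep_mask |=  1UL << bit; }
static void dep_clear(int bit) { tick_dep_mask &= ~(1UL << bit); }

static bool can_stop_tick_push(void)
{
	return tick_dep_mask == 0;	/* single lightweight mask check */
}

int main(void)
{
	dep_set(DEP_BIT_POSIX_TIMER);
	printf("can stop: %d\n", can_stop_tick_push());	/* 0 */
	dep_clear(DEP_BIT_POSIX_TIMER);
	printf("can stop: %d\n", can_stop_tick_push());	/* 1 */
	(void)can_stop_tick_pull();
	return 0;
}
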
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 301de78d65f7..6c502cb13c95 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v)
548} 548}
549#endif 549#endif
550 550
551/**
552 * fetch_or - perform *ptr |= mask and return old value of *ptr
553 * @ptr: pointer to value
554 * @mask: mask to OR on the value
555 *
556 * cmpxchg based fetch_or, macro so it works for different integer types
557 */
558#ifndef fetch_or
559#define fetch_or(ptr, mask) \
560({ typeof(*(ptr)) __old, __val = *(ptr); \
561 for (;;) { \
562 __old = cmpxchg((ptr), __val, __val | (mask)); \
563 if (__old == __val) \
564 break; \
565 __val = __old; \
566 } \
567 __old; \
568})
569#endif
570
571
551#ifdef CONFIG_GENERIC_ATOMIC64 572#ifdef CONFIG_GENERIC_ATOMIC64
552#include <asm-generic/atomic64.h> 573#include <asm-generic/atomic64.h>
553#endif 574#endif
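
The fetch_or() helper promoted into <linux/atomic.h> above returns the previous value of the word, which lets a caller detect the 0 -> nonzero transition of a dependency mask (this is how tick_nohz_dep_set_all() later in this patch decides whether to kick CPUs). Below is a standalone sketch of that behaviour; it assumes a GCC/Clang toolchain (typeof, statement expressions) and models cmpxchg() with the __sync_val_compare_and_swap() builtin, so it is not the kernel's cmpxchg.

/* Userspace demonstration of the fetch_or() pattern. */
#include <stdio.h>

#define cmpxchg(ptr, old, new) __sync_val_compare_and_swap((ptr), (old), (new))

#define fetch_or(ptr, mask)						\
({	typeof(*(ptr)) __old, __val = *(ptr);				\
	for (;;) {							\
		__old = cmpxchg((ptr), __val, __val | (mask));		\
		if (__old == __val)					\
			break;						\
		__val = __old;						\
	}								\
	__old;								\
})

int main(void)
{
	unsigned long dep_mask = 0;

	/* The first setter observes the 0 -> nonzero transition ... */
	unsigned long prev = fetch_or(&dep_mask, 1UL << 2);
	printf("prev=%lx now=%lx\n", prev, dep_mask);	/* prev=0 now=4 */

	/* ... later setters of the same bit see it already set. */
	prev = fetch_or(&dep_mask, 1UL << 2);
	printf("prev=%lx now=%lx\n", prev, dep_mask);	/* prev=4 now=4 */
	return 0;
}
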
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f5c5a3fa2c81..6e44efc19a6a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1109,12 +1109,6 @@ static inline void perf_event_task_tick(void) { }
1109static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } 1109static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
1110#endif 1110#endif
1111 1111
1112#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
1113extern bool perf_event_can_stop_tick(void);
1114#else
1115static inline bool perf_event_can_stop_tick(void) { return true; }
1116#endif
1117
1118#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) 1112#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
1119extern void perf_restore_debug_store(void); 1113extern void perf_restore_debug_store(void);
1120#else 1114#else
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 907f3fd191ac..62d44c176071 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer);
128void run_posix_cpu_timers(struct task_struct *task); 128void run_posix_cpu_timers(struct task_struct *task);
129void posix_cpu_timers_exit(struct task_struct *task); 129void posix_cpu_timers_exit(struct task_struct *task);
130void posix_cpu_timers_exit_group(struct task_struct *task); 130void posix_cpu_timers_exit_group(struct task_struct *task);
131
132bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);
133
134void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, 131void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
135 cputime_t *newval, cputime_t *oldval); 132 cputime_t *newval, cputime_t *oldval);
136 133
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a10494a94cc3..2b10348806d8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -719,6 +719,10 @@ struct signal_struct {
719 /* Earliest-expiration cache. */ 719 /* Earliest-expiration cache. */
720 struct task_cputime cputime_expires; 720 struct task_cputime cputime_expires;
721 721
722#ifdef CONFIG_NO_HZ_FULL
723 unsigned long tick_dep_mask;
724#endif
725
722 struct list_head cpu_timers[3]; 726 struct list_head cpu_timers[3];
723 727
724 struct pid *tty_old_pgrp; 728 struct pid *tty_old_pgrp;
@@ -1542,6 +1546,10 @@ struct task_struct {
1542 VTIME_SYS, 1546 VTIME_SYS,
1543 } vtime_snap_whence; 1547 } vtime_snap_whence;
1544#endif 1548#endif
1549
1550#ifdef CONFIG_NO_HZ_FULL
1551 unsigned long tick_dep_mask;
1552#endif
1545 unsigned long nvcsw, nivcsw; /* context switch counts */ 1553 unsigned long nvcsw, nivcsw; /* context switch counts */
1546 u64 start_time; /* monotonic time in nsec */ 1554 u64 start_time; /* monotonic time in nsec */
1547 u64 real_start_time; /* boot based time in nsec */ 1555 u64 real_start_time; /* boot based time in nsec */
@@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
2356#endif 2364#endif
2357 2365
2358#ifdef CONFIG_NO_HZ_FULL 2366#ifdef CONFIG_NO_HZ_FULL
2359extern bool sched_can_stop_tick(void);
2360extern u64 scheduler_tick_max_deferment(void); 2367extern u64 scheduler_tick_max_deferment(void);
2361#else
2362static inline bool sched_can_stop_tick(void) { return false; }
2363#endif 2368#endif
2364 2369
2365#ifdef CONFIG_SCHED_AUTOGROUP 2370#ifdef CONFIG_SCHED_AUTOGROUP
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 97fd4e543846..21f73649a4dc 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void)
97 tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT); 97 tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
98} 98}
99 99
100enum tick_dep_bits {
101 TICK_DEP_BIT_POSIX_TIMER = 0,
102 TICK_DEP_BIT_PERF_EVENTS = 1,
103 TICK_DEP_BIT_SCHED = 2,
104 TICK_DEP_BIT_CLOCK_UNSTABLE = 3
105};
106
107#define TICK_DEP_MASK_NONE 0
108#define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER)
109#define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS)
110#define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED)
111#define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
112
100#ifdef CONFIG_NO_HZ_COMMON 113#ifdef CONFIG_NO_HZ_COMMON
101extern int tick_nohz_enabled; 114extern int tick_nohz_enabled;
102extern int tick_nohz_tick_stopped(void); 115extern int tick_nohz_tick_stopped(void);
@@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void)
154 return cpumask_any_and(housekeeping_mask, cpu_online_mask); 167 return cpumask_any_and(housekeeping_mask, cpu_online_mask);
155} 168}
156 169
157extern void tick_nohz_full_kick(void); 170extern void tick_nohz_dep_set(enum tick_dep_bits bit);
171extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
172extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
173extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
174extern void tick_nohz_dep_set_task(struct task_struct *tsk,
175 enum tick_dep_bits bit);
176extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
177 enum tick_dep_bits bit);
178extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
179 enum tick_dep_bits bit);
180extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
181 enum tick_dep_bits bit);
182
183/*
184 * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
185 * on top of static keys.
186 */
187static inline void tick_dep_set(enum tick_dep_bits bit)
188{
189 if (tick_nohz_full_enabled())
190 tick_nohz_dep_set(bit);
191}
192
193static inline void tick_dep_clear(enum tick_dep_bits bit)
194{
195 if (tick_nohz_full_enabled())
196 tick_nohz_dep_clear(bit);
197}
198
199static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
200{
201 if (tick_nohz_full_cpu(cpu))
202 tick_nohz_dep_set_cpu(cpu, bit);
203}
204
205static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
206{
207 if (tick_nohz_full_cpu(cpu))
208 tick_nohz_dep_clear_cpu(cpu, bit);
209}
210
211static inline void tick_dep_set_task(struct task_struct *tsk,
212 enum tick_dep_bits bit)
213{
214 if (tick_nohz_full_enabled())
215 tick_nohz_dep_set_task(tsk, bit);
216}
217static inline void tick_dep_clear_task(struct task_struct *tsk,
218 enum tick_dep_bits bit)
219{
220 if (tick_nohz_full_enabled())
221 tick_nohz_dep_clear_task(tsk, bit);
222}
223static inline void tick_dep_set_signal(struct signal_struct *signal,
224 enum tick_dep_bits bit)
225{
226 if (tick_nohz_full_enabled())
227 tick_nohz_dep_set_signal(signal, bit);
228}
229static inline void tick_dep_clear_signal(struct signal_struct *signal,
230 enum tick_dep_bits bit)
231{
232 if (tick_nohz_full_enabled())
233 tick_nohz_dep_clear_signal(signal, bit);
234}
235
158extern void tick_nohz_full_kick_cpu(int cpu); 236extern void tick_nohz_full_kick_cpu(int cpu);
159extern void tick_nohz_full_kick_all(void);
160extern void __tick_nohz_task_switch(void); 237extern void __tick_nohz_task_switch(void);
161#else 238#else
162static inline int housekeeping_any_cpu(void) 239static inline int housekeeping_any_cpu(void)
@@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void)
166static inline bool tick_nohz_full_enabled(void) { return false; } 243static inline bool tick_nohz_full_enabled(void) { return false; }
167static inline bool tick_nohz_full_cpu(int cpu) { return false; } 244static inline bool tick_nohz_full_cpu(int cpu) { return false; }
168static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } 245static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
246
247static inline void tick_dep_set(enum tick_dep_bits bit) { }
248static inline void tick_dep_clear(enum tick_dep_bits bit) { }
249static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
250static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
251static inline void tick_dep_set_task(struct task_struct *tsk,
252 enum tick_dep_bits bit) { }
253static inline void tick_dep_clear_task(struct task_struct *tsk,
254 enum tick_dep_bits bit) { }
255static inline void tick_dep_set_signal(struct signal_struct *signal,
256 enum tick_dep_bits bit) { }
257static inline void tick_dep_clear_signal(struct signal_struct *signal,
258 enum tick_dep_bits bit) { }
259
169static inline void tick_nohz_full_kick_cpu(int cpu) { } 260static inline void tick_nohz_full_kick_cpu(int cpu) { }
170static inline void tick_nohz_full_kick(void) { }
171static inline void tick_nohz_full_kick_all(void) { }
172static inline void __tick_nohz_task_switch(void) { } 261static inline void __tick_nohz_task_switch(void) { }
173#endif 262#endif
174 263
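
The tick_dep_*() inlines added to <linux/tick.h> above follow the usual pattern of a cheap inline guard in front of an out-of-line slow path: when full nohz is not enabled, the wrapper costs only the tick_nohz_full_enabled() test. A rough userspace sketch of that shape, with a plain flag standing in for the kernel's static key and invented function names:

/* Sketch of the guard-wrapper pattern; identifiers are illustrative. */
#include <stdio.h>
#include <stdbool.h>

static bool feature_enabled;		/* kernel uses a static key here */

static void slow_path_set_dep(int bit)	/* stand-in for tick_nohz_dep_set() */
{
	printf("slow path: set dependency bit %d\n", bit);
}

/* Cheap inline wrapper: the off case costs only one predictable branch. */
static inline void dep_set(int bit)
{
	if (feature_enabled)
		slow_path_set_dep(bit);
}

int main(void)
{
	dep_set(0);			/* feature off: nothing happens */
	feature_enabled = true;
	dep_set(0);			/* feature on: slow path runs */
	return 0;
}
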
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 073b9ac245ba..51440131d337 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire,
328); 328);
329 329
330#ifdef CONFIG_NO_HZ_COMMON 330#ifdef CONFIG_NO_HZ_COMMON
331
332#define TICK_DEP_NAMES \
333 tick_dep_name(NONE) \
334 tick_dep_name(POSIX_TIMER) \
335 tick_dep_name(PERF_EVENTS) \
336 tick_dep_name(SCHED) \
337 tick_dep_name_end(CLOCK_UNSTABLE)
338
339#undef tick_dep_name
340#undef tick_dep_name_end
341
342#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
343#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
344
345TICK_DEP_NAMES
346
347#undef tick_dep_name
348#undef tick_dep_name_end
349
350#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
351#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
352
353#define show_tick_dep_name(val) \
354 __print_symbolic(val, TICK_DEP_NAMES)
355
331TRACE_EVENT(tick_stop, 356TRACE_EVENT(tick_stop,
332 357
333 TP_PROTO(int success, char *error_msg), 358 TP_PROTO(int success, int dependency),
334 359
335 TP_ARGS(success, error_msg), 360 TP_ARGS(success, dependency),
336 361
337 TP_STRUCT__entry( 362 TP_STRUCT__entry(
338 __field( int , success ) 363 __field( int , success )
339 __string( msg, error_msg ) 364 __field( int , dependency )
340 ), 365 ),
341 366
342 TP_fast_assign( 367 TP_fast_assign(
343 __entry->success = success; 368 __entry->success = success;
344 __assign_str(msg, error_msg); 369 __entry->dependency = dependency;
345 ), 370 ),
346 371
347 TP_printk("success=%s msg=%s", __entry->success ? "yes" : "no", __get_str(msg)) 372 TP_printk("success=%d dependency=%s", __entry->success, \
373 show_tick_dep_name(__entry->dependency))
348); 374);
349#endif 375#endif
350 376
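
The TICK_DEP_NAMES block above uses the common "define the list once, expand it twice" macro trick: the same names are expanded first to register the enum values via TRACE_DEFINE_ENUM() and then again to build the table consumed by __print_symbolic(). A small userspace analogue of the technique, with names invented for the example:

/* Userspace sketch of the double-expansion ("X-macro") trick. */
#include <stdio.h>

#define DEP_NAMES			\
	dep_name(POSIX_TIMER)		\
	dep_name(PERF_EVENTS)		\
	dep_name_end(SCHED)

/* First expansion: enum values. */
#define dep_name(x)	DEP_##x,
#define dep_name_end(x)	DEP_##x
enum dep { DEP_NAMES };
#undef dep_name
#undef dep_name_end

/* Second expansion: value -> string table for pretty-printing. */
#define dep_name(x)	[DEP_##x] = #x,
#define dep_name_end(x)	[DEP_##x] = #x
static const char *dep_str[] = { DEP_NAMES };

int main(void)
{
	printf("%s\n", dep_str[DEP_PERF_EVENTS]);	/* prints "PERF_EVENTS" */
	return 0;
}
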
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 614614821f00..effe8d652c1d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3112,17 +3112,6 @@ done:
3112 return rotate; 3112 return rotate;
3113} 3113}
3114 3114
3115#ifdef CONFIG_NO_HZ_FULL
3116bool perf_event_can_stop_tick(void)
3117{
3118 if (atomic_read(&nr_freq_events) ||
3119 __this_cpu_read(perf_throttled_count))
3120 return false;
3121 else
3122 return true;
3123}
3124#endif
3125
3126void perf_event_task_tick(void) 3115void perf_event_task_tick(void)
3127{ 3116{
3128 struct list_head *head = this_cpu_ptr(&active_ctx_list); 3117 struct list_head *head = this_cpu_ptr(&active_ctx_list);
@@ -3133,6 +3122,7 @@ void perf_event_task_tick(void)
3133 3122
3134 __this_cpu_inc(perf_throttled_seq); 3123 __this_cpu_inc(perf_throttled_seq);
3135 throttled = __this_cpu_xchg(perf_throttled_count, 0); 3124 throttled = __this_cpu_xchg(perf_throttled_count, 0);
3125 tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
3136 3126
3137 list_for_each_entry_safe(ctx, tmp, head, active_ctx_list) 3127 list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
3138 perf_adjust_freq_unthr_context(ctx, throttled); 3128 perf_adjust_freq_unthr_context(ctx, throttled);
@@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
3564 atomic_dec(&per_cpu(perf_cgroup_events, cpu)); 3554 atomic_dec(&per_cpu(perf_cgroup_events, cpu));
3565} 3555}
3566 3556
3557#ifdef CONFIG_NO_HZ_FULL
3558static DEFINE_SPINLOCK(nr_freq_lock);
3559#endif
3560
3561static void unaccount_freq_event_nohz(void)
3562{
3563#ifdef CONFIG_NO_HZ_FULL
3564 spin_lock(&nr_freq_lock);
3565 if (atomic_dec_and_test(&nr_freq_events))
3566 tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
3567 spin_unlock(&nr_freq_lock);
3568#endif
3569}
3570
3571static void unaccount_freq_event(void)
3572{
3573 if (tick_nohz_full_enabled())
3574 unaccount_freq_event_nohz();
3575 else
3576 atomic_dec(&nr_freq_events);
3577}
3578
3567static void unaccount_event(struct perf_event *event) 3579static void unaccount_event(struct perf_event *event)
3568{ 3580{
3569 bool dec = false; 3581 bool dec = false;
@@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event)
3580 if (event->attr.task) 3592 if (event->attr.task)
3581 atomic_dec(&nr_task_events); 3593 atomic_dec(&nr_task_events);
3582 if (event->attr.freq) 3594 if (event->attr.freq)
3583 atomic_dec(&nr_freq_events); 3595 unaccount_freq_event();
3584 if (event->attr.context_switch) { 3596 if (event->attr.context_switch) {
3585 dec = true; 3597 dec = true;
3586 atomic_dec(&nr_switch_events); 3598 atomic_dec(&nr_switch_events);
@@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event,
6424 if (unlikely(throttle 6436 if (unlikely(throttle
6425 && hwc->interrupts >= max_samples_per_tick)) { 6437 && hwc->interrupts >= max_samples_per_tick)) {
6426 __this_cpu_inc(perf_throttled_count); 6438 __this_cpu_inc(perf_throttled_count);
6439 tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
6427 hwc->interrupts = MAX_INTERRUPTS; 6440 hwc->interrupts = MAX_INTERRUPTS;
6428 perf_log_throttle(event, 0); 6441 perf_log_throttle(event, 0);
6429 tick_nohz_full_kick();
6430 ret = 1; 6442 ret = 1;
6431 } 6443 }
6432 } 6444 }
@@ -7816,6 +7828,27 @@ static void account_event_cpu(struct perf_event *event, int cpu)
7816 atomic_inc(&per_cpu(perf_cgroup_events, cpu)); 7828 atomic_inc(&per_cpu(perf_cgroup_events, cpu));
7817} 7829}
7818 7830
7831/* Freq events need the tick to stay alive (see perf_event_task_tick). */
7832static void account_freq_event_nohz(void)
7833{
7834#ifdef CONFIG_NO_HZ_FULL
7835 /* Lock so we don't race with concurrent unaccount */
7836 spin_lock(&nr_freq_lock);
7837 if (atomic_inc_return(&nr_freq_events) == 1)
7838 tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
7839 spin_unlock(&nr_freq_lock);
7840#endif
7841}
7842
7843static void account_freq_event(void)
7844{
7845 if (tick_nohz_full_enabled())
7846 account_freq_event_nohz();
7847 else
7848 atomic_inc(&nr_freq_events);
7849}
7850
7851
7819static void account_event(struct perf_event *event) 7852static void account_event(struct perf_event *event)
7820{ 7853{
7821 bool inc = false; 7854 bool inc = false;
@@ -7831,10 +7864,8 @@ static void account_event(struct perf_event *event)
7831 atomic_inc(&nr_comm_events); 7864 atomic_inc(&nr_comm_events);
7832 if (event->attr.task) 7865 if (event->attr.task)
7833 atomic_inc(&nr_task_events); 7866 atomic_inc(&nr_task_events);
7834 if (event->attr.freq) { 7867 if (event->attr.freq)
7835 if (atomic_inc_return(&nr_freq_events) == 1) 7868 account_freq_event();
7836 tick_nohz_full_kick_all();
7837 }
7838 if (event->attr.context_switch) { 7869 if (event->attr.context_switch) {
7839 atomic_inc(&nr_switch_events); 7870 atomic_inc(&nr_switch_events);
7840 inc = true; 7871 inc = true;
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index bc54e84675da..fedb967a9841 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -61,6 +61,7 @@
61#include <linux/static_key.h> 61#include <linux/static_key.h>
62#include <linux/workqueue.h> 62#include <linux/workqueue.h>
63#include <linux/compiler.h> 63#include <linux/compiler.h>
64#include <linux/tick.h>
64 65
65/* 66/*
66 * Scheduler clock - returns current time in nanosec units. 67 * Scheduler clock - returns current time in nanosec units.
@@ -89,6 +90,8 @@ static void __set_sched_clock_stable(void)
89{ 90{
90 if (!sched_clock_stable()) 91 if (!sched_clock_stable())
91 static_key_slow_inc(&__sched_clock_stable); 92 static_key_slow_inc(&__sched_clock_stable);
93
94 tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
92} 95}
93 96
94void set_sched_clock_stable(void) 97void set_sched_clock_stable(void)
@@ -108,6 +111,8 @@ static void __clear_sched_clock_stable(struct work_struct *work)
108 /* XXX worry about clock continuity */ 111 /* XXX worry about clock continuity */
109 if (sched_clock_stable()) 112 if (sched_clock_stable())
110 static_key_slow_dec(&__sched_clock_stable); 113 static_key_slow_dec(&__sched_clock_stable);
114
115 tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
111} 116}
112 117
113static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable); 118static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9503d590e5ef..1fad82364ffe 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -453,20 +453,6 @@ static inline void init_hrtick(void)
453} 453}
454#endif /* CONFIG_SCHED_HRTICK */ 454#endif /* CONFIG_SCHED_HRTICK */
455 455
456/*
457 * cmpxchg based fetch_or, macro so it works for different integer types
458 */
459#define fetch_or(ptr, val) \
460({ typeof(*(ptr)) __old, __val = *(ptr); \
461 for (;;) { \
462 __old = cmpxchg((ptr), __val, __val | (val)); \
463 if (__old == __val) \
464 break; \
465 __val = __old; \
466 } \
467 __old; \
468})
469
470#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) 456#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
471/* 457/*
472 * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, 458 * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
@@ -715,31 +701,36 @@ static inline bool got_nohz_idle_kick(void)
715#endif /* CONFIG_NO_HZ_COMMON */ 701#endif /* CONFIG_NO_HZ_COMMON */
716 702
717#ifdef CONFIG_NO_HZ_FULL 703#ifdef CONFIG_NO_HZ_FULL
718bool sched_can_stop_tick(void) 704bool sched_can_stop_tick(struct rq *rq)
719{ 705{
706 int fifo_nr_running;
707
708 /* Deadline tasks, even if single, need the tick */
709 if (rq->dl.dl_nr_running)
710 return false;
711
720 /* 712 /*
721 * FIFO realtime policy runs the highest priority task. Other runnable 713 * FIFO realtime policy runs the highest priority task (after DEADLINE).
722 * tasks are of a lower priority. The scheduler tick does nothing. 714 * Other runnable tasks are of a lower priority. The scheduler tick
715 * isn't needed.
723 */ 716 */
724 if (current->policy == SCHED_FIFO) 717 fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
718 if (fifo_nr_running)
725 return true; 719 return true;
726 720
727 /* 721 /*
728 * Round-robin realtime tasks time slice with other tasks at the same 722 * Round-robin realtime tasks time slice with other tasks at the same
729 * realtime priority. Is this task the only one at this priority? 723 * realtime priority.
730 */ 724 */
731 if (current->policy == SCHED_RR) { 725 if (rq->rt.rr_nr_running) {
732 struct sched_rt_entity *rt_se = &current->rt; 726 if (rq->rt.rr_nr_running == 1)
733 727 return true;
734 return list_is_singular(&rt_se->run_list); 728 else
729 return false;
735 } 730 }
736 731
737 /* 732 /* Normal multitasking need periodic preemption checks */
738 * More than one running task need preemption. 733 if (rq->cfs.nr_running > 1)
739 * nr_running update is assumed to be visible
740 * after IPI is sent from wakers.
741 */
742 if (this_rq()->nr_running > 1)
743 return false; 734 return false;
744 735
745 return true; 736 return true;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8ec86abe0ea1..3f1fcffbb18f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1142,12 +1142,27 @@ unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
1142} 1142}
1143 1143
1144static inline 1144static inline
1145unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
1146{
1147 struct rt_rq *group_rq = group_rt_rq(rt_se);
1148 struct task_struct *tsk;
1149
1150 if (group_rq)
1151 return group_rq->rr_nr_running;
1152
1153 tsk = rt_task_of(rt_se);
1154
1155 return (tsk->policy == SCHED_RR) ? 1 : 0;
1156}
1157
1158static inline
1145void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 1159void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1146{ 1160{
1147 int prio = rt_se_prio(rt_se); 1161 int prio = rt_se_prio(rt_se);
1148 1162
1149 WARN_ON(!rt_prio(prio)); 1163 WARN_ON(!rt_prio(prio));
1150 rt_rq->rt_nr_running += rt_se_nr_running(rt_se); 1164 rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
1165 rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
1151 1166
1152 inc_rt_prio(rt_rq, prio); 1167 inc_rt_prio(rt_rq, prio);
1153 inc_rt_migration(rt_se, rt_rq); 1168 inc_rt_migration(rt_se, rt_rq);
@@ -1160,6 +1175,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1160 WARN_ON(!rt_prio(rt_se_prio(rt_se))); 1175 WARN_ON(!rt_prio(rt_se_prio(rt_se)));
1161 WARN_ON(!rt_rq->rt_nr_running); 1176 WARN_ON(!rt_rq->rt_nr_running);
1162 rt_rq->rt_nr_running -= rt_se_nr_running(rt_se); 1177 rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
1178 rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
1163 1179
1164 dec_rt_prio(rt_rq, rt_se_prio(rt_se)); 1180 dec_rt_prio(rt_rq, rt_se_prio(rt_se));
1165 dec_rt_migration(rt_se, rt_rq); 1181 dec_rt_migration(rt_se, rt_rq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 10f16374df7f..4f0bca770108 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -450,6 +450,7 @@ static inline int rt_bandwidth_enabled(void)
450struct rt_rq { 450struct rt_rq {
451 struct rt_prio_array active; 451 struct rt_prio_array active;
452 unsigned int rt_nr_running; 452 unsigned int rt_nr_running;
453 unsigned int rr_nr_running;
453#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED 454#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
454 struct { 455 struct {
455 int curr; /* highest queued rt task prio */ 456 int curr; /* highest queued rt task prio */
@@ -1278,6 +1279,35 @@ unsigned long to_ratio(u64 period, u64 runtime);
1278 1279
1279extern void init_entity_runnable_average(struct sched_entity *se); 1280extern void init_entity_runnable_average(struct sched_entity *se);
1280 1281
1282#ifdef CONFIG_NO_HZ_FULL
1283extern bool sched_can_stop_tick(struct rq *rq);
1284
1285/*
1286 * Tick may be needed by tasks in the runqueue depending on their policy and
1287 * requirements. If tick is needed, lets send the target an IPI to kick it out of
1288 * nohz mode if necessary.
1289 */
1290static inline void sched_update_tick_dependency(struct rq *rq)
1291{
1292 int cpu;
1293
1294 if (!tick_nohz_full_enabled())
1295 return;
1296
1297 cpu = cpu_of(rq);
1298
1299 if (!tick_nohz_full_cpu(cpu))
1300 return;
1301
1302 if (sched_can_stop_tick(rq))
1303 tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
1304 else
1305 tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
1306}
1307#else
1308static inline void sched_update_tick_dependency(struct rq *rq) { }
1309#endif
1310
1281static inline void add_nr_running(struct rq *rq, unsigned count) 1311static inline void add_nr_running(struct rq *rq, unsigned count)
1282{ 1312{
1283 unsigned prev_nr = rq->nr_running; 1313 unsigned prev_nr = rq->nr_running;
@@ -1289,26 +1319,16 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
1289 if (!rq->rd->overload) 1319 if (!rq->rd->overload)
1290 rq->rd->overload = true; 1320 rq->rd->overload = true;
1291#endif 1321#endif
1292
1293#ifdef CONFIG_NO_HZ_FULL
1294 if (tick_nohz_full_cpu(rq->cpu)) {
1295 /*
1296 * Tick is needed if more than one task runs on a CPU.
1297 * Send the target an IPI to kick it out of nohz mode.
1298 *
1299 * We assume that IPI implies full memory barrier and the
1300 * new value of rq->nr_running is visible on reception
1301 * from the target.
1302 */
1303 tick_nohz_full_kick_cpu(rq->cpu);
1304 }
1305#endif
1306 } 1322 }
1323
1324 sched_update_tick_dependency(rq);
1307} 1325}
1308 1326
1309static inline void sub_nr_running(struct rq *rq, unsigned count) 1327static inline void sub_nr_running(struct rq *rq, unsigned count)
1310{ 1328{
1311 rq->nr_running -= count; 1329 rq->nr_running -= count;
1330 /* Check if we still need preemption */
1331 sched_update_tick_dependency(rq);
1312} 1332}
1313 1333
1314static inline void rq_last_tick_reset(struct rq *rq) 1334static inline void rq_last_tick_reset(struct rq *rq)
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index f5e86d282d52..1cafba860b08 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -333,7 +333,6 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
333 return err; 333 return err;
334} 334}
335 335
336
337/* 336/*
338 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer. 337 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
339 * This is called from sys_timer_create() and do_cpu_nanosleep() with the 338 * This is called from sys_timer_create() and do_cpu_nanosleep() with the
@@ -517,6 +516,10 @@ static void arm_timer(struct k_itimer *timer)
517 cputime_expires->sched_exp = exp; 516 cputime_expires->sched_exp = exp;
518 break; 517 break;
519 } 518 }
519 if (CPUCLOCK_PERTHREAD(timer->it_clock))
520 tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
521 else
522 tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
520 } 523 }
521} 524}
522 525
@@ -582,39 +585,6 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
582 return 0; 585 return 0;
583} 586}
584 587
585#ifdef CONFIG_NO_HZ_FULL
586static void nohz_kick_work_fn(struct work_struct *work)
587{
588 tick_nohz_full_kick_all();
589}
590
591static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
592
593/*
594 * We need the IPIs to be sent from sane process context.
595 * The posix cpu timers are always set with irqs disabled.
596 */
597static void posix_cpu_timer_kick_nohz(void)
598{
599 if (context_tracking_is_enabled())
600 schedule_work(&nohz_kick_work);
601}
602
603bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
604{
605 if (!task_cputime_zero(&tsk->cputime_expires))
606 return false;
607
608 /* Check if cputimer is running. This is accessed without locking. */
609 if (READ_ONCE(tsk->signal->cputimer.running))
610 return false;
611
612 return true;
613}
614#else
615static inline void posix_cpu_timer_kick_nohz(void) { }
616#endif
617
618/* 588/*
619 * Guts of sys_timer_settime for CPU timers. 589 * Guts of sys_timer_settime for CPU timers.
620 * This is called with the timer locked and interrupts disabled. 590 * This is called with the timer locked and interrupts disabled.
@@ -761,8 +731,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
761 sample_to_timespec(timer->it_clock, 731 sample_to_timespec(timer->it_clock,
762 old_incr, &old->it_interval); 732 old_incr, &old->it_interval);
763 } 733 }
764 if (!ret) 734
765 posix_cpu_timer_kick_nohz();
766 return ret; 735 return ret;
767} 736}
768 737
@@ -911,6 +880,8 @@ static void check_thread_timers(struct task_struct *tsk,
911 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); 880 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
912 } 881 }
913 } 882 }
883 if (task_cputime_zero(tsk_expires))
884 tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
914} 885}
915 886
916static inline void stop_process_timers(struct signal_struct *sig) 887static inline void stop_process_timers(struct signal_struct *sig)
@@ -919,6 +890,7 @@ static inline void stop_process_timers(struct signal_struct *sig)
919 890
920 /* Turn off cputimer->running. This is done without locking. */ 891 /* Turn off cputimer->running. This is done without locking. */
921 WRITE_ONCE(cputimer->running, false); 892 WRITE_ONCE(cputimer->running, false);
893 tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
922} 894}
923 895
924static u32 onecputick; 896static u32 onecputick;
@@ -1095,8 +1067,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1095 arm_timer(timer); 1067 arm_timer(timer);
1096 unlock_task_sighand(p, &flags); 1068 unlock_task_sighand(p, &flags);
1097 1069
1098 /* Kick full dynticks CPUs in case they need to tick on the new timer */
1099 posix_cpu_timer_kick_nohz();
1100out: 1070out:
1101 timer->it_overrun_last = timer->it_overrun; 1071 timer->it_overrun_last = timer->it_overrun;
1102 timer->it_overrun = -1; 1072 timer->it_overrun = -1;
@@ -1270,7 +1240,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1270 } 1240 }
1271 1241
1272 if (!*newval) 1242 if (!*newval)
1273 goto out; 1243 return;
1274 *newval += now; 1244 *newval += now;
1275 } 1245 }
1276 1246
@@ -1288,8 +1258,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1288 tsk->signal->cputime_expires.virt_exp = *newval; 1258 tsk->signal->cputime_expires.virt_exp = *newval;
1289 break; 1259 break;
1290 } 1260 }
1291out: 1261
1292 posix_cpu_timer_kick_nohz(); 1262 tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
1293} 1263}
1294 1264
1295static int do_cpu_nanosleep(const clockid_t which_clock, int flags, 1265static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 0b17424349eb..969e6704c3c9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -22,7 +22,6 @@
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/irq_work.h> 23#include <linux/irq_work.h>
24#include <linux/posix-timers.h> 24#include <linux/posix-timers.h>
25#include <linux/perf_event.h>
26#include <linux/context_tracking.h> 25#include <linux/context_tracking.h>
27 26
28#include <asm/irq_regs.h> 27#include <asm/irq_regs.h>
@@ -158,54 +157,63 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
158cpumask_var_t tick_nohz_full_mask; 157cpumask_var_t tick_nohz_full_mask;
159cpumask_var_t housekeeping_mask; 158cpumask_var_t housekeeping_mask;
160bool tick_nohz_full_running; 159bool tick_nohz_full_running;
160static unsigned long tick_dep_mask;
161 161
162static bool can_stop_full_tick(void) 162static void trace_tick_dependency(unsigned long dep)
163{
164 if (dep & TICK_DEP_MASK_POSIX_TIMER) {
165 trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
166 return;
167 }
168
169 if (dep & TICK_DEP_MASK_PERF_EVENTS) {
170 trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
171 return;
172 }
173
174 if (dep & TICK_DEP_MASK_SCHED) {
175 trace_tick_stop(0, TICK_DEP_MASK_SCHED);
176 return;
177 }
178
179 if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
180 trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
181}
182
183static bool can_stop_full_tick(struct tick_sched *ts)
163{ 184{
164 WARN_ON_ONCE(!irqs_disabled()); 185 WARN_ON_ONCE(!irqs_disabled());
165 186
166 if (!sched_can_stop_tick()) { 187 if (tick_dep_mask) {
167 trace_tick_stop(0, "more than 1 task in runqueue\n"); 188 trace_tick_dependency(tick_dep_mask);
168 return false; 189 return false;
169 } 190 }
170 191
171 if (!posix_cpu_timers_can_stop_tick(current)) { 192 if (ts->tick_dep_mask) {
172 trace_tick_stop(0, "posix timers running\n"); 193 trace_tick_dependency(ts->tick_dep_mask);
173 return false; 194 return false;
174 } 195 }
175 196
176 if (!perf_event_can_stop_tick()) { 197 if (current->tick_dep_mask) {
177 trace_tick_stop(0, "perf events running\n"); 198 trace_tick_dependency(current->tick_dep_mask);
178 return false; 199 return false;
179 } 200 }
180 201
181 /* sched_clock_tick() needs us? */ 202 if (current->signal->tick_dep_mask) {
182#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 203 trace_tick_dependency(current->signal->tick_dep_mask);
183 /*
184 * TODO: kick full dynticks CPUs when
185 * sched_clock_stable is set.
186 */
187 if (!sched_clock_stable()) {
188 trace_tick_stop(0, "unstable sched clock\n");
189 /*
190 * Don't allow the user to think they can get
191 * full NO_HZ with this machine.
192 */
193 WARN_ONCE(tick_nohz_full_running,
194 "NO_HZ FULL will not work with unstable sched clock");
195 return false; 204 return false;
196 } 205 }
197#endif
198 206
199 return true; 207 return true;
200} 208}
201 209
202static void nohz_full_kick_work_func(struct irq_work *work) 210static void nohz_full_kick_func(struct irq_work *work)
203{ 211{
204 /* Empty, the tick restart happens on tick_nohz_irq_exit() */ 212 /* Empty, the tick restart happens on tick_nohz_irq_exit() */
205} 213}
206 214
207static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { 215static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
208 .func = nohz_full_kick_work_func, 216 .func = nohz_full_kick_func,
209}; 217};
210 218
211/* 219/*
@@ -214,7 +222,7 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
214 * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(), 222 * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
215 * is NMI safe. 223 * is NMI safe.
216 */ 224 */
217void tick_nohz_full_kick(void) 225static void tick_nohz_full_kick(void)
218{ 226{
219 if (!tick_nohz_full_cpu(smp_processor_id())) 227 if (!tick_nohz_full_cpu(smp_processor_id()))
220 return; 228 return;
@@ -234,27 +242,112 @@ void tick_nohz_full_kick_cpu(int cpu)
234 irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu); 242 irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
235} 243}
236 244
237static void nohz_full_kick_ipi(void *info)
238{
239 /* Empty, the tick restart happens on tick_nohz_irq_exit() */
240}
241
242/* 245/*
243 * Kick all full dynticks CPUs in order to force these to re-evaluate 246 * Kick all full dynticks CPUs in order to force these to re-evaluate
244 * their dependency on the tick and restart it if necessary. 247 * their dependency on the tick and restart it if necessary.
245 */ 248 */
246void tick_nohz_full_kick_all(void) 249static void tick_nohz_full_kick_all(void)
247{ 250{
251 int cpu;
252
248 if (!tick_nohz_full_running) 253 if (!tick_nohz_full_running)
249 return; 254 return;
250 255
251 preempt_disable(); 256 preempt_disable();
252 smp_call_function_many(tick_nohz_full_mask, 257 for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
253 nohz_full_kick_ipi, NULL, false); 258 tick_nohz_full_kick_cpu(cpu);
254 tick_nohz_full_kick();
255 preempt_enable(); 259 preempt_enable();
256} 260}
257 261
262static void tick_nohz_dep_set_all(unsigned long *dep,
263 enum tick_dep_bits bit)
264{
265 unsigned long prev;
266
267 prev = fetch_or(dep, BIT_MASK(bit));
268 if (!prev)
269 tick_nohz_full_kick_all();
270}
271
272/*
273 * Set a global tick dependency. Used by perf events that rely on freq and
274 * by unstable clock.
275 */
276void tick_nohz_dep_set(enum tick_dep_bits bit)
277{
278 tick_nohz_dep_set_all(&tick_dep_mask, bit);
279}
280
281void tick_nohz_dep_clear(enum tick_dep_bits bit)
282{
283 clear_bit(bit, &tick_dep_mask);
284}
285
286/*
287 * Set per-CPU tick dependency. Used by scheduler and perf events in order to
288 * manage events throttling.
289 */
290void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
291{
292 unsigned long prev;
293 struct tick_sched *ts;
294
295 ts = per_cpu_ptr(&tick_cpu_sched, cpu);
296
297 prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
298 if (!prev) {
299 preempt_disable();
300 /* Perf needs local kick that is NMI safe */
301 if (cpu == smp_processor_id()) {
302 tick_nohz_full_kick();
303 } else {
304 /* Remote irq work not NMI-safe */
305 if (!WARN_ON_ONCE(in_nmi()))
306 tick_nohz_full_kick_cpu(cpu);
307 }
308 preempt_enable();
309 }
310}
311
312void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
313{
314 struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
315
316 clear_bit(bit, &ts->tick_dep_mask);
317}
318
319/*
320 * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
321 * per task timers.
322 */
323void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
324{
325 /*
326 * We could optimize this with just kicking the target running the task
327 * if that noise matters for nohz full users.
328 */
329 tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
330}
331
332void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
333{
334 clear_bit(bit, &tsk->tick_dep_mask);
335}
336
337/*
338 * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
339 * per process timers.
340 */
341void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
342{
343 tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
344}
345
346void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
347{
348 clear_bit(bit, &sig->tick_dep_mask);
349}
350
258/* 351/*
259 * Re-evaluate the need for the tick as we switch the current task. 352 * Re-evaluate the need for the tick as we switch the current task.
260 * It might need the tick due to per task/process properties: 353 * It might need the tick due to per task/process properties:
@@ -263,15 +356,19 @@ void tick_nohz_full_kick_all(void)
263void __tick_nohz_task_switch(void) 356void __tick_nohz_task_switch(void)
264{ 357{
265 unsigned long flags; 358 unsigned long flags;
359 struct tick_sched *ts;
266 360
267 local_irq_save(flags); 361 local_irq_save(flags);
268 362
269 if (!tick_nohz_full_cpu(smp_processor_id())) 363 if (!tick_nohz_full_cpu(smp_processor_id()))
270 goto out; 364 goto out;
271 365
272 if (tick_nohz_tick_stopped() && !can_stop_full_tick()) 366 ts = this_cpu_ptr(&tick_cpu_sched);
273 tick_nohz_full_kick();
274 367
368 if (ts->tick_stopped) {
369 if (current->tick_dep_mask || current->signal->tick_dep_mask)
370 tick_nohz_full_kick();
371 }
275out: 372out:
276 local_irq_restore(flags); 373 local_irq_restore(flags);
277} 374}
@@ -689,7 +786,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
689 786
690 ts->last_tick = hrtimer_get_expires(&ts->sched_timer); 787 ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
691 ts->tick_stopped = 1; 788 ts->tick_stopped = 1;
692 trace_tick_stop(1, " "); 789 trace_tick_stop(1, TICK_DEP_MASK_NONE);
693 } 790 }
694 791
695 /* 792 /*
@@ -740,7 +837,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
740 if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) 837 if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
741 return; 838 return;
742 839
743 if (can_stop_full_tick()) 840 if (can_stop_full_tick(ts))
744 tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); 841 tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
745 else if (ts->tick_stopped) 842 else if (ts->tick_stopped)
746 tick_nohz_restart_sched_tick(ts, ktime_get(), 1); 843 tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
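
Taken together, can_stop_full_tick() above now only checks four masks: the global one, the per-CPU one in struct tick_sched, the per-task one, and the per-process (signal_struct) one. The standalone sketch below models that aggregation with simplified stand-in types; it is an illustration of the structure, not the kernel code.

/* Userspace model of how the four dependency masks combine. */
#include <stdio.h>
#include <stdbool.h>

struct tick_sched { unsigned long tick_dep_mask; };	/* per CPU */
struct signal     { unsigned long tick_dep_mask; };	/* per process */
struct task       { unsigned long tick_dep_mask; struct signal *signal; };

static unsigned long global_tick_dep_mask;		/* system wide */

static bool can_stop_full_tick(struct tick_sched *ts, struct task *current)
{
	if (global_tick_dep_mask)
		return false;	/* e.g. unstable clock, freq perf events */
	if (ts->tick_dep_mask)
		return false;	/* e.g. scheduler or throttled perf on this CPU */
	if (current->tick_dep_mask)
		return false;	/* e.g. per-thread posix cpu timer */
	if (current->signal->tick_dep_mask)
		return false;	/* e.g. process-wide posix cpu timer */
	return true;
}

int main(void)
{
	struct signal sig = { 0 };
	struct task tsk = { 0, &sig };
	struct tick_sched ts = { 0 };

	printf("%d\n", can_stop_full_tick(&ts, &tsk));	/* 1: no dependency */
	sig.tick_dep_mask = 1UL << 0;			/* posix timer armed */
	printf("%d\n", can_stop_full_tick(&ts, &tsk));	/* 0: keep the tick */
	return 0;
}
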
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index a4a8d4e9baa1..eb4e32566a83 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -60,6 +60,7 @@ struct tick_sched {
60 u64 next_timer; 60 u64 next_timer;
61 ktime_t idle_expires; 61 ktime_t idle_expires;
62 int do_timer_last; 62 int do_timer_last;
63 unsigned long tick_dep_mask;
63}; 64};
64 65
65extern struct tick_sched *tick_get_tick_sched(int cpu); 66extern struct tick_sched *tick_get_tick_sched(int cpu);