diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-22 22:20:04 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-22 22:20:04 -0400 |
commit | 3a95398f54cbd664c749fe9f1bfc7e7dbace92d0 (patch) | |
tree | 3eee56a46eada7bc6354e442d9164b555a78e43a | |
parent | 43224b96af3154cedd7220f7b90094905f07ac78 (diff) | |
parent | 8cb9764fc88b41db11f251e8b2a0d006578b7eb4 (diff) |
Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull NOHZ updates from Thomas Gleixner:
"A few updates to the nohz infrastructure:
- recursion protection for context tracking
- make the TIF_NOHZ inheritance smarter
- isolate cpus which belong to the NOHZ full set"
* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
nohz: Set isolcpus when nohz_full is set
nohz: Add tick_nohz_full_add_cpus_to() API
context_tracking: Inherit TIF_NOHZ through forks instead of context switches
context_tracking: Protect against recursion
-rw-r--r-- | include/linux/context_tracking.h | 10 | ||||
-rw-r--r-- | include/linux/context_tracking_state.h | 1 | ||||
-rw-r--r-- | include/linux/sched.h | 3 | ||||
-rw-r--r-- | include/linux/tick.h | 7 | ||||
-rw-r--r-- | kernel/context_tracking.c | 67 | ||||
-rw-r--r-- | kernel/sched/core.c | 4 |
6 files changed, 59 insertions(+), 33 deletions(-)
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 2821838256b4..b96bd299966f 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h | |||
@@ -14,8 +14,6 @@ extern void context_tracking_enter(enum ctx_state state); | |||
14 | extern void context_tracking_exit(enum ctx_state state); | 14 | extern void context_tracking_exit(enum ctx_state state); |
15 | extern void context_tracking_user_enter(void); | 15 | extern void context_tracking_user_enter(void); |
16 | extern void context_tracking_user_exit(void); | 16 | extern void context_tracking_user_exit(void); |
17 | extern void __context_tracking_task_switch(struct task_struct *prev, | ||
18 | struct task_struct *next); | ||
19 | 17 | ||
20 | static inline void user_enter(void) | 18 | static inline void user_enter(void) |
21 | { | 19 | { |
@@ -51,19 +49,11 @@ static inline void exception_exit(enum ctx_state prev_ctx) | |||
51 | } | 49 | } |
52 | } | 50 | } |
53 | 51 | ||
54 | static inline void context_tracking_task_switch(struct task_struct *prev, | ||
55 | struct task_struct *next) | ||
56 | { | ||
57 | if (context_tracking_is_enabled()) | ||
58 | __context_tracking_task_switch(prev, next); | ||
59 | } | ||
60 | #else | 52 | #else |
61 | static inline void user_enter(void) { } | 53 | static inline void user_enter(void) { } |
62 | static inline void user_exit(void) { } | 54 | static inline void user_exit(void) { } |
63 | static inline enum ctx_state exception_enter(void) { return 0; } | 55 | static inline enum ctx_state exception_enter(void) { return 0; } |
64 | static inline void exception_exit(enum ctx_state prev_ctx) { } | 56 | static inline void exception_exit(enum ctx_state prev_ctx) { } |
65 | static inline void context_tracking_task_switch(struct task_struct *prev, | ||
66 | struct task_struct *next) { } | ||
67 | #endif /* !CONFIG_CONTEXT_TRACKING */ | 57 | #endif /* !CONFIG_CONTEXT_TRACKING */ |
68 | 58 | ||
69 | 59 | ||
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 6b7b96a32b75..678ecdf90cf6 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h | |||
@@ -12,6 +12,7 @@ struct context_tracking { | |||
12 | * may be further optimized using static keys. | 12 | * may be further optimized using static keys. |
13 | */ | 13 | */ |
14 | bool active; | 14 | bool active; |
15 | int recursion; | ||
15 | enum ctx_state { | 16 | enum ctx_state { |
16 | CONTEXT_KERNEL = 0, | 17 | CONTEXT_KERNEL = 0, |
17 | CONTEXT_USER, | 18 | CONTEXT_USER, |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 30364cb58b1f..6633e83e608a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -2599,6 +2599,9 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, | |||
2599 | } | 2599 | } |
2600 | #endif | 2600 | #endif |
2601 | 2601 | ||
2602 | #define tasklist_empty() \ | ||
2603 | list_empty(&init_task.tasks) | ||
2604 | |||
2602 | #define next_task(p) \ | 2605 | #define next_task(p) \ |
2603 | list_entry_rcu((p)->tasks.next, struct task_struct, tasks) | 2606 | list_entry_rcu((p)->tasks.next, struct task_struct, tasks) |
2604 | 2607 | ||
diff --git a/include/linux/tick.h b/include/linux/tick.h index f8492da57ad3..4191b5623a28 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h | |||
@@ -134,6 +134,12 @@ static inline bool tick_nohz_full_cpu(int cpu) | |||
134 | return cpumask_test_cpu(cpu, tick_nohz_full_mask); | 134 | return cpumask_test_cpu(cpu, tick_nohz_full_mask); |
135 | } | 135 | } |
136 | 136 | ||
137 | static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) | ||
138 | { | ||
139 | if (tick_nohz_full_enabled()) | ||
140 | cpumask_or(mask, mask, tick_nohz_full_mask); | ||
141 | } | ||
142 | |||
137 | extern void __tick_nohz_full_check(void); | 143 | extern void __tick_nohz_full_check(void); |
138 | extern void tick_nohz_full_kick(void); | 144 | extern void tick_nohz_full_kick(void); |
139 | extern void tick_nohz_full_kick_cpu(int cpu); | 145 | extern void tick_nohz_full_kick_cpu(int cpu); |
@@ -142,6 +148,7 @@ extern void __tick_nohz_task_switch(struct task_struct *tsk); | |||
142 | #else | 148 | #else |
143 | static inline bool tick_nohz_full_enabled(void) { return false; } | 149 | static inline bool tick_nohz_full_enabled(void) { return false; } |
144 | static inline bool tick_nohz_full_cpu(int cpu) { return false; } | 150 | static inline bool tick_nohz_full_cpu(int cpu) { return false; } |
151 | static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } | ||
145 | static inline void __tick_nohz_full_check(void) { } | 152 | static inline void __tick_nohz_full_check(void) { } |
146 | static inline void tick_nohz_full_kick_cpu(int cpu) { } | 153 | static inline void tick_nohz_full_kick_cpu(int cpu) { } |
147 | static inline void tick_nohz_full_kick(void) { } | 154 | static inline void tick_nohz_full_kick(void) { } |
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 72d59a1a6eb6..0a495ab35bc7 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c | |||
@@ -30,12 +30,23 @@ EXPORT_SYMBOL_GPL(context_tracking_enabled); | |||
30 | DEFINE_PER_CPU(struct context_tracking, context_tracking); | 30 | DEFINE_PER_CPU(struct context_tracking, context_tracking); |
31 | EXPORT_SYMBOL_GPL(context_tracking); | 31 | EXPORT_SYMBOL_GPL(context_tracking); |
32 | 32 | ||
33 | void context_tracking_cpu_set(int cpu) | 33 | static bool context_tracking_recursion_enter(void) |
34 | { | 34 | { |
35 | if (!per_cpu(context_tracking.active, cpu)) { | 35 | int recursion; |
36 | per_cpu(context_tracking.active, cpu) = true; | 36 | |
37 | static_key_slow_inc(&context_tracking_enabled); | 37 | recursion = __this_cpu_inc_return(context_tracking.recursion); |
38 | } | 38 | if (recursion == 1) |
39 | return true; | ||
40 | |||
41 | WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion); | ||
42 | __this_cpu_dec(context_tracking.recursion); | ||
43 | |||
44 | return false; | ||
45 | } | ||
46 | |||
47 | static void context_tracking_recursion_exit(void) | ||
48 | { | ||
49 | __this_cpu_dec(context_tracking.recursion); | ||
39 | } | 50 | } |
40 | 51 | ||
41 | /** | 52 | /** |
@@ -75,6 +86,9 @@ void context_tracking_enter(enum ctx_state state) | |||
75 | WARN_ON_ONCE(!current->mm); | 86 | WARN_ON_ONCE(!current->mm); |
76 | 87 | ||
77 | local_irq_save(flags); | 88 | local_irq_save(flags); |
89 | if (!context_tracking_recursion_enter()) | ||
90 | goto out_irq_restore; | ||
91 | |||
78 | if ( __this_cpu_read(context_tracking.state) != state) { | 92 | if ( __this_cpu_read(context_tracking.state) != state) { |
79 | if (__this_cpu_read(context_tracking.active)) { | 93 | if (__this_cpu_read(context_tracking.active)) { |
80 | /* | 94 | /* |
@@ -105,6 +119,8 @@ void context_tracking_enter(enum ctx_state state) | |||
105 | */ | 119 | */ |
106 | __this_cpu_write(context_tracking.state, state); | 120 | __this_cpu_write(context_tracking.state, state); |
107 | } | 121 | } |
122 | context_tracking_recursion_exit(); | ||
123 | out_irq_restore: | ||
108 | local_irq_restore(flags); | 124 | local_irq_restore(flags); |
109 | } | 125 | } |
110 | NOKPROBE_SYMBOL(context_tracking_enter); | 126 | NOKPROBE_SYMBOL(context_tracking_enter); |
@@ -139,6 +155,9 @@ void context_tracking_exit(enum ctx_state state) | |||
139 | return; | 155 | return; |
140 | 156 | ||
141 | local_irq_save(flags); | 157 | local_irq_save(flags); |
158 | if (!context_tracking_recursion_enter()) | ||
159 | goto out_irq_restore; | ||
160 | |||
142 | if (__this_cpu_read(context_tracking.state) == state) { | 161 | if (__this_cpu_read(context_tracking.state) == state) { |
143 | if (__this_cpu_read(context_tracking.active)) { | 162 | if (__this_cpu_read(context_tracking.active)) { |
144 | /* | 163 | /* |
@@ -153,6 +172,8 @@ void context_tracking_exit(enum ctx_state state) | |||
153 | } | 172 | } |
154 | __this_cpu_write(context_tracking.state, CONTEXT_KERNEL); | 173 | __this_cpu_write(context_tracking.state, CONTEXT_KERNEL); |
155 | } | 174 | } |
175 | context_tracking_recursion_exit(); | ||
176 | out_irq_restore: | ||
156 | local_irq_restore(flags); | 177 | local_irq_restore(flags); |
157 | } | 178 | } |
158 | NOKPROBE_SYMBOL(context_tracking_exit); | 179 | NOKPROBE_SYMBOL(context_tracking_exit); |
@@ -164,24 +185,26 @@ void context_tracking_user_exit(void) | |||
164 | } | 185 | } |
165 | NOKPROBE_SYMBOL(context_tracking_user_exit); | 186 | NOKPROBE_SYMBOL(context_tracking_user_exit); |
166 | 187 | ||
167 | /** | 188 | void __init context_tracking_cpu_set(int cpu) |
168 | * __context_tracking_task_switch - context switch the syscall callbacks | ||
169 | * @prev: the task that is being switched out | ||
170 | * @next: the task that is being switched in | ||
171 | * | ||
172 | * The context tracking uses the syscall slow path to implement its user-kernel | ||
173 | * boundaries probes on syscalls. This way it doesn't impact the syscall fast | ||
174 | * path on CPUs that don't do context tracking. | ||
175 | * | ||
176 | * But we need to clear the flag on the previous task because it may later | ||
177 | * migrate to some CPU that doesn't do the context tracking. As such the TIF | ||
178 | * flag may not be desired there. | ||
179 | */ | ||
180 | void __context_tracking_task_switch(struct task_struct *prev, | ||
181 | struct task_struct *next) | ||
182 | { | 189 | { |
183 | clear_tsk_thread_flag(prev, TIF_NOHZ); | 190 | static __initdata bool initialized = false; |
184 | set_tsk_thread_flag(next, TIF_NOHZ); | 191 | |
192 | if (!per_cpu(context_tracking.active, cpu)) { | ||
193 | per_cpu(context_tracking.active, cpu) = true; | ||
194 | static_key_slow_inc(&context_tracking_enabled); | ||
195 | } | ||
196 | |||
197 | if (initialized) | ||
198 | return; | ||
199 | |||
200 | /* | ||
201 | * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork | ||
202 | * This assumes that init is the only task at this early boot stage. | ||
203 | */ | ||
204 | set_tsk_thread_flag(&init_task, TIF_NOHZ); | ||
205 | WARN_ON_ONCE(!tasklist_empty()); | ||
206 | |||
207 | initialized = true; | ||
185 | } | 208 | } |
186 | 209 | ||
187 | #ifdef CONFIG_CONTEXT_TRACKING_FORCE | 210 | #ifdef CONFIG_CONTEXT_TRACKING_FORCE |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c9a707b59331..c86935a7f1f8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -2374,7 +2374,6 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
2374 | */ | 2374 | */ |
2375 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); | 2375 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); |
2376 | 2376 | ||
2377 | context_tracking_task_switch(prev, next); | ||
2378 | /* Here we just switch the register state and the stack. */ | 2377 | /* Here we just switch the register state and the stack. */ |
2379 | switch_to(prev, next, prev); | 2378 | switch_to(prev, next, prev); |
2380 | barrier(); | 2379 | barrier(); |
@@ -7068,6 +7067,9 @@ void __init sched_init_smp(void) | |||
7068 | alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); | 7067 | alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); |
7069 | alloc_cpumask_var(&fallback_doms, GFP_KERNEL); | 7068 | alloc_cpumask_var(&fallback_doms, GFP_KERNEL); |
7070 | 7069 | ||
7070 | /* nohz_full won't take effect without isolating the cpus. */ | ||
7071 | tick_nohz_full_add_cpus_to(cpu_isolated_map); | ||
7072 | |||
7071 | sched_init_numa(); | 7073 | sched_init_numa(); |
7072 | 7074 | ||
7073 | /* | 7075 | /* |