 include/linux/cpu.h   |   2
 include/linux/sched.h |   3
 kernel/fork.c         |   2
 kernel/sched/core.c   |   1
 kernel/sched/idle.c   | 162
 5 files changed, 107 insertions(+), 63 deletions(-)
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index b886dc17f2f3..ac0efae38072 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -245,6 +245,8 @@ void arch_cpu_idle_dead(void);
 int cpu_report_state(int cpu);
 int cpu_check_up_prepare(int cpu);
 void cpu_set_state_online(int cpu);
+void play_idle(unsigned long duration_ms);
+
 #ifdef CONFIG_HOTPLUG_CPU
 bool cpu_wait_death(unsigned int cpu, int seconds);
 bool cpu_report_death(void);
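
The hunk above adds the entire public interface of this patch: play_idle() forces the calling CPU into the idle loop for the given number of milliseconds. The caller must be a kernel thread running SCHED_FIFO and pinned to a single CPU; those preconditions are enforced by WARN_ON_ONCE() checks in kernel/sched/idle.c below, and a fuller setup sketch (hypothetical, not part of this diff) follows the last hunk.

/* Hypothetical call site: inject 10 ms of forced idle on this CPU. */
play_idle(10);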
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b0ec92..114c7fcb6af6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2254,6 +2254,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 /*
  * Per process flags
  */
+#define PF_IDLE		0x00000002	/* I am an IDLE thread */
 #define PF_EXITING	0x00000004	/* getting shut down */
 #define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
 #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
@@ -2609,7 +2610,7 @@ extern struct task_struct *idle_task(int cpu);
  */
 static inline bool is_idle_task(const struct task_struct *p)
 {
-	return p->pid == 0;
+	return !!(p->flags & PF_IDLE);
 }
 extern struct task_struct *curr_task(int cpu);
 extern void ia64_set_curr_task(int cpu, struct task_struct *p);
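
Note the semantic widening in is_idle_task(): the old test recognized only the per-CPU swapper threads (PID 0), while the flag-based test also covers any task that temporarily marks itself PF_IDLE, which is exactly what play_idle() does below. Code that consults is_idle_task(), such as cputime accounting, can therefore treat injected idle periods as idle time. An illustrative assertion (check_injected_idle() is a hypothetical helper, valid while a kthread executes inside play_idle()):

#include <linux/sched.h>

static void check_injected_idle(void)
{
	WARN_ON(current->pid == 0);		/* not the swapper ... */
	WARN_ON(!(current->flags & PF_IDLE));	/* ... but flagged idle, */
	WARN_ON(!is_idle_task(current));	/* so the new test returns true */
}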
diff --git a/kernel/fork.c b/kernel/fork.c
index 623259fc794d..5074b2f0827b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1537,7 +1537,7 @@ static __latent_entropy struct task_struct *copy_process(
 		goto bad_fork_cleanup_count;
 
 	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
-	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
+	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE);
 	p->flags |= PF_FORKNOEXEC;
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 94732d1ab00a..63b3a8a49884 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5285,6 +5285,7 @@ void init_idle(struct task_struct *idle, int cpu)
 	__sched_fork(0, idle);
 	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
+	idle->flags |= PF_IDLE;
 
 	kasan_unpoison_task_stack(idle);
 
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 513e4dfeeae7..6a4bae0a649d 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -205,76 +205,65 @@ exit_idle:
  *
  * Called with polling cleared.
  */
-static void cpu_idle_loop(void)
+static void do_idle(void)
 {
-	int cpu = smp_processor_id();
-
-	while (1) {
-		/*
-		 * If the arch has a polling bit, we maintain an invariant:
-		 *
-		 * Our polling bit is clear if we're not scheduled (i.e. if
-		 * rq->curr != rq->idle). This means that, if rq->idle has
-		 * the polling bit set, then setting need_resched is
-		 * guaranteed to cause the cpu to reschedule.
-		 */
-
-		__current_set_polling();
-		quiet_vmstat();
-		tick_nohz_idle_enter();
+	/*
+	 * If the arch has a polling bit, we maintain an invariant:
+	 *
+	 * Our polling bit is clear if we're not scheduled (i.e. if rq->curr !=
+	 * rq->idle). This means that, if rq->idle has the polling bit set,
+	 * then setting need_resched is guaranteed to cause the CPU to
+	 * reschedule.
+	 */
 
-		while (!need_resched()) {
-			check_pgt_cache();
-			rmb();
+	__current_set_polling();
+	tick_nohz_idle_enter();
 
-			if (cpu_is_offline(cpu)) {
-				cpuhp_report_idle_dead();
-				arch_cpu_idle_dead();
-			}
+	while (!need_resched()) {
+		check_pgt_cache();
+		rmb();
 
-			local_irq_disable();
-			arch_cpu_idle_enter();
+		if (cpu_is_offline(smp_processor_id())) {
+			cpuhp_report_idle_dead();
+			arch_cpu_idle_dead();
+		}
 
-			/*
-			 * In poll mode we reenable interrupts and spin.
-			 *
-			 * Also if we detected in the wakeup from idle
-			 * path that the tick broadcast device expired
-			 * for us, we don't want to go deep idle as we
-			 * know that the IPI is going to arrive right
-			 * away
-			 */
-			if (cpu_idle_force_poll || tick_check_broadcast_expired())
-				cpu_idle_poll();
-			else
-				cpuidle_idle_call();
+		local_irq_disable();
+		arch_cpu_idle_enter();
 
-			arch_cpu_idle_exit();
-		}
-
-		/*
-		 * Since we fell out of the loop above, we know
-		 * TIF_NEED_RESCHED must be set, propagate it into
-		 * PREEMPT_NEED_RESCHED.
-		 *
-		 * This is required because for polling idle loops we will
-		 * not have had an IPI to fold the state for us.
+		/*
+		 * In poll mode we reenable interrupts and spin. Also if we
+		 * detected in the wakeup from idle path that the tick
+		 * broadcast device expired for us, we don't want to go deep
+		 * idle as we know that the IPI is going to arrive right away.
 		 */
-		preempt_set_need_resched();
-		tick_nohz_idle_exit();
-		__current_clr_polling();
+		if (cpu_idle_force_poll || tick_check_broadcast_expired())
+			cpu_idle_poll();
+		else
+			cpuidle_idle_call();
+		arch_cpu_idle_exit();
+	}
 
-		/*
-		 * We promise to call sched_ttwu_pending and reschedule
-		 * if need_resched is set while polling is set. That
-		 * means that clearing polling needs to be visible
-		 * before doing these things.
-		 */
-		smp_mb__after_atomic();
-
-		sched_ttwu_pending();
-		schedule_preempt_disabled();
+	/*
+	 * Since we fell out of the loop above, we know TIF_NEED_RESCHED must
+	 * be set, propagate it into PREEMPT_NEED_RESCHED.
+	 *
+	 * This is required because for polling idle loops we will not have had
+	 * an IPI to fold the state for us.
+	 */
+	preempt_set_need_resched();
+	tick_nohz_idle_exit();
+	__current_clr_polling();
+
+	/*
+	 * We promise to call sched_ttwu_pending() and reschedule if
+	 * need_resched() is set while polling is set. That means that clearing
+	 * polling needs to be visible before doing these things.
+	 */
+	smp_mb__after_atomic();
+
+	sched_ttwu_pending();
+	schedule_preempt_disabled();
 }
 
 bool cpu_in_idle(unsigned long pc)
@@ -283,6 +272,56 @@ bool cpu_in_idle(unsigned long pc)
 	       pc < (unsigned long)__cpuidle_text_end;
 }
 
+struct idle_timer {
+	struct hrtimer timer;
+	int done;
+};
+
+static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
+{
+	struct idle_timer *it = container_of(timer, struct idle_timer, timer);
+
+	WRITE_ONCE(it->done, 1);
+	set_tsk_need_resched(current);
+
+	return HRTIMER_NORESTART;
+}
+
+void play_idle(unsigned long duration_ms)
+{
+	struct idle_timer it;
+
+	/*
+	 * Only FIFO tasks can disable the tick since they don't need the forced
+	 * preemption.
+	 */
+	WARN_ON_ONCE(current->policy != SCHED_FIFO);
+	WARN_ON_ONCE(current->nr_cpus_allowed != 1);
+	WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
+	WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY));
+	WARN_ON_ONCE(!duration_ms);
+
+	rcu_sleep_check();
+	preempt_disable();
+	current->flags |= PF_IDLE;
+	cpuidle_use_deepest_state(true);
+
+	it.done = 0;
+	hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	it.timer.function = idle_inject_timer_fn;
+	hrtimer_start(&it.timer, ms_to_ktime(duration_ms), HRTIMER_MODE_REL_PINNED);
+
+	while (!READ_ONCE(it.done))
+		do_idle();
+
+	cpuidle_use_deepest_state(false);
+	current->flags &= ~PF_IDLE;
+
+	preempt_fold_need_resched();
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(play_idle);
+
 void cpu_startup_entry(enum cpuhp_state state)
 {
 	/*
@@ -302,5 +341,6 @@ void cpu_startup_entry(enum cpuhp_state state)
 #endif
 	arch_cpu_idle_prepare();
 	cpuhp_online_idle(state);
-	cpu_idle_loop();
+	while (1)
+		do_idle();
 }
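
For reference, a setup sketch showing how a caller could satisfy every WARN_ON_ONCE() precondition in play_idle() above: SCHED_FIFO policy, affinity pinned to exactly one CPU, PF_KTHREAD, PF_NO_SETAFFINITY, and a nonzero duration. This is hypothetical driver code, not part of this patch; idle_inject_fn() and start_idle_injection() are invented names.

#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int idle_inject_fn(void *unused)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO / 2 };

	/* FIFO policy lets play_idle() stop the tick without forced preemption. */
	sched_setscheduler(current, SCHED_FIFO, &param);

	while (!kthread_should_stop()) {
		play_idle(10);	/* 10 ms of forced idle ... */
		msleep(40);	/* ... then 40 ms for real work */
	}
	return 0;
}

static struct task_struct *start_idle_injection(int cpu)
{
	struct task_struct *tsk;

	tsk = kthread_create(idle_inject_fn, NULL, "idle_inject/%d", cpu);
	if (IS_ERR(tsk))
		return tsk;

	/* kthread_bind() pins the thread to one CPU and sets PF_NO_SETAFFINITY. */
	kthread_bind(tsk, cpu);
	wake_up_process(tsk);
	return tsk;
}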