about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/cpu.h 2
-rw-r--r-- include/linux/sched.h 3
-rw-r--r-- kernel/fork.c 2
-rw-r--r-- kernel/sched/core.c 1
-rw-r--r-- kernel/sched/idle.c 162
5 files changed, 107 insertions, 63 deletions
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index b886dc17f2f3..ac0efae38072 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -245,6 +245,8 @@ void arch_cpu_idle_dead(void);
245int cpu_report_state(int cpu); 245int cpu_report_state(int cpu);
246int cpu_check_up_prepare(int cpu); 246int cpu_check_up_prepare(int cpu);
247void cpu_set_state_online(int cpu); 247void cpu_set_state_online(int cpu);
248void play_idle(unsigned long duration_ms);
249
248#ifdef CONFIG_HOTPLUG_CPU 250#ifdef CONFIG_HOTPLUG_CPU
249bool cpu_wait_death(unsigned int cpu, int seconds); 251bool cpu_wait_death(unsigned int cpu, int seconds);
250bool cpu_report_death(void); 252bool cpu_report_death(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b0ec92..114c7fcb6af6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2254,6 +2254,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
2254/* 2254/*
2255 * Per process flags 2255 * Per process flags
2256 */ 2256 */
2257#define PF_IDLE 0x00000002 /* I am an IDLE thread */
2257#define PF_EXITING 0x00000004 /* getting shut down */ 2258#define PF_EXITING 0x00000004 /* getting shut down */
2258#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ 2259#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
2259#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ 2260#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
@@ -2609,7 +2610,7 @@ extern struct task_struct *idle_task(int cpu);
2609 */ 2610 */
2610static inline bool is_idle_task(const struct task_struct *p) 2611static inline bool is_idle_task(const struct task_struct *p)
2611{ 2612{
2612 return p->pid == 0; 2613 return !!(p->flags & PF_IDLE);
2613} 2614}
2614extern struct task_struct *curr_task(int cpu); 2615extern struct task_struct *curr_task(int cpu);
2615extern void ia64_set_curr_task(int cpu, struct task_struct *p); 2616extern void ia64_set_curr_task(int cpu, struct task_struct *p);
diff --git a/kernel/fork.c b/kernel/fork.c
index 623259fc794d..5074b2f0827b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1537,7 +1537,7 @@ static __latent_entropy struct task_struct *copy_process(
1537 goto bad_fork_cleanup_count; 1537 goto bad_fork_cleanup_count;
1538 1538
1539 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ 1539 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
1540 p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER); 1540 p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE);
1541 p->flags |= PF_FORKNOEXEC; 1541 p->flags |= PF_FORKNOEXEC;
1542 INIT_LIST_HEAD(&p->children); 1542 INIT_LIST_HEAD(&p->children);
1543 INIT_LIST_HEAD(&p->sibling); 1543 INIT_LIST_HEAD(&p->sibling);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 94732d1ab00a..63b3a8a49884 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5285,6 +5285,7 @@ void init_idle(struct task_struct *idle, int cpu)
5285 __sched_fork(0, idle); 5285 __sched_fork(0, idle);
5286 idle->state = TASK_RUNNING; 5286 idle->state = TASK_RUNNING;
5287 idle->se.exec_start = sched_clock(); 5287 idle->se.exec_start = sched_clock();
5288 idle->flags |= PF_IDLE;
5288 5289
5289 kasan_unpoison_task_stack(idle); 5290 kasan_unpoison_task_stack(idle);
5290 5291
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 513e4dfeeae7..6a4bae0a649d 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -205,76 +205,65 @@ exit_idle:
205 * 205 *
206 * Called with polling cleared. 206 * Called with polling cleared.
207 */ 207 */
208static void cpu_idle_loop(void) 208static void do_idle(void)
209{ 209{
210 int cpu = smp_processor_id(); 210 /*
211 211 * If the arch has a polling bit, we maintain an invariant:
212 while (1) { 212 *
213 /* 213 * Our polling bit is clear if we're not scheduled (i.e. if rq->curr !=
214 * If the arch has a polling bit, we maintain an invariant: 214 * rq->idle). This means that, if rq->idle has the polling bit set,
215 * 215 * then setting need_resched is guaranteed to cause the CPU to
216 * Our polling bit is clear if we're not scheduled (i.e. if 216 * reschedule.
217 * rq->curr != rq->idle). This means that, if rq->idle has 217 */
218 * the polling bit set, then setting need_resched is
219 * guaranteed to cause the cpu to reschedule.
220 */
221
222 __current_set_polling();
223 quiet_vmstat();
224 tick_nohz_idle_enter();
225 218
226 while (!need_resched()) { 219 __current_set_polling();
227 check_pgt_cache(); 220 tick_nohz_idle_enter();
228 rmb();
229 221
230 if (cpu_is_offline(cpu)) { 222 while (!need_resched()) {
231 cpuhp_report_idle_dead(); 223 check_pgt_cache();
232 arch_cpu_idle_dead(); 224 rmb();
233 }
234 225
235 local_irq_disable(); 226 if (cpu_is_offline(smp_processor_id())) {
236 arch_cpu_idle_enter(); 227 cpuhp_report_idle_dead();
237 228 arch_cpu_idle_dead();
238 /*
239 * In poll mode we reenable interrupts and spin.
240 *
241 * Also if we detected in the wakeup from idle
242 * path that the tick broadcast device expired
243 * for us, we don't want to go deep idle as we
244 * know that the IPI is going to arrive right
245 * away
246 */
247 if (cpu_idle_force_poll || tick_check_broadcast_expired())
248 cpu_idle_poll();
249 else
250 cpuidle_idle_call();
251
252 arch_cpu_idle_exit();
253 } 229 }
254 230
255 /* 231 local_irq_disable();
256 * Since we fell out of the loop above, we know 232 arch_cpu_idle_enter();
257 * TIF_NEED_RESCHED must be set, propagate it into
258 * PREEMPT_NEED_RESCHED.
259 *
260 * This is required because for polling idle loops we will
261 * not have had an IPI to fold the state for us.
262 */
263 preempt_set_need_resched();
264 tick_nohz_idle_exit();
265 __current_clr_polling();
266 233
267 /* 234 /*
268 * We promise to call sched_ttwu_pending and reschedule 235 * In poll mode we reenable interrupts and spin. Also if we
269 * if need_resched is set while polling is set. That 236 * detected in the wakeup from idle path that the tick
270 * means that clearing polling needs to be visible 237 * broadcast device expired for us, we don't want to go deep
271 * before doing these things. 238 * idle as we know that the IPI is going to arrive right away.
272 */ 239 */
273 smp_mb__after_atomic(); 240 if (cpu_idle_force_poll || tick_check_broadcast_expired())
274 241 cpu_idle_poll();
275 sched_ttwu_pending(); 242 else
276 schedule_preempt_disabled(); 243 cpuidle_idle_call();
244 arch_cpu_idle_exit();
277 } 245 }
246
247 /*
248 * Since we fell out of the loop above, we know TIF_NEED_RESCHED must
249 * be set, propagate it into PREEMPT_NEED_RESCHED.
250 *
251 * This is required because for polling idle loops we will not have had
252 * an IPI to fold the state for us.
253 */
254 preempt_set_need_resched();
255 tick_nohz_idle_exit();
256 __current_clr_polling();
257
258 /*
259 * We promise to call sched_ttwu_pending() and reschedule if
260 * need_resched() is set while polling is set. That means that clearing
261 * polling needs to be visible before doing these things.
262 */
263 smp_mb__after_atomic();
264
265 sched_ttwu_pending();
266 schedule_preempt_disabled();
278} 267}
279 268
280bool cpu_in_idle(unsigned long pc) 269bool cpu_in_idle(unsigned long pc)
@@ -283,6 +272,56 @@ bool cpu_in_idle(unsigned long pc)
283 pc < (unsigned long)__cpuidle_text_end; 272 pc < (unsigned long)__cpuidle_text_end;
284} 273}
285 274
275struct idle_timer {
276 struct hrtimer timer;
277 int done;
278};
279
280static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
281{
282 struct idle_timer *it = container_of(timer, struct idle_timer, timer);
283
284 WRITE_ONCE(it->done, 1);
285 set_tsk_need_resched(current);
286
287 return HRTIMER_NORESTART;
288}
289
290void play_idle(unsigned long duration_ms)
291{
292 struct idle_timer it;
293
294 /*
295 * Only FIFO tasks can disable the tick since they don't need the forced
296 * preemption.
297 */
298 WARN_ON_ONCE(current->policy != SCHED_FIFO);
299 WARN_ON_ONCE(current->nr_cpus_allowed != 1);
300 WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
301 WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY));
302 WARN_ON_ONCE(!duration_ms);
303
304 rcu_sleep_check();
305 preempt_disable();
306 current->flags |= PF_IDLE;
307 cpuidle_use_deepest_state(true);
308
309 it.done = 0;
310 hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
311 it.timer.function = idle_inject_timer_fn;
312 hrtimer_start(&it.timer, ms_to_ktime(duration_ms), HRTIMER_MODE_REL_PINNED);
313
314 while (!READ_ONCE(it.done))
315 do_idle();
316
317 cpuidle_use_deepest_state(false);
318 current->flags &= ~PF_IDLE;
319
320 preempt_fold_need_resched();
321 preempt_enable();
322}
323EXPORT_SYMBOL_GPL(play_idle);
324
286void cpu_startup_entry(enum cpuhp_state state) 325void cpu_startup_entry(enum cpuhp_state state)
287{ 326{
288 /* 327 /*
@@ -302,5 +341,6 @@ void cpu_startup_entry(enum cpuhp_state state)
302#endif 341#endif
303 arch_cpu_idle_prepare(); 342 arch_cpu_idle_prepare();
304 cpuhp_online_idle(state); 343 cpuhp_online_idle(state);
305 cpu_idle_loop(); 344 while (1)
345 do_idle();
306} 346}