From cf37b6b48428d6be8f8762b3599d529c44644fb2 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre
Date: Sun, 26 Jan 2014 23:42:01 -0500
Subject: sched/idle: Move cpu/idle.c to sched/idle.c

Integration of cpuidle with the scheduler requires that the idle loop be
closely integrated with the scheduler proper. Moving cpu/idle.c into the
sched directory will allow for a smoother integration, and eliminate a
subdirectory which contained only one source file.

Signed-off-by: Nicolas Pitre
Signed-off-by: Peter Zijlstra
Cc: Linus Torvalds
Cc: Andrew Morton
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/alpine.LFD.2.11.1401301102210.1652@knanqh.ubzr
Signed-off-by: Ingo Molnar
---
 kernel/sched/idle.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100644 kernel/sched/idle.c

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
new file mode 100644
index 000000000000..14ca43430aee
--- /dev/null
+++ b/kernel/sched/idle.c
@@ -0,0 +1,144 @@
+/*
+ * Generic entry point for the idle threads
+ */
+#include <linux/sched.h>
+#include <linux/cpu.h>
+#include <linux/cpuidle.h>
+#include <linux/tick.h>
+#include <linux/mm.h>
+#include <linux/stackprotector.h>
+
+#include <asm/tlb.h>
+
+#include <trace/events/power.h>
+
+static int __read_mostly cpu_idle_force_poll;
+
+void cpu_idle_poll_ctrl(bool enable)
+{
+	if (enable) {
+		cpu_idle_force_poll++;
+	} else {
+		cpu_idle_force_poll--;
+		WARN_ON_ONCE(cpu_idle_force_poll < 0);
+	}
+}
+
+#ifdef CONFIG_GENERIC_IDLE_POLL_SETUP
+static int __init cpu_idle_poll_setup(char *__unused)
+{
+	cpu_idle_force_poll = 1;
+	return 1;
+}
+__setup("nohlt", cpu_idle_poll_setup);
+
+static int __init cpu_idle_nopoll_setup(char *__unused)
+{
+	cpu_idle_force_poll = 0;
+	return 1;
+}
+__setup("hlt", cpu_idle_nopoll_setup);
+#endif
+
+static inline int cpu_idle_poll(void)
+{
+	rcu_idle_enter();
+	trace_cpu_idle_rcuidle(0, smp_processor_id());
+	local_irq_enable();
+	while (!tif_need_resched())
+		cpu_relax();
+	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+	rcu_idle_exit();
+	return 1;
+}
+
+/* Weak implementations for optional arch specific functions */
+void __weak arch_cpu_idle_prepare(void) { }
+void __weak arch_cpu_idle_enter(void) { }
+void __weak arch_cpu_idle_exit(void) { }
+void __weak arch_cpu_idle_dead(void) { }
+void __weak arch_cpu_idle(void)
+{
+	cpu_idle_force_poll = 1;
+	local_irq_enable();
+}
+
+/*
+ * Generic idle loop implementation
+ */
+static void cpu_idle_loop(void)
+{
+	while (1) {
+		tick_nohz_idle_enter();
+
+		while (!need_resched()) {
+			check_pgt_cache();
+			rmb();
+
+			if (cpu_is_offline(smp_processor_id()))
+				arch_cpu_idle_dead();
+
+			local_irq_disable();
+			arch_cpu_idle_enter();
+
+			/*
+			 * In poll mode we reenable interrupts and spin.
+			 *
+			 * Also if we detected in the wakeup from idle
+			 * path that the tick broadcast device expired
+			 * for us, we don't want to go deep idle as we
+			 * know that the IPI is going to arrive right
+			 * away
+			 */
+			if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
+				cpu_idle_poll();
+			} else {
+				if (!current_clr_polling_and_test()) {
+					stop_critical_timings();
+					rcu_idle_enter();
+					if (cpuidle_idle_call())
+						arch_cpu_idle();
+					if (WARN_ON_ONCE(irqs_disabled()))
+						local_irq_enable();
+					rcu_idle_exit();
+					start_critical_timings();
+				} else {
+					local_irq_enable();
+				}
+				__current_set_polling();
+			}
+			arch_cpu_idle_exit();
+			/*
+			 * We need to test and propagate the TIF_NEED_RESCHED
+			 * bit here because we might not have send the
+			 * reschedule IPI to idle tasks.
+			 */
+			if (tif_need_resched())
+				set_preempt_need_resched();
+		}
+		tick_nohz_idle_exit();
+		schedule_preempt_disabled();
+	}
+}
+
+void cpu_startup_entry(enum cpuhp_state state)
+{
+	/*
+	 * This #ifdef needs to die, but it's too late in the cycle to
+	 * make this generic (arm and sh have never invoked the canary
+	 * init for the non boot cpus!). Will be fixed in 3.11
+	 */
+#ifdef CONFIG_X86
+	/*
+	 * If we're the non-boot CPU, nothing set the stack canary up
+	 * for us. The boot CPU already has it initialized but no harm
+	 * in doing it again. This is a good place for updating it, as
+	 * we wont ever return from this function (so the invalid
+	 * canaries already on the stack wont ever trigger).
+	 */
+	boot_init_stack_canary();
+#endif
+	__current_set_polling();
+	arch_cpu_idle_prepare();
+	cpu_idle_loop();
+}
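
Aside: the polling idle path above (cpu_idle_poll()) is easiest to see in isolation. The user-space sketch below models it with C11 atomics; every name in it (need_resched_flag, idle_poll(), cpu_relax_hint()) is an invented stand-in for the kernel's TIF_NEED_RESCHED bit and cpu_relax(), not kernel API.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool need_resched_flag;	/* stand-in for TIF_NEED_RESCHED */

static inline void cpu_relax_hint(void)
{
#if defined(__x86_64__) || defined(__i386__)
	__builtin_ia32_pause();		/* tell the CPU this is a spin loop */
#endif
}

static void idle_poll(void)
{
	/* spin until some other context signals pending work */
	while (!atomic_load_explicit(&need_resched_flag, memory_order_acquire))
		cpu_relax_hint();
}

int main(void)
{
	/* pretend an interrupt handler already flagged pending work */
	atomic_store_explicit(&need_resched_flag, true, memory_order_release);
	idle_poll();
	printf("left the polling loop: work is pending\n");
	return 0;
}

The same shape is what "nohlt" forces globally: never halt, just spin on the flag with a relax hint until work shows up.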
From 06d50c65b1043b166d102accc081093f79d8f7e5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 24 Feb 2014 18:22:07 +0100
Subject: sched/idle: Remove stale old file

Commit cf37b6b48428d ("sched/idle: Move cpu/idle.c to sched/idle.c") was
meant to simply move a file; somehow the move got mangled: it created an
old version of the file and forgot to remove the original. Fix this
fail; add the lost change and remove the now identical old file.

Signed-off-by: Peter Zijlstra
Cc: rjw@rjwysocki.net
Cc: nicolas.pitre@linaro.org
Cc: preeti@linux.vnet.ibm.com
Cc: Daniel Lezcano
Link: http://lkml.kernel.org/r/20140224172207.GC9987@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar
---
 kernel/sched/idle.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 14ca43430aee..b7976a127178 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -108,14 +108,17 @@ static void cpu_idle_loop(void)
 				__current_set_polling();
 			}
 			arch_cpu_idle_exit();
-			/*
-			 * We need to test and propagate the TIF_NEED_RESCHED
-			 * bit here because we might not have send the
-			 * reschedule IPI to idle tasks.
-			 */
-			if (tif_need_resched())
-				set_preempt_need_resched();
 		}
+
+		/*
+		 * Since we fell out of the loop above, we know
+		 * TIF_NEED_RESCHED must be set, propagate it into
+		 * PREEMPT_NEED_RESCHED.
+		 *
+		 * This is required because for polling idle loops we will
+		 * not have had an IPI to fold the state for us.
+		 */
+		preempt_set_need_resched();
 		tick_nohz_idle_exit();
 		schedule_preempt_disabled();
 	}
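
The lost change above relies on a small invariant: the inner while (!need_resched()) loop can only terminate once the resched bit is set, so the fold into PREEMPT_NEED_RESCHED can be hoisted to a single unconditional call after the loop instead of being re-tested each iteration. A minimal stand-alone sketch of that restructuring, with invented flag names:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool need_resched;	/* models TIF_NEED_RESCHED */
static atomic_bool preempt_fold;	/* models PREEMPT_NEED_RESCHED */

static void do_idle_work(void)
{
	/* one unit of idle-time housekeeping */
}

static void idle_inner_loop(void)
{
	while (!atomic_load(&need_resched))
		do_idle_work();

	/*
	 * We fell out of the loop, so need_resched must be set:
	 * propagate it once here, unconditionally.
	 */
	atomic_store(&preempt_fold, true);
}

int main(void)
{
	atomic_store(&need_resched, true);	/* let the loop terminate */
	idle_inner_loop();
	return !atomic_load(&preempt_fold);	/* 0 on success */
}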
From 30cdd69e2a266505ca8229c944d361ff350a6959 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano
Date: Mon, 3 Mar 2014 08:48:51 +0100
Subject: cpuidle/idle: Move the cpuidle_idle_call function to idle.c

cpuidle_idle_call() does nothing more than call three individual
functions, and it is no longer used by any arch specific code, only by
the cpuidle framework code itself. We can move this function into the
idle task code to ensure better proximity to the scheduler code.

Signed-off-by: Daniel Lezcano
Acked-by: Nicolas Pitre
Signed-off-by: Peter Zijlstra
Cc: rjw@rjwysocki.net
Cc: preeti@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1393832934-11625-2-git-send-email-daniel.lezcano@linaro.org
Signed-off-by: Ingo Molnar
---
 kernel/sched/idle.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index b7976a127178..d5aaf5eb4531 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -63,6 +63,62 @@ void __weak arch_cpu_idle(void)
 	local_irq_enable();
 }
 
+#ifdef CONFIG_CPU_IDLE
+/**
+ * cpuidle_idle_call - the main idle function
+ *
+ * NOTE: no locks or semaphores should be used here
+ * return non-zero on failure
+ */
+static int cpuidle_idle_call(void)
+{
+	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
+	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+	int next_state, entered_state, ret;
+	bool broadcast;
+
+	ret = cpuidle_enabled(drv, dev);
+	if (ret < 0)
+		return ret;
+
+	/* ask the governor for the next state */
+	next_state = cpuidle_select(drv, dev);
+
+	if (need_resched()) {
+		dev->last_residency = 0;
+		/* give the governor an opportunity to reflect on the outcome */
+		cpuidle_reflect(dev, next_state);
+		local_irq_enable();
+		return 0;
+	}
+
+	broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP);
+
+	if (broadcast &&
+	    clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
+		return -EBUSY;
+
+	trace_cpu_idle_rcuidle(next_state, dev->cpu);
+
+	entered_state = cpuidle_enter(drv, dev, next_state);
+
+	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
+
+	if (broadcast)
+		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+
+	/* give the governor an opportunity to reflect on the outcome */
+	cpuidle_reflect(dev, entered_state);
+
+	return 0;
+}
+#else
+static inline int cpuidle_idle_call(void)
+{
+	return -ENODEV;
+}
+#endif
+
 /*
  * Generic idle loop implementation
  */
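
For readers outside the cpuidle framework, the call ordering that cpuidle_idle_call() implements is: check that the framework is usable, ask the governor to select a state, enter it through the driver, then let the governor reflect on what actually happened. The toy model below mirrors only that contract; struct idle_state, governor_select() and friends are invented for illustration and are not the real cpuidle API.

#include <stdio.h>

struct idle_state {
	const char *name;
	int exit_latency_us;
};

static struct idle_state states[] = {
	{ "poll", 0 }, { "shallow", 10 }, { "deep", 200 },
};

/* governor: pick a state (here: always the deepest) */
static int governor_select(void)
{
	return 2;
}

/* driver: "enter" the state, return the state actually used */
static int driver_enter(int state)
{
	printf("entering %s\n", states[state].name);
	return state;
}

/* governor: learn from what actually happened */
static void governor_reflect(int entered)
{
	printf("reflect: spent time in %s\n", states[entered].name);
}

int main(void)
{
	int next = governor_select();		/* 1. ask the governor */
	int entered = driver_enter(next);	/* 2. enter the chosen state */
	governor_reflect(entered);		/* 3. feed the outcome back */
	return 0;
}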
From c8cc7d4de7a4f2fb1f8774ec2de5b49c46c42e64 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano
Date: Mon, 3 Mar 2014 08:48:52 +0100
Subject: sched/idle: Reorganize the idle loop

Now that we have the main cpuidle function in idle.c, move some code
from the idle main loop into this function for the sake of clarity.
That removes the if-then-else indentation that was difficult to follow
when looking at the code. This patch does not change the current
behavior.

Signed-off-by: Daniel Lezcano
Acked-by: Nicolas Pitre
Signed-off-by: Peter Zijlstra
Cc: tglx@linutronix.de
Cc: rjw@rjwysocki.net
Cc: preeti@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1393832934-11625-3-git-send-email-daniel.lezcano@linaro.org
Signed-off-by: Ingo Molnar
---
 kernel/sched/idle.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index d5aaf5eb4531..dc8a2466418f 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -63,7 +63,6 @@ void __weak arch_cpu_idle(void)
 	local_irq_enable();
 }
 
-#ifdef CONFIG_CPU_IDLE
 /**
  * cpuidle_idle_call - the main idle function
  *
@@ -77,9 +76,14 @@ static int cpuidle_idle_call(void)
 	int next_state, entered_state, ret;
 	bool broadcast;
 
+	stop_critical_timings();
+	rcu_idle_enter();
+
 	ret = cpuidle_enabled(drv, dev);
-	if (ret < 0)
-		return ret;
+	if (ret < 0) {
+		arch_cpu_idle();
+		goto out;
+	}
 
 	/* ask the governor for the next state */
 	next_state = cpuidle_select(drv, dev);
@@ -89,7 +93,7 @@ static int cpuidle_idle_call(void)
 		/* give the governor an opportunity to reflect on the outcome */
 		cpuidle_reflect(dev, next_state);
 		local_irq_enable();
-		return 0;
+		goto out;
 	}
 
 	broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP);
@@ -109,15 +113,15 @@ static int cpuidle_idle_call(void)
 	/* give the governor an opportunity to reflect on the outcome */
 	cpuidle_reflect(dev, entered_state);
 
+out:
+	if (WARN_ON_ONCE(irqs_disabled()))
+		local_irq_enable();
+
+	rcu_idle_exit();
+	start_critical_timings();
 	return 0;
 }
-#else
-static inline int cpuidle_idle_call(void)
-{
-	return -ENODEV;
-}
-#endif
 
 /*
  * Generic idle loop implementation
@@ -150,14 +154,7 @@ static void cpu_idle_loop(void)
 				cpu_idle_poll();
 			} else {
 				if (!current_clr_polling_and_test()) {
-					stop_critical_timings();
-					rcu_idle_enter();
-					if (cpuidle_idle_call())
-						arch_cpu_idle();
-					if (WARN_ON_ONCE(irqs_disabled()))
-						local_irq_enable();
-					rcu_idle_exit();
-					start_critical_timings();
+					cpuidle_idle_call();
 				} else {
 					local_irq_enable();
 				}
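
The reorganization above is essentially the classic single-exit bracketing pattern: do the setup (stop_critical_timings()/rcu_idle_enter()) once on entry, route every early-exit condition through one out: label, and undo the setup there so the teardown can never be skipped. A compact user-space sketch of the pattern, with illustrative names only:

#include <stdio.h>

static void enter_quiet_section(void) { printf("enter quiet section\n"); }
static void leave_quiet_section(void) { printf("leave quiet section\n"); }

static int framework_ready(void)
{
	return -1;	/* pretend the idle framework is unavailable */
}

static void fallback_idle(void) { printf("fallback idle\n"); }
static void deep_idle(void) { printf("deep idle\n"); }

static int idle_call(void)
{
	enter_quiet_section();		/* rcu_idle_enter() analogue */

	if (framework_ready() < 0) {
		fallback_idle();	/* arch_cpu_idle() analogue */
		goto out;
	}

	deep_idle();
out:
	leave_quiet_section();		/* rcu_idle_exit() analogue: never skipped */
	return 0;
}

int main(void)
{
	return idle_call();
}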
From 8ca3c6424f4988fc19ed1067b121fbaf2e884d77 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano
Date: Mon, 3 Mar 2014 08:48:53 +0100
Subject: sched/idle: Move idle conditions in cpuidle_idle main function

This patch moves the conditions checked before entering idle into the
cpuidle main function located in idle.c. That simplifies the idle main
loop and increases the readability of the conditions for entering true
idle. This patch is a code reorganization and does not change the
behavior of the function.

Signed-off-by: Daniel Lezcano
Signed-off-by: Peter Zijlstra
Cc: tglx@linutronix.de
Cc: rjw@rjwysocki.net
Cc: nicolas.pitre@linaro.org
Cc: preeti@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1393832934-11625-4-git-send-email-daniel.lezcano@linaro.org
Signed-off-by: Ingo Molnar
---
 kernel/sched/idle.c | 78 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 44 insertions(+), 34 deletions(-)

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index dc8a2466418f..cc7a6f3801ff 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -76,44 +76,59 @@ static int cpuidle_idle_call(void)
 	int next_state, entered_state, ret;
 	bool broadcast;
 
+	if (current_clr_polling_and_test()) {
+		local_irq_enable();
+		__current_set_polling();
+		return 0;
+	}
+
 	stop_critical_timings();
 	rcu_idle_enter();
 
 	ret = cpuidle_enabled(drv, dev);
-	if (ret < 0) {
-		arch_cpu_idle();
-		goto out;
-	}
 
-	/* ask the governor for the next state */
-	next_state = cpuidle_select(drv, dev);
+	if (!ret) {
+		/* ask the governor for the next state */
+		next_state = cpuidle_select(drv, dev);
 
-	if (need_resched()) {
-		dev->last_residency = 0;
-		/* give the governor an opportunity to reflect on the outcome */
-		cpuidle_reflect(dev, next_state);
-		local_irq_enable();
-		goto out;
-	}
+		if (current_clr_polling_and_test()) {
+			dev->last_residency = 0;
+			entered_state = next_state;
+			local_irq_enable();
+		} else {
+			broadcast = !!(drv->states[next_state].flags &
+				       CPUIDLE_FLAG_TIMER_STOP);
+
+			if (broadcast)
+				ret = clockevents_notify(
+					CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
+					&dev->cpu);
 
-	broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP);
+			if (!ret) {
+				trace_cpu_idle_rcuidle(next_state, dev->cpu);
 
-	if (broadcast &&
-	    clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
-		return -EBUSY;
+				entered_state = cpuidle_enter(drv, dev,
+							      next_state);
 
-	trace_cpu_idle_rcuidle(next_state, dev->cpu);
+				trace_cpu_idle_rcuidle(PWR_EVENT_EXIT,
						       dev->cpu);
 
-	entered_state = cpuidle_enter(drv, dev, next_state);
+				if (broadcast)
+					clockevents_notify(
+						CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
+						&dev->cpu);
 
-	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
+				/* give the governor an opportunity to reflect on the outcome */
+				cpuidle_reflect(dev, entered_state);
+			}
+		}
+	}
+
+	if (ret)
+		arch_cpu_idle();
 
-	if (broadcast)
-		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+	__current_set_polling();
 
-	/* give the governor an opportunity to reflect on the outcome */
-	cpuidle_reflect(dev, entered_state);
-out:
 	if (WARN_ON_ONCE(irqs_disabled()))
 		local_irq_enable();
@@ -150,16 +165,11 @@ static void cpu_idle_loop(void)
 			 * know that the IPI is going to arrive right
 			 * away
 			 */
-			if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
+			if (cpu_idle_force_poll || tick_check_broadcast_expired())
 				cpu_idle_poll();
-			} else {
-				if (!current_clr_polling_and_test()) {
-					cpuidle_idle_call();
-				} else {
-					local_irq_enable();
-				}
-				__current_set_polling();
-			}
+			else
+				cpuidle_idle_call();
+
 			arch_cpu_idle_exit();
 		}
 
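
The key primitive this patch leans on is current_clr_polling_and_test(): atomically stop advertising "I am polling, set a flag instead of sending an IPI", then check whether a resched request raced in while the advertisement was still visible. A user-space model of that idea follows; the flag names are invented and the memory ordering is deliberately conservative.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool polling;		/* "wake me by setting a flag, no IPI" */
static atomic_bool need_resched;	/* work arrived while we were idling */

static bool clr_polling_and_test(void)
{
	/* stop advertising polling before the final check */
	atomic_store_explicit(&polling, false, memory_order_seq_cst);

	/*
	 * If need_resched was set while polling was still visible,
	 * the waker skipped the IPI; we must notice it here and
	 * abort the idle entry.
	 */
	return atomic_load_explicit(&need_resched, memory_order_seq_cst);
}

int main(void)
{
	atomic_store(&polling, true);
	atomic_store(&need_resched, true);	/* simulate a racing wakeup */

	if (clr_polling_and_test())
		printf("resched raced in: do not enter idle\n");
	return 0;
}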
From a1d028bd6d2b7789d15eddfd07c5bea2aaf36040 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano
Date: Mon, 3 Mar 2014 08:48:54 +0100
Subject: sched/idle: Add more comments to the code

The idle main function is a complex and critical function. Add more
comments to the code to explain it.

Signed-off-by: Daniel Lezcano
Acked-by: Nicolas Pitre
Signed-off-by: Peter Zijlstra
Cc: tglx@linutronix.de
Cc: rjw@rjwysocki.net
Cc: preeti@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1393832934-11625-5-git-send-email-daniel.lezcano@linaro.org
Signed-off-by: Ingo Molnar
---
 kernel/sched/idle.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 57 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index cc7a6f3801ff..8f4390a079c7 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -76,21 +76,49 @@ static int cpuidle_idle_call(void)
 	int next_state, entered_state, ret;
 	bool broadcast;
 
+	/*
+	 * Check if the idle task must be rescheduled. If it is the
+	 * case, exit the function after re-enabling the local irq and
+	 * set again the polling flag
+	 */
 	if (current_clr_polling_and_test()) {
 		local_irq_enable();
 		__current_set_polling();
 		return 0;
 	}
 
+	/*
+	 * During the idle period, stop measuring the disabled irqs
+	 * critical sections latencies
+	 */
 	stop_critical_timings();
+
+	/*
+	 * Tell the RCU framework we are entering an idle section,
+	 * so no more rcu read side critical sections and one more
+	 * step to the grace period
+	 */
 	rcu_idle_enter();
 
+	/*
+	 * Check if the cpuidle framework is ready, otherwise fallback
+	 * to the default arch specific idle method
+	 */
 	ret = cpuidle_enabled(drv, dev);
 
 	if (!ret) {
-		/* ask the governor for the next state */
+		/*
+		 * Ask the governor to choose an idle state it thinks
+		 * it is convenient to go to. There is *always* a
+		 * convenient idle state
+		 */
 		next_state = cpuidle_select(drv, dev);
 
+		/*
+		 * The idle task must be scheduled, it is pointless to
+		 * go to idle, just update no idle residency and get
+		 * out of this function
+		 */
 		if (current_clr_polling_and_test()) {
 			dev->last_residency = 0;
 			entered_state = next_state;
@@ -100,6 +128,14 @@ static int cpuidle_idle_call(void)
 				CPUIDLE_FLAG_TIMER_STOP);
 
 			if (broadcast)
+				/*
+				 * Tell the time framework to switch
+				 * to a broadcast timer because our
+				 * local timer will be shutdown. If a
+				 * local timer is used from another
+				 * cpu as a broadcast timer, this call
+				 * may fail if it is not available
+				 */
 				ret = clockevents_notify(
 					CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
 					&dev->cpu);
@@ -107,6 +143,14 @@ static int cpuidle_idle_call(void)
 			if (!ret) {
 				trace_cpu_idle_rcuidle(next_state, dev->cpu);
 
+				/*
+				 * Enter the idle state previously
+				 * returned by the governor
+				 * decision. This function will block
+				 * until an interrupt occurs and will
+				 * take care of re-enabling the local
+				 * interrupts
+				 */
 				entered_state = cpuidle_enter(drv, dev,
 							      next_state);
 
@@ -118,17 +162,28 @@ static int cpuidle_idle_call(void)
 					CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
 					&dev->cpu);
 
-				/* give the governor an opportunity to reflect on the outcome */
+				/*
+				 * Give the governor an opportunity to reflect on the
+				 * outcome
+				 */
 				cpuidle_reflect(dev, entered_state);
 			}
 		}
 	}
 
+	/*
+	 * We can't use the cpuidle framework, let's use the default
+	 * idle routine
+	 */
 	if (ret)
 		arch_cpu_idle();
 
 	__current_set_polling();
 
+	/*
+	 * It is up to the idle functions to enable back the local
+	 * interrupt
+	 */
 	if (WARN_ON_ONCE(irqs_disabled()))
 		local_irq_enable();
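
One of the new comments documents the broadcast-timer handshake: before entering a state that stops the local timer, hand wakeup duty to a broadcast timer, and skip the deep state if that hand-off fails. The toy below models just that pairing; broadcast_enter()/broadcast_exit() are invented names, and the enter side always succeeds here, whereas the real clockevents_notify() call can fail.

#include <stdio.h>

static int broadcast_enter(int cpu)
{
	/* always succeeds in this toy; the real hand-off can fail */
	printf("cpu%d: broadcast timer armed\n", cpu);
	return 0;
}

static void broadcast_exit(int cpu)
{
	printf("cpu%d: local timer resumed\n", cpu);
}

static void enter_deep_state(int cpu, int stops_local_timer)
{
	if (stops_local_timer && broadcast_enter(cpu) != 0)
		return;		/* no one can wake us: skip the deep state */

	printf("cpu%d: deep idle\n", cpu);

	if (stops_local_timer)
		broadcast_exit(cpu);	/* paired with the successful enter */
}

int main(void)
{
	enter_deep_state(0, 1);
	return 0;
}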