From 5bfb5d690f36d316a5f3b4f7775fda996faa6b12 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 8 Nov 2005 21:39:01 -0800 Subject: [PATCH] sched: disable preempt in idle tasks Run idle threads with preempt disabled. Also corrected a bugs in arm26's cpu_idle (make it actually call schedule()). How did it ever work before? Might fix the CPU hotplugging hang which Nigel Cunningham noted. We think the bug hits if the idle thread is preempted after checking need_resched() and before going to sleep, then the CPU offlined. After calling stop_machine_run, the CPU eventually returns from preemption and into the idle thread and goes to sleep. The CPU will continue executing previous idle and have no chance to call play_dead. By disabling preemption until we are ready to explicitly schedule, this bug is fixed and the idle threads generally become more robust. From: alexs PPC build fix From: Yoichi Yuasa MIPS build fix Signed-off-by: Nick Piggin Signed-off-by: Yoichi Yuasa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/pseries/setup.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/platforms/pseries/setup.c') diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e78c39368841..4854f5eb5c3d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -539,7 +539,9 @@ static void pseries_dedicated_idle(void) lpaca->lppaca.idle = 0; ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); @@ -583,7 +585,9 @@ static void pseries_shared_idle(void) lpaca->lppaca.idle = 0; ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); -- cgit v1.2.2 From 64c7c8f88559624abdbe12b5da6502e8879f8d28 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 8 Nov 2005 21:39:04 -0800 Subject: [PATCH] sched: resched and cpu_idle rework Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce confusion, and make their semantics rigid. Improves efficiency of resched_task and some cpu_idle routines. * In resched_task: - TIF_NEED_RESCHED is only cleared with the task's runqueue lock held, and as we hold it during resched_task, then there is no need for an atomic test and set there. The only other time this should be set is when the task's quantum expires, in the timer interrupt - this is protected against because the rq lock is irq-safe. - If TIF_NEED_RESCHED is set, then we don't need to do anything. It won't get unset until the task get's schedule()d off. - If we are running on the same CPU as the task we resched, then set TIF_NEED_RESCHED and no further action is required. - If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set after TIF_NEED_RESCHED has been set, then we need to send an IPI. Using these rules, we are able to remove the test and set operation in resched_task, and make clear the previously vague semantics of POLLING_NRFLAG. * In idle routines: - Enter cpu_idle with preempt disabled. When the need_resched() condition becomes true, explicitly call schedule(). This makes things a bit clearer (IMO), but haven't updated all architectures yet. - Many do a test and clear of TIF_NEED_RESCHED for some reason. According to the resched_task rules, this isn't needed (and actually breaks the assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock held). So remove that. Generally one less locked memory op when switching to the idle thread. - Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner most polling idle loops. The above resched_task semantics allow it to be set until before the last time need_resched() is checked before going into a halt requiring interrupt wakeup. Many idle routines simply never enter such a halt, and so POLLING_NRFLAG can be always left set, completely eliminating resched IPIs when rescheduling the idle task. POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs. Signed-off-by: Nick Piggin Cc: Ingo Molnar Cc: Con Kolivas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/pseries/setup.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/platforms/pseries/setup.c') diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 4854f5eb5c3d..a093a0d4dd69 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -469,6 +469,7 @@ static inline void dedicated_idle_sleep(unsigned int cpu) * more. */ clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb__after_clear_bit(); /* * SMT dynamic mode. Cede will result in this thread going @@ -481,6 +482,7 @@ static inline void dedicated_idle_sleep(unsigned int cpu) cede_processor(); else local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); } else { /* * Give the HV an opportunity at the processor, since we are @@ -492,11 +494,11 @@ static inline void dedicated_idle_sleep(unsigned int cpu) static void pseries_dedicated_idle(void) { - long oldval; struct paca_struct *lpaca = get_paca(); unsigned int cpu = smp_processor_id(); unsigned long start_snooze; unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); + set_thread_flag(TIF_POLLING_NRFLAG); while (1) { /* @@ -505,10 +507,7 @@ static void pseries_dedicated_idle(void) */ lpaca->lppaca.idle = 1; - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - + if (!need_resched()) { start_snooze = __get_tb() + *smt_snooze_delay * tb_ticks_per_usec; @@ -531,9 +530,6 @@ static void pseries_dedicated_idle(void) } HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); } lpaca->lppaca.idle = 0; -- cgit v1.2.2