From 95e904c7da715aa2dbfb595da66b63de37a0bb04 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sun, 11 May 2008 05:55:33 +0530 Subject: sched: fix defined-but-unused warning Fix this warning, which appears with !CONFIG_SMP: kernel/sched.c:1216: warning: `init_hrtick' defined but not used Signed-off-by: Rabin Vincent Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index eaf6751e7612..04228524d160 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1127,6 +1127,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) return HRTIMER_NORESTART; } +#ifdef CONFIG_SMP static void hotplug_hrtick_disable(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -1182,6 +1183,7 @@ static void init_hrtick(void) { hotcpu_notifier(hotplug_hrtick, 0); } +#endif /* CONFIG_SMP */ static void init_rq_hrtick(struct rq *rq) { -- cgit v1.2.2 From 49307fd6f72bdd68cc2bd23e7da0bcfecf8087c9 Mon Sep 17 00:00:00 2001 From: Dario Faggioli Date: Wed, 18 Jun 2008 09:18:38 +0200 Subject: sched: NULL pointer dereference while setting sched_rt_period_us MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_RT_GROUP_SCHED and CONFIG_CGROUP_SCHED are enabled, with: echo 10000 > /proc/sys/kernel/sched_rt_period_us We get this: BUG: unable to handle kernel NULL pointer dereference at 0000008c [ 947.682233] IP: [] __rt_schedulable+0x12/0x160 [ 947.683123] *pde = 00000000=20 [ 947.683782] Oops: 0000 [#1] [ 947.684307] Modules linked in: [ 947.684308] [ 947.684308] Pid: 2359, comm: bash Not tainted (2.6.26-rc6 #8) [ 947.684308] EIP: 0060:[] EFLAGS: 00000246 CPU: 0 [ 947.684308] EIP is at __rt_schedulable+0x12/0x160 [ 947.684308] EAX: 00000000 EBX: 00000000 ECX: 00000000 EDX: 00000001 [ 947.684308] ESI: c0521db4 EDI: 00000001 EBP: c6cc9f00 ESP: c6cc9ed0 [ 947.684308] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 [ 947.684308] Process bash (pid: 2359, tiÆcc8000 taskÇa54f00=20 task.tiÆcc8000) [ 947.684308] Stack: c0222790 00000000 080f8c08 c0521db4 c6cc9f00 00000001 00000000 00000000 [ 947.684308] c6cc9f9c 00000000 c0521db4 00000001 c6cc9f28 c0216d40 00000000 00000000 [ 947.684308] c6cc9f9c 000f4240 000e7ef0 ffffffff c0521db4 c79dfb60 c6cc9f58 c02af2cc [ 947.684308] Call Trace: [ 947.684308] [] ? do_proc_dointvec_conv+0x0/0x50 [ 947.684308] [] ? sched_rt_handler+0x80/0x110 [ 947.684308] [] ? proc_sys_call_handler+0x9c/0xb0 [ 947.684308] [] ? proc_sys_write+0x1a/0x20 [ 947.684308] [] ? vfs_write+0x96/0x160 [ 947.684308] [] ? proc_sys_write+0x0/0x20 [ 947.684308] [] ? sys_write+0x3d/0x70 [ 947.684308] [] ? sysenter_past_esp+0x6a/0x91 [ 947.684308] ======================= [ 947.684308] Code: 24 04 e8 62 b1 0e 00 89 c7 89 f8 8b 5d f4 8b 75 f8 8b 7d fc 89 ec 5d c3 90 55 89 e5 57 56 53 83 ec 24 89 45 ec 89 55 e4 89 4d e8 <8b> b8 8c 00 00 00 85 ff 0f 84 c9 00 00 00 8b 57 24 39 55 e8 8b [ 947.684308] EIP: [] __rt_schedulable+0x12/0x160 SS:ESP 0068:c6cc9ed0 We think the following patch solves the issue. Signed-off-by: Dario Faggioli Signed-off-by: Michael Trimarchi Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 04228524d160..320e9a43d4cc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -8350,7 +8350,7 @@ static unsigned long to_ratio(u64 period, u64 runtime) #ifdef CONFIG_CGROUP_SCHED static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) { - struct task_group *tgi, *parent = tg->parent; + struct task_group *tgi, *parent = tg ? tg->parent : NULL; unsigned long total = 0; if (!parent) { -- cgit v1.2.2 From 7ea56616ba6b3d67a4892728182e38ae162ea3e7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Jun 2008 09:06:56 +0200 Subject: sched: rt-group: fix hierarchy Don't re-set the entity's runqueue to the wrong rq after we've set it to the right one. Signed-off-by: Peter Zijlstra Tested-by: Daniel K. Signed-off-by: Ingo Molnar --- kernel/sched.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 320e9a43d4cc..ce375cb551e9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7628,7 +7628,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, else rt_se->rt_rq = parent->my_q; - rt_se->rt_rq = &rq->rt; rt_se->my_q = rt_rq; rt_se->parent = parent; INIT_LIST_HEAD(&rt_se->run_list); -- cgit v1.2.2 From ad2a3f13b7258a5daaaeb8cff9f835aac468b71d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Jun 2008 09:06:57 +0200 Subject: sched: rt-group: heirarchy aware throttle The bandwidth throttle code dequeues a group when it runs out of quota, and re-queues it once the period rolls over and the quota gets refreshed. Sadly it failed to take the hierarchy into consideration. Share more of the enqueue/dequeue code with regular task opterations. Also, some operations like sched_setscheduler() can dequeue/enqueue tasks that are in throttled runqueues, we should not inadvertly re-enqueue empty runqueues so check for that. Signed-off-by: Peter Zijlstra Tested-by: Daniel K. Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 59 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 3432d573205d..837241568d76 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -449,13 +449,19 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) #endif } -static void enqueue_rt_entity(struct sched_rt_entity *rt_se) +static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) { struct rt_rq *rt_rq = rt_rq_of_se(rt_se); struct rt_prio_array *array = &rt_rq->active; struct rt_rq *group_rq = group_rt_rq(rt_se); - if (group_rq && rt_rq_throttled(group_rq)) + /* + * Don't enqueue the group if its throttled, or when empty. + * The latter is a consequence of the former when a child group + * get throttled and the current group doesn't have any other + * active members. + */ + if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) return; list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); @@ -464,7 +470,7 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se) inc_rt_tasks(rt_se, rt_rq); } -static void dequeue_rt_entity(struct sched_rt_entity *rt_se) +static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) { struct rt_rq *rt_rq = rt_rq_of_se(rt_se); struct rt_prio_array *array = &rt_rq->active; @@ -480,11 +486,10 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se) * Because the prio of an upper entry depends on the lower * entries, we must remove entries top - down. */ -static void dequeue_rt_stack(struct task_struct *p) +static void dequeue_rt_stack(struct sched_rt_entity *rt_se) { - struct sched_rt_entity *rt_se, *back = NULL; + struct sched_rt_entity *back = NULL; - rt_se = &p->rt; for_each_sched_rt_entity(rt_se) { rt_se->back = back; back = rt_se; @@ -492,7 +497,26 @@ static void dequeue_rt_stack(struct task_struct *p) for (rt_se = back; rt_se; rt_se = rt_se->back) { if (on_rt_rq(rt_se)) - dequeue_rt_entity(rt_se); + __dequeue_rt_entity(rt_se); + } +} + +static void enqueue_rt_entity(struct sched_rt_entity *rt_se) +{ + dequeue_rt_stack(rt_se); + for_each_sched_rt_entity(rt_se) + __enqueue_rt_entity(rt_se); +} + +static void dequeue_rt_entity(struct sched_rt_entity *rt_se) +{ + dequeue_rt_stack(rt_se); + + for_each_sched_rt_entity(rt_se) { + struct rt_rq *rt_rq = group_rt_rq(rt_se); + + if (rt_rq && rt_rq->rt_nr_running) + __enqueue_rt_entity(rt_se); } } @@ -506,32 +530,15 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) if (wakeup) rt_se->timeout = 0; - dequeue_rt_stack(p); - - /* - * enqueue everybody, bottom - up. - */ - for_each_sched_rt_entity(rt_se) - enqueue_rt_entity(rt_se); + enqueue_rt_entity(rt_se); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) { struct sched_rt_entity *rt_se = &p->rt; - struct rt_rq *rt_rq; update_curr_rt(rq); - - dequeue_rt_stack(p); - - /* - * re-enqueue all non-empty rt_rq entities. - */ - for_each_sched_rt_entity(rt_se) { - rt_rq = group_rt_rq(rt_se); - if (rt_rq && rt_rq->rt_nr_running) - enqueue_rt_entity(rt_se); - } + dequeue_rt_entity(rt_se); } /* -- cgit v1.2.2 From 15a8641eadb492ef7c5489faa25256967bdfd303 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Jun 2008 09:06:59 +0200 Subject: sched: rt-group: fix RR buglet In tick_task_rt() we first call update_curr_rt() which can dequeue a runqueue due to it running out of runtime, and then we try to requeue it, of it also having exhausted its RR quota. Obviously requeueing something that is no longer on the runqueue will not have the expected result. Signed-off-by: Peter Zijlstra Tested-by: Daniel K. Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 837241568d76..1dad5bbb59b6 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -549,8 +549,10 @@ static void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) { struct rt_prio_array *array = &rt_rq->active; + struct list_head *queue = array->queue + rt_se_prio(rt_se); - list_move_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); + if (on_rt_rq(rt_se)) + list_move_tail(&rt_se->run_list, queue); } static void requeue_task_rt(struct rq *rq, struct task_struct *p) -- cgit v1.2.2 From f18f982abf183e91f435990d337164c7a43d1e6d Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Thu, 29 May 2008 11:17:01 -0700 Subject: sched: CPU hotplug events must not destroy scheduler domains created by the cpusets First issue is not related to the cpusets. We're simply leaking doms_cur. It's allocated in arch_init_sched_domains() which is called for every hotplug event. So we just keep reallocation doms_cur without freeing it. I introduced free_sched_domains() function that cleans things up. Second issue is that sched domains created by the cpusets are completely destroyed by the CPU hotplug events. For all CPU hotplug events scheduler attaches all CPUs to the NULL domain and then puts them all into the single domain thereby destroying domains created by the cpusets (partition_sched_domains). The solution is simple, when cpusets are enabled scheduler should not create default domain and instead let cpusets do that. Which is exactly what the patch does. Signed-off-by: Max Krasnyansky Cc: pj@sgi.com Cc: menage@google.com Cc: rostedt@goodmis.org Acked-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- kernel/cpuset.c | 6 ++++++ kernel/sched.c | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 039baa4cd90c..bceb89557973 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1890,6 +1890,12 @@ static void common_cpu_mem_hotplug_unplug(void) top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; scan_for_empty_cpusets(&top_cpuset); + /* + * Scheduler destroys domains on hotplug events. + * Rebuild them based on the current settings. + */ + rebuild_sched_domains(); + cgroup_unlock(); } diff --git a/kernel/sched.c b/kernel/sched.c index ce375cb551e9..4a3cb0614158 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7237,6 +7237,18 @@ void __attribute__((weak)) arch_update_cpu_topology(void) { } +/* + * Free current domain masks. + * Called after all cpus are attached to NULL domain. + */ +static void free_sched_domains(void) +{ + ndoms_cur = 0; + if (doms_cur != &fallback_doms) + kfree(doms_cur); + doms_cur = &fallback_doms; +} + /* * Set up scheduler domains and groups. Callers must hold the hotplug lock. * For now this just excludes isolated cpus, but could be used to @@ -7384,6 +7396,7 @@ int arch_reinit_sched_domains(void) get_online_cpus(); mutex_lock(&sched_domains_mutex); detach_destroy_domains(&cpu_online_map); + free_sched_domains(); err = arch_init_sched_domains(&cpu_online_map); mutex_unlock(&sched_domains_mutex); put_online_cpus(); @@ -7469,6 +7482,7 @@ static int update_sched_domains(struct notifier_block *nfb, case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: detach_destroy_domains(&cpu_online_map); + free_sched_domains(); return NOTIFY_OK; case CPU_UP_CANCELED: @@ -7487,8 +7501,16 @@ static int update_sched_domains(struct notifier_block *nfb, return NOTIFY_DONE; } +#ifndef CONFIG_CPUSETS + /* + * Create default domain partitioning if cpusets are disabled. + * Otherwise we let cpusets rebuild the domains based on the + * current setup. + */ + /* The hotplug lock is already held by cpu_up/cpu_down */ arch_init_sched_domains(&cpu_online_map); +#endif return NOTIFY_OK; } -- cgit v1.2.2 From 30e0e178193d4221abc9926b07a4c7661c7cc4a9 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 13 May 2008 10:27:17 +0800 Subject: cpuset: limit the input of cpuset.sched_relax_domain_level We allow the inputs to be [-1 ... SD_LV_MAX), and return -EINVAL for inputs outside this range. Signed-off-by: Li Zefan Acked-by: Paul Menage Acked-by: Paul Jackson Acked-by: Hidetoshi Seto Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/cpuset.c | 4 ++-- kernel/sched.c | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 039baa4cd90c..66103a119bfe 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1037,8 +1037,8 @@ int current_cpuset_is_being_rebound(void) static int update_relax_domain_level(struct cpuset *cs, s64 val) { - if ((int)val < 0) - val = -1; + if (val < -1 || val >= SD_LV_MAX) + return -EINVAL; if (val != cs->relax_domain_level) { cs->relax_domain_level = val; diff --git a/kernel/sched.c b/kernel/sched.c index eaf6751e7612..bb2c699c9f5e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6877,7 +6877,12 @@ static int default_relax_domain_level = -1; static int __init setup_relax_domain_level(char *str) { - default_relax_domain_level = simple_strtoul(str, NULL, 0); + unsigned long val; + + val = simple_strtoul(str, NULL, 0); + if (val < SD_LV_MAX) + default_relax_domain_level = val; + return 1; } __setup("relax_domain_level=", setup_relax_domain_level); -- cgit v1.2.2 From afd38009cc3acd36d41f349a669ad5825d695b1f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 14:18:17 -0400 Subject: rcupreempt: remove export of rcu_batches_completed_bh In rcupreempt, rcu_batches_completed_bh is defined as a static inline in the header file. This does not need to be exported, and not only that, this breaks my PPC build. Signed-off-by: Steven Rostedt Cc: "Paul E. McKenney" Cc: paulus@samba.org Cc: linuxppc-dev@ozlabs.org Signed-off-by: Thomas Gleixner --- kernel/rcupreempt.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index e1cdf196a515..5e02b7740702 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -217,8 +217,6 @@ long rcu_batches_completed(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed); -EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); - void __rcu_read_lock(void) { int idx; -- cgit v1.2.2 From 9c106c119ebedf624fbd682fd2a4d52e3c8c1a67 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Tue, 27 May 2008 12:23:29 -0500 Subject: softlockup: fix NMI hangs due to lock race - 2.6.26-rc regression The touch_nmi_watchdog() routine on x86 ultimately calls touch_softlockup_watchdog(). The problem is that to touch the softlockup watchdog, the cpu_clock code has to be called which could involve multiple cpu locks and can lead to a hard hang if one of the locks is held by a processor that is not going to return anytime soon (such as could be the case with kgdb or perhaps even with some other kind of exception). This patch causes the public version of the touch_softlockup_watchdog() to defer the cpu clock access to a later point. The test case for this problem is to use the following kernel config options: CONFIG_KGDB_TESTS=y CONFIG_KGDB_TESTS_ON_BOOT=y CONFIG_KGDB_TESTS_BOOT_STRING="V1F100I100000" It should be noted that kgdb test suite and these options were not available until 2.6.26-rc2, so it was necessary to patch the kgdb test suite during the bisection. I would consider this patch a regression fix because the problem first appeared in commit 27ec4407790d075c325e1f4da0a19c56953cce23 when some logic was added to try to periodically sync the clocks. It was possible to work around this particular problem by simply not performing the sync anytime the system was in a critical context. This was ok until commit 3e51f33fcc7f55e6df25d15b55ed10c8b4da84cd, which added config option CONFIG_HAVE_UNSTABLE_SCHED_CLOCK and some multi-cpu locks to sync the clocks. It became clear that accessing this code from an nmi was the source of the lockups. Avoiding the access to the low level clock code from an code inside the NMI processing also fixed the problem with the 27ec44... commit. Signed-off-by: Jason Wessel Signed-off-by: Ingo Molnar --- kernel/softlockup.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 01b6522fd92b..c828c2339cc9 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -49,12 +49,17 @@ static unsigned long get_timestamp(int this_cpu) return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ } -void touch_softlockup_watchdog(void) +static void __touch_softlockup_watchdog(void) { int this_cpu = raw_smp_processor_id(); __raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu); } + +void touch_softlockup_watchdog(void) +{ + __raw_get_cpu_var(touch_timestamp) = 0; +} EXPORT_SYMBOL(touch_softlockup_watchdog); void touch_all_softlockup_watchdogs(void) @@ -80,7 +85,7 @@ void softlockup_tick(void) unsigned long now; if (touch_timestamp == 0) { - touch_softlockup_watchdog(); + __touch_softlockup_watchdog(); return; } @@ -95,7 +100,7 @@ void softlockup_tick(void) /* do not print during early bootup: */ if (unlikely(system_state != SYSTEM_RUNNING)) { - touch_softlockup_watchdog(); + __touch_softlockup_watchdog(); return; } @@ -214,7 +219,7 @@ static int watchdog(void *__bind_cpu) sched_setscheduler(current, SCHED_FIFO, ¶m); /* initialize timestamp */ - touch_softlockup_watchdog(); + __touch_softlockup_watchdog(); set_current_state(TASK_INTERRUPTIBLE); /* @@ -223,7 +228,7 @@ static int watchdog(void *__bind_cpu) * debug-printout triggers in softlockup_tick(). */ while (!kthread_should_stop()) { - touch_softlockup_watchdog(); + __touch_softlockup_watchdog(); schedule(); if (kthread_should_stop()) -- cgit v1.2.2 From d4abc238c9f4df8b3216f3e883f5d0a07b7ac75a Mon Sep 17 00:00:00 2001 From: Bharath Ravi Date: Mon, 16 Jun 2008 15:11:01 +0530 Subject: sched, delay accounting: fix incorrect delay time when constantly waiting on runqueue This patch corrects the incorrect value of per process run-queue wait time reported by delay statistics. The anomaly was due to the following reason. When a process leaves the CPU and immediately starts waiting for CPU on the runqueue (which means it remains in the TASK_RUNNABLE state), the time of re-entry into the run-queue is never recorded. Due to this, the waiting time on the runqueue from this point of re-entry upto the next time it hits the CPU is not accounted for. This is solved by recording the time of re-entry of a process leaving the CPU in the sched_info_depart() function IF the process will go back to waiting on the run-queue. This IF condition is verified by checking whether the process is still in the TASK_RUNNABLE state. The patch was tested on 2.6.26-rc6 using two simple CPU hog programs. The values noted prior to the fix did not account for the time spent on the runqueue waiting. After the fix, the correct values were reported back to user space. Signed-off-by: Bharath Ravi Signed-off-by: Madhava K R Cc: dhaval@linux.vnet.ibm.com Cc: vatsa@in.ibm.com Cc: balbir@in.ibm.com Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_stats.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index a38878e0e49d..80179ef7450e 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -198,6 +198,9 @@ static inline void sched_info_queued(struct task_struct *t) /* * Called when a process ceases being the active-running process, either * voluntarily or involuntarily. Now we can calculate how long we ran. + * Also, if the process is still in the TASK_RUNNING state, call + * sched_info_queued() to mark that it has now again started waiting on + * the runqueue. */ static inline void sched_info_depart(struct task_struct *t) { @@ -206,6 +209,9 @@ static inline void sched_info_depart(struct task_struct *t) t->sched_info.cpu_time += delta; rq_sched_info_depart(task_rq(t), delta); + + if (t->state == TASK_RUNNING) + sched_info_queued(t); } /* -- cgit v1.2.2