path: root/kernel/sched_fair.c
author    Venkatesh Pallipadi <venki@google.com>    2010-10-04 20:03:21 -0400
committer Ingo Molnar <mingo@elte.hu>    2010-10-18 14:52:26 -0400
commit    305e6835e05513406fa12820e40e4a8ecb63743c
tree      8b43703e27d26f4a7e743a99459e0b465cd1e5e1 /kernel/sched_fair.c
parent    e82b8e4ea4f3dffe6e7939f90e78da675fcc450e
sched: Do not account irq time to current task
The scheduler accounts both softirq and interrupt processing time to the currently running task. This means that if the interrupt processing was done on behalf of some other task in the system, the current task ends up penalized, getting a shorter runtime than it otherwise would.

Change the task accounting to account only actual task time to the currently running task: update_curr() now derives delta_exec from rq->clock_task.

Note that this change only handles the CONFIG_IRQ_TIME_ACCOUNTING case. It can be extended to CONFIG_VIRT_CPU_ACCOUNTING with minimal effort, but that's for later.

This change will impact scheduling behavior under interrupt-heavy conditions. Tested on a 4-way system with eth0 handled by CPU 2 and a network-heavy task (nc) running on CPU 3 (and no RSS/RFS), which leaves CPU 2 spending 75%+ of its time in irq processing and CPU 3 spending around 35% of its time running the nc task. If another CPU-intensive task is then run on CPU 2, /proc/<pid>/schedstat without this change shows 100% of the time accounted to that task; with this change it rightly shows less than 25% accounted to the task, as the remaining time is actually spent on irq processing.

Signed-off-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1286237003-12406-7-git-send-email-venki@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
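The diff below is limited to kernel/sched_fair.c, which only consumes rq->clock_task; the code that actually maintains clock_task lives on the kernel/sched.c side of the patch and is not shown here. A minimal sketch of the idea it relies on, under CONFIG_IRQ_TIME_ACCOUNTING, assuming a per-CPU irq-time accumulator (irq_time_cpu() and prev_irq_time are illustrative names, not necessarily the patch's own):

/*
 * Sketch only: advance clock_task by the wall-clock delta minus the
 * hardirq/softirq time that accrued on this CPU since the last update,
 * so clock_task only reflects time the task could actually have run.
 */
static void update_rq_clock_task(struct rq *rq, s64 delta)
{
	s64 irq_delta;

	/* irq time accrued since the last update (illustrative helper) */
	irq_delta = irq_time_cpu(cpu_of(rq)) - rq->prev_irq_time;

	/* clamp so clock_task never advances past the wall clock */
	if (irq_delta > delta)
		irq_delta = delta;

	rq->prev_irq_time += irq_delta;
	rq->clock_task    += delta - irq_delta;
}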
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--  kernel/sched_fair.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f1c615ff39d6..c358d4081b81 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -519,7 +519,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 static void update_curr(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
-	u64 now = rq_of(cfs_rq)->clock;
+	u64 now = rq_of(cfs_rq)->clock_task;
 	unsigned long delta_exec;
 
 	if (unlikely(!curr))
@@ -602,7 +602,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	/*
 	 * We are starting a new run period:
 	 */
-	se->exec_start = rq_of(cfs_rq)->clock;
+	se->exec_start = rq_of(cfs_rq)->clock_task;
 }
 
 /**************************************************
@@ -1802,7 +1802,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	 * 2) too many balance attempts have failed.
 	 */
 
-	tsk_cache_hot = task_hot(p, rq->clock, sd);
+	tsk_cache_hot = task_hot(p, rq->clock_task, sd);
 	if (!tsk_cache_hot ||
 	    sd->nr_balance_failed > sd->cache_nice_tries) {
 #ifdef CONFIG_SCHEDSTATS
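The substitution is mechanical, but the accounting effect in update_curr() is easy to see. A hedged walk-through with invented numbers:

/*
 * Illustration only (numbers made up): the task was scheduled in at
 * exec_start, and update_curr() runs 10ms of wall time later, 4ms of
 * which this CPU spent in hardirq/softirq context.
 *
 *   old: delta_exec = rq->clock      - curr->exec_start = 10ms
 *   new: delta_exec = rq->clock_task - curr->exec_start =  6ms
 *
 * Only the 6ms the task actually ran is now charged to its vruntime;
 * the 4ms of irq time is no longer billed to the task that happened
 * to be running when the interrupts arrived.
 */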