author	Venkatesh Pallipadi <venki@google.com>	2010-10-04 20:03:21 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-10-18 14:52:26 -0400
commit	305e6835e05513406fa12820e40e4a8ecb63743c (patch)
tree	8b43703e27d26f4a7e743a99459e0b465cd1e5e1 /kernel/sched_rt.c
parent	e82b8e4ea4f3dffe6e7939f90e78da675fcc450e (diff)
sched: Do not account irq time to current task
The scheduler accounts both softirq and interrupt processing time to the currently running task. This means that if the interrupt processing was on behalf of some other task in the system, the current task ends up being penalized, getting a shorter runtime than it otherwise would.

Change scheduler task accounting to account only actual task time to the currently running task: update_curr() now computes delta_exec from rq->clock_task.

Note that this change only handles the CONFIG_IRQ_TIME_ACCOUNTING case. It can be extended to CONFIG_VIRT_CPU_ACCOUNTING with minimal effort, but that is left for later.

This change will impact scheduling behavior in interrupt-heavy conditions. Tested on a 4-way system with eth0 handled by CPU 2 and a network-heavy task (nc) running on CPU 3 (and no RSS/RFS). With that setup, CPU 2 spends 75%+ of its time in irq processing and CPU 3 spends around 35% of its time running the nc task.

Now, if I run another CPU-intensive task on CPU 2, without this change /proc/<pid>/schedstat shows 100% of the time accounted to this task. With this change, it rightly shows less than 25% accounted to this task, as the remaining time is actually spent on irq processing.

Signed-off-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1286237003-12406-7-git-send-email-venki@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
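For context (not part of this patch), below is a minimal sketch of how rq->clock_task can be kept free of hard/soft irq time under CONFIG_IRQ_TIME_ACCOUNTING. The rq->prev_irq_time field and the irq_time_cpu() helper are illustrative assumptions drawn from the rest of this series, not from the diff shown here:

/*
 * Sketch only: advance rq->clock_task by the portion of a clock delta
 * that was not spent in hard or soft irq context.  prev_irq_time and
 * irq_time_cpu() are illustrative names, not part of this patch.
 */
static void update_rq_clock_task_sketch(struct rq *rq, s64 delta)
{
	s64 irq_delta;

	/* irq/softirq time accumulated on this CPU since the last update */
	irq_delta = irq_time_cpu(cpu_of(rq)) - rq->prev_irq_time;

	/* irq time charged here can never exceed the elapsed delta */
	if (irq_delta > delta)
		irq_delta = delta;

	rq->prev_irq_time += irq_delta;

	/* clock_task advances only by the non-irq portion of the delta */
	rq->clock_task += delta - irq_delta;
}

With delta_exec computed against rq->clock_task instead of rq->clock, time spent servicing interrupts no longer inflates the running task's accounted exec runtime.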
Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r--	kernel/sched_rt.c	8
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index ab77aa00b7b1..bea7d79f7e9c 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -609,7 +609,7 @@ static void update_curr_rt(struct rq *rq)
 	if (!task_has_rt_policy(curr))
 		return;
 
-	delta_exec = rq->clock - curr->se.exec_start;
+	delta_exec = rq->clock_task - curr->se.exec_start;
 	if (unlikely((s64)delta_exec < 0))
 		delta_exec = 0;
 
@@ -618,7 +618,7 @@ static void update_curr_rt(struct rq *rq)
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
 
-	curr->se.exec_start = rq->clock;
+	curr->se.exec_start = rq->clock_task;
 	cpuacct_charge(curr, delta_exec);
 
 	sched_rt_avg_update(rq, delta_exec);
@@ -1075,7 +1075,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
 	} while (rt_rq);
 
 	p = rt_task_of(rt_se);
-	p->se.exec_start = rq->clock;
+	p->se.exec_start = rq->clock_task;
 
 	return p;
 }
@@ -1713,7 +1713,7 @@ static void set_curr_task_rt(struct rq *rq)
 {
 	struct task_struct *p = rq->curr;
 
-	p->se.exec_start = rq->clock;
+	p->se.exec_start = rq->clock_task;
 
 	/* The running task is never eligible for pushing */
 	dequeue_pushable_task(rq, p);