author		Venkatesh Pallipadi <venki@google.com>	2010-10-04 20:03:21 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-10-18 14:52:26 -0400
commit		305e6835e05513406fa12820e40e4a8ecb63743c (patch)
tree		8b43703e27d26f4a7e743a99459e0b465cd1e5e1 /kernel/sched_rt.c
parent		e82b8e4ea4f3dffe6e7939f90e78da675fcc450e (diff)
sched: Do not account irq time to current task
The scheduler accounts both softirq and interrupt processing time to the
currently running task. This means that if the interrupt processing was
done on behalf of some other task in the system, the current task ends up
being penalized, as it gets a shorter runtime than it otherwise would.
Change sched task accounting to account only actual task time to the
currently running task: update_curr() now derives delta_exec from
rq->clock_task instead of rq->clock.
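
For context, here is a minimal sketch (an assumption for illustration, not
the exact kernel code) of what rq->clock_task is meant to represent under
CONFIG_IRQ_TIME_ACCOUNTING: it advances with rq->clock minus the time spent
in hardirq/softirq context, so a delta_exec computed from it excludes irq
processing. The struct and field names below are stand-ins.

	/*
	 * Sketch only: clock_task tracks clock, less the irq time that
	 * accrued on this cpu since the previous update.
	 */
	struct rq_sketch {
		unsigned long long clock;	/* per-rq clock, ns */
		unsigned long long clock_task;	/* clock minus irq time */
		unsigned long long irq_time;	/* total irq time, ns */
		unsigned long long prev_irq_time;
	};

	static void update_clock_task_sketch(struct rq_sketch *rq,
					     unsigned long long delta)
	{
		unsigned long long irq_delta;

		/* irq time accrued since the previous clock update */
		irq_delta = rq->irq_time - rq->prev_irq_time;
		if (irq_delta > delta)		/* clamp to the wall delta */
			irq_delta = delta;

		rq->prev_irq_time += irq_delta;
		rq->clock += delta;
		rq->clock_task += delta - irq_delta;
	}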
Note that this change only handles the CONFIG_IRQ_TIME_ACCOUNTING case. It
can be extended to CONFIG_VIRT_CPU_ACCOUNTING with minimal effort, but that
is left for later.
This change will impact scheduling behavior under interrupt-heavy
conditions. It was tested on a 4-way system with eth0 handled by CPU 2 and a
network-heavy task (nc) running on CPU 3 (no RSS/RFS). In that setup, CPU 2
spends more than 75% of its time in irq processing and CPU 3 spends around
35% of its time running the nc task.
Now, if another CPU-intensive task is run on CPU 2, without this change
/proc/<pid>/schedstat shows 100% of the time accounted to that task. With
this change, it rightly shows less than 25% accounted to the task, since the
remaining time is actually spent on irq processing.
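
For reference, the first field of /proc/<pid>/schedstat is the task's
accounted on-cpu time in nanoseconds, which is the value affected by this
patch. A small user-space helper like the one below (a sketch, not part of
the patch) can be used to sample it:

	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	/* Read the first schedstat field: on-cpu time in nanoseconds. */
	static long long task_runtime_ns(int pid)
	{
		char path[64];
		long long runtime = -1;
		FILE *f;

		snprintf(path, sizeof(path), "/proc/%d/schedstat", pid);
		f = fopen(path, "r");
		if (!f)
			return -1;
		if (fscanf(f, "%lld", &runtime) != 1)
			runtime = -1;
		fclose(f);
		return runtime;
	}

	int main(int argc, char **argv)
	{
		int pid = argc > 1 ? atoi(argv[1]) : getpid();

		printf("%lld ns\n", task_runtime_ns(pid));
		return 0;
	}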
Signed-off-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1286237003-12406-7-git-send-email-venki@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r--	kernel/sched_rt.c	8
1 files changed, 4 insertions, 4 deletions
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index ab77aa00b7b1..bea7d79f7e9c 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -609,7 +609,7 @@ static void update_curr_rt(struct rq *rq)
 	if (!task_has_rt_policy(curr))
 		return;
 
-	delta_exec = rq->clock - curr->se.exec_start;
+	delta_exec = rq->clock_task - curr->se.exec_start;
 	if (unlikely((s64)delta_exec < 0))
 		delta_exec = 0;
 
@@ -618,7 +618,7 @@ static void update_curr_rt(struct rq *rq)
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
 
-	curr->se.exec_start = rq->clock;
+	curr->se.exec_start = rq->clock_task;
 	cpuacct_charge(curr, delta_exec);
 
 	sched_rt_avg_update(rq, delta_exec);
@@ -1075,7 +1075,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
 	} while (rt_rq);
 
 	p = rt_task_of(rt_se);
-	p->se.exec_start = rq->clock;
+	p->se.exec_start = rq->clock_task;
 
 	return p;
 }
@@ -1713,7 +1713,7 @@ static void set_curr_task_rt(struct rq *rq)
 {
 	struct task_struct *p = rq->curr;
 
-	p->se.exec_start = rq->clock;
+	p->se.exec_start = rq->clock_task;
 
 	/* The running task is never eligible for pushing */
 	dequeue_pushable_task(rq, p);