author		Venkatesh Pallipadi <venki@google.com>	2010-10-04 20:03:21 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-10-18 14:52:26 -0400
commit		305e6835e05513406fa12820e40e4a8ecb63743c
tree		8b43703e27d26f4a7e743a99459e0b465cd1e5e1
parent		e82b8e4ea4f3dffe6e7939f90e78da675fcc450e
sched: Do not account irq time to current task
The scheduler accounts both softirq and interrupt processing time to the
currently running task. This means that if the interrupt processing was
done on behalf of some other task in the system, the current task ends
up being penalized: it gets a shorter runtime than it otherwise would.

Change scheduler task accounting to account only actual task time to the
currently running task. update_curr() now derives delta_exec from
rq->clock_task.

Note that this change only handles the CONFIG_IRQ_TIME_ACCOUNTING case.
It can be extended to CONFIG_VIRT_CPU_ACCOUNTING with minimal effort,
but that's for later.

This change will impact scheduling behavior in interrupt-heavy
conditions. Tested on a 4-way system with eth0 handled by CPU 2 and a
network-heavy task (nc) running on CPU 3 (and no RSS/RFS): CPU 2 then
spends 75%+ of its time in irq processing, and CPU 3 spends around 35%
of its time running the nc task. If another CPU-intensive task is run
on CPU 2, /proc/<pid>/schedstat shows 100% of the time accounted to
that task without this change. With this change, it rightly shows less
than 25% accounted to the task, as the remaining time is actually spent
on irq processing.

Signed-off-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1286237003-12406-7-git-send-email-venki@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
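(Not part of the patch: a minimal userspace sketch of the measurement
described above. It reads the per-task /proc/<pid>/schedstat file, whose
three fields are time spent running on the CPU, time spent waiting on a
runqueue, and the number of timeslices, per
Documentation/scheduler/sched-stats.txt.)

#include <stdio.h>

int main(int argc, char **argv)
{
        char path[64];
        unsigned long long exec_ns, wait_ns, slices;
        FILE *f;

        if (argc != 2) {
                fprintf(stderr, "usage: %s <pid>\n", argv[0]);
                return 1;
        }

        /* /proc/<pid>/schedstat: exec time (ns), runqueue wait (ns), slices */
        snprintf(path, sizeof(path), "/proc/%s/schedstat", argv[1]);
        f = fopen(path, "r");
        if (!f) {
                perror("fopen");
                return 1;
        }

        if (fscanf(f, "%llu %llu %llu", &exec_ns, &wait_ns, &slices) == 3)
                printf("ran %llu ns, waited %llu ns, %llu slices\n",
                       exec_ns, wait_ns, slices);

        fclose(f);
        return 0;
}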
Diffstat (limited to 'kernel/sched.c')
 kernel/sched.c | 43 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 9b302e355791..9e01b7100ef6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -491,6 +491,7 @@ struct rq {
         struct mm_struct *prev_mm;
 
         u64 clock;
+        u64 clock_task;
 
         atomic_t nr_iowait;
 
@@ -641,10 +642,19 @@ static inline struct task_group *task_group(struct task_struct *p)
 
 #endif /* CONFIG_CGROUP_SCHED */
 
+static u64 irq_time_cpu(int cpu);
+
 inline void update_rq_clock(struct rq *rq)
 {
-        if (!rq->skip_clock_update)
-                rq->clock = sched_clock_cpu(cpu_of(rq));
+        if (!rq->skip_clock_update) {
+                int cpu = cpu_of(rq);
+                u64 irq_time;
+
+                rq->clock = sched_clock_cpu(cpu);
+                irq_time = irq_time_cpu(cpu);
+                if (rq->clock - irq_time > rq->clock_task)
+                        rq->clock_task = rq->clock - irq_time;
+        }
 }
 
 /*
@@ -1910,6 +1920,18 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 
+/*
+ * There are no locks covering percpu hardirq/softirq time.
+ * They are only modified in account_system_vtime, on corresponding CPU
+ * with interrupts disabled. So, writes are safe.
+ * They are read and saved off onto struct rq in update_rq_clock().
+ * This may result in other CPU reading this CPU's irq time and can
+ * race with irq/account_system_vtime on this CPU. We would either get old
+ * or new value (or semi updated value on 32 bit) with a side effect of
+ * accounting a slice of irq time to wrong task when irq is in progress
+ * while we read rq->clock. That is a worthy compromise in place of having
+ * locks on each irq in account_system_time.
+ */
 static DEFINE_PER_CPU(u64, cpu_hardirq_time);
 static DEFINE_PER_CPU(u64, cpu_softirq_time);
 
@@ -1926,6 +1948,14 @@ void disable_sched_clock_irqtime(void)
         sched_clock_irqtime = 0;
 }
 
+static u64 irq_time_cpu(int cpu)
+{
+        if (!sched_clock_irqtime)
+                return 0;
+
+        return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
+}
+
 void account_system_vtime(struct task_struct *curr)
 {
         unsigned long flags;
1929void account_system_vtime(struct task_struct *curr) 1959void account_system_vtime(struct task_struct *curr)
1930{ 1960{
1931 unsigned long flags; 1961 unsigned long flags;
@@ -1955,6 +1985,13 @@ void account_system_vtime(struct task_struct *curr)
         local_irq_restore(flags);
 }
 
+#else
+
+static u64 irq_time_cpu(int cpu)
+{
+        return 0;
+}
+
 #endif
 
 #include "sched_idletask.c"
@@ -3322,7 +3359,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
 
         if (task_current(rq, p)) {
                 update_rq_clock(rq);
-                ns = rq->clock - p->se.exec_start;
+                ns = rq->clock_task - p->se.exec_start;
                 if ((s64)ns < 0)
                         ns = 0;
         }
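(A note on the update_rq_clock() hunk above: rq->clock_task only moves
forward when the irq-adjusted clock has passed it, so a burst of irq
time can stall task time but never rewind it. Below is a standalone toy
model of that guard, not kernel code; all values are invented for
illustration.)

#include <assert.h>
#include <stdio.h>

/* Toy model of the rq->clock / rq->clock_task pair from the patch. */
struct toy_rq {
        unsigned long long clock;       /* raw per-cpu sched clock */
        unsigned long long clock_task;  /* clock minus accumulated irq time */
};

static void toy_update_rq_clock(struct toy_rq *rq,
                                unsigned long long clock,
                                unsigned long long irq_time)
{
        rq->clock = clock;
        /* Same guard as the patch: never move clock_task backwards. */
        if (rq->clock - irq_time > rq->clock_task)
                rq->clock_task = rq->clock - irq_time;
}

int main(void)
{
        struct toy_rq rq = { 0, 0 };

        toy_update_rq_clock(&rq, 1000, 0);      /* no irq time yet */
        assert(rq.clock_task == 1000);

        toy_update_rq_clock(&rq, 1500, 600);    /* irq burst: 1500 - 600 < 1000 */
        assert(rq.clock_task == 1000);          /* task clock holds, no rewind */

        toy_update_rq_clock(&rq, 2000, 600);    /* 2000 - 600 = 1400 > 1000 */
        assert(rq.clock_task == 1400);          /* task clock advances again */

        printf("clock=%llu clock_task=%llu\n", rq.clock, rq.clock_task);
        return 0;
}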