-rw-r--r--  kernel/sched.c       | 43
-rw-r--r--  kernel/sched_fair.c  |  6
-rw-r--r--  kernel/sched_rt.c    |  8
3 files changed, 47 insertions, 10 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 9b302e355791..9e01b7100ef6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -491,6 +491,7 @@ struct rq {
 	struct mm_struct *prev_mm;
 
 	u64 clock;
+	u64 clock_task;
 
 	atomic_t nr_iowait;
 
@@ -641,10 +642,19 @@ static inline struct task_group *task_group(struct task_struct *p)
 
 #endif /* CONFIG_CGROUP_SCHED */
 
+static u64 irq_time_cpu(int cpu);
+
 inline void update_rq_clock(struct rq *rq)
 {
-	if (!rq->skip_clock_update)
-		rq->clock = sched_clock_cpu(cpu_of(rq));
+	if (!rq->skip_clock_update) {
+		int cpu = cpu_of(rq);
+		u64 irq_time;
+
+		rq->clock = sched_clock_cpu(cpu);
+		irq_time = irq_time_cpu(cpu);
+		if (rq->clock - irq_time > rq->clock_task)
+			rq->clock_task = rq->clock - irq_time;
+	}
 }
 
 /*
@@ -1910,6 +1920,18 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 
+/*
+ * There are no locks covering percpu hardirq/softirq time.
+ * They are only modified in account_system_vtime, on corresponding CPU
+ * with interrupts disabled. So, writes are safe.
+ * They are read and saved off onto struct rq in update_rq_clock().
+ * This may result in other CPU reading this CPU's irq time and can
+ * race with irq/account_system_vtime on this CPU. We would either get old
+ * or new value (or semi updated value on 32 bit) with a side effect of
+ * accounting a slice of irq time to wrong task when irq is in progress
+ * while we read rq->clock. That is a worthy compromise in place of having
+ * locks on each irq in account_system_time.
+ */
 static DEFINE_PER_CPU(u64, cpu_hardirq_time);
 static DEFINE_PER_CPU(u64, cpu_softirq_time);
 
@@ -1926,6 +1948,14 @@ void disable_sched_clock_irqtime(void)
 	sched_clock_irqtime = 0;
 }
 
+static u64 irq_time_cpu(int cpu)
+{
+	if (!sched_clock_irqtime)
+		return 0;
+
+	return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
+}
+
 void account_system_vtime(struct task_struct *curr)
 {
 	unsigned long flags;
@@ -1955,6 +1985,13 @@ void account_system_vtime(struct task_struct *curr)
 	local_irq_restore(flags);
 }
 
+#else
+
+static u64 irq_time_cpu(int cpu)
+{
+	return 0;
+}
+
 #endif
 
 #include "sched_idletask.c"
@@ -3322,7 +3359,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
 
 	if (task_current(rq, p)) {
 		update_rq_clock(rq);
-		ns = rq->clock - p->se.exec_start;
+		ns = rq->clock_task - p->se.exec_start;
 		if ((s64)ns < 0)
 			ns = 0;
 	}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f1c615ff39d6..c358d4081b81 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -519,7 +519,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 static void update_curr(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
-	u64 now = rq_of(cfs_rq)->clock;
+	u64 now = rq_of(cfs_rq)->clock_task;
 	unsigned long delta_exec;
 
 	if (unlikely(!curr))
@@ -602,7 +602,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	/*
 	 * We are starting a new run period:
 	 */
-	se->exec_start = rq_of(cfs_rq)->clock;
+	se->exec_start = rq_of(cfs_rq)->clock_task;
 }
 
 /**************************************************
@@ -1802,7 +1802,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	 * 2) too many balance attempts have failed.
 	 */
 
-	tsk_cache_hot = task_hot(p, rq->clock, sd);
+	tsk_cache_hot = task_hot(p, rq->clock_task, sd);
 	if (!tsk_cache_hot ||
 		sd->nr_balance_failed > sd->cache_nice_tries) {
 #ifdef CONFIG_SCHEDSTATS
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index ab77aa00b7b1..bea7d79f7e9c 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -609,7 +609,7 @@ static void update_curr_rt(struct rq *rq)
 	if (!task_has_rt_policy(curr))
 		return;
 
-	delta_exec = rq->clock - curr->se.exec_start;
+	delta_exec = rq->clock_task - curr->se.exec_start;
 	if (unlikely((s64)delta_exec < 0))
 		delta_exec = 0;
 
@@ -618,7 +618,7 @@ static void update_curr_rt(struct rq *rq)
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
 
-	curr->se.exec_start = rq->clock;
+	curr->se.exec_start = rq->clock_task;
 	cpuacct_charge(curr, delta_exec);
 
 	sched_rt_avg_update(rq, delta_exec);
@@ -1075,7 +1075,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
 	} while (rt_rq);
 
 	p = rt_task_of(rt_se);
-	p->se.exec_start = rq->clock;
+	p->se.exec_start = rq->clock_task;
 
 	return p;
 }
@@ -1713,7 +1713,7 @@ static void set_curr_task_rt(struct rq *rq)
 {
 	struct task_struct *p = rq->curr;
 
-	p->se.exec_start = rq->clock;
+	p->se.exec_start = rq->clock_task;
 
 	/* The running task is never eligible for pushing */
 	dequeue_pushable_task(rq, p);