author    Peter Zijlstra <a.p.zijlstra@chello.nl>    2009-09-01 04:34:37 -0400
committer Ingo Molnar <mingo@elte.hu>                2009-09-04 04:09:55 -0400
commit    e9e9250bc78e7f6342517214c0178a529807964b (patch)
tree      9466a437ccfa93b200f4ee434fe807dd6b5fc050 /kernel
parent    ab29230e673c646292c90c8b9d378b9562145af0 (diff)
sched: Scale down cpu_power due to RT tasks
Keep an average of the amount of time spent on RT tasks and use
that fraction to scale down the cpu_power for regular tasks.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Acked-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Acked-by: Gautham R Shenoy <ego@in.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
LKML-Reference: <20090901083826.287778431@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
 kernel/sched.c    | 64
 kernel/sched_rt.c |  6
 kernel/sysctl.c   |  8
 3 files changed, 71 insertions(+), 7 deletions(-)
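
Taken together, the pieces work as follows: update_curr_rt() feeds every slice of RT runtime into rq->rt_avg, sched_avg_update() halves rt_avg once per elapsed half-period so old samples decay geometrically, and scale_rt_power() turns what remains into a fraction of SCHED_LOAD_SCALE that update_cpu_power() multiplies into the group's cpu_power. The standalone userspace sketch below replays that arithmetic; the struct rq stub, the constants, and the example numbers are illustrative assumptions, and only the three function bodies mirror the patch (with plain division standing in for div_u64() and unlikely() dropped).

/* Standalone sketch of the rt_avg / cpu_power arithmetic in this patch;
 * kernel types and locking are stubbed out, numbers are illustrative.
 * Build (hypothetical): cc -o rtscale rtscale.c */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_MSEC		1000000ULL
#define MSEC_PER_SEC		1000
#define SCHED_LOAD_SHIFT	10
#define SCHED_LOAD_SCALE	(1ULL << SCHED_LOAD_SHIFT)

static unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;	/* 1s default */

struct rq {				/* minimal stand-in for the kernel's rq */
	uint64_t clock;			/* ns */
	uint64_t rt_avg;		/* decayed RT runtime, ns */
	uint64_t age_stamp;		/* start of current half-period, ns */
};

static uint64_t sched_avg_period(void)
{
	return (uint64_t)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
}

static void sched_avg_update(struct rq *rq)
{
	int64_t period = sched_avg_period();

	/* halve rt_avg once for every elapsed half-period */
	while ((int64_t)(rq->clock - rq->age_stamp) > period) {
		rq->age_stamp += period;
		rq->rt_avg /= 2;
	}
}

static unsigned long scale_rt_power(struct rq *rq)	/* kernel takes an int cpu */
{
	uint64_t total, available;

	sched_avg_update(rq);
	total = sched_avg_period() + (rq->clock - rq->age_stamp);
	available = total - rq->rt_avg;

	if ((int64_t)total < (int64_t)SCHED_LOAD_SCALE)
		total = SCHED_LOAD_SCALE;
	total >>= SCHED_LOAD_SHIFT;

	/* fraction of the window left for !RT work, in load-scale units */
	return (unsigned long)(available / total);
}

int main(void)
{
	/* 300ms of RT runtime seen over the current 500ms half-period */
	struct rq rq = {
		.clock     = 500 * NSEC_PER_MSEC,
		.rt_avg    = 300 * NSEC_PER_MSEC,
		.age_stamp = 0,
	};
	unsigned long power = SCHED_LOAD_SCALE;

	power = power * scale_rt_power(&rq) >> SCHED_LOAD_SHIFT;
	if (!power)
		power = 1;
	/* 30% RT load leaves ~70% capacity: prints "cpu_power: 716 / 1024" */
	printf("cpu_power: %lu / %llu\n", power,
	       (unsigned long long)SCHED_LOAD_SCALE);
	return 0;
}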
diff --git a/kernel/sched.c b/kernel/sched.c
index 036600fd70bb..ab532b5de40e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -627,6 +627,9 @@ struct rq {
 
 	struct task_struct *migration_thread;
 	struct list_head migration_queue;
+
+	u64 rt_avg;
+	u64 age_stamp;
 #endif
 
 	/* calc_load related fields */
@@ -863,6 +866,14 @@ unsigned int sysctl_sched_shares_ratelimit = 250000;
 unsigned int sysctl_sched_shares_thresh = 4;
 
 /*
+ * period over which we average the RT time consumption, measured
+ * in ms.
+ *
+ * default: 1s
+ */
+const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;
+
+/*
  * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
@@ -1280,12 +1291,37 @@ void wake_up_idle_cpu(int cpu)
 }
 #endif /* CONFIG_NO_HZ */
 
+static u64 sched_avg_period(void)
+{
+	return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
+}
+
+static void sched_avg_update(struct rq *rq)
+{
+	s64 period = sched_avg_period();
+
+	while ((s64)(rq->clock - rq->age_stamp) > period) {
+		rq->age_stamp += period;
+		rq->rt_avg /= 2;
+	}
+}
+
+static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
+{
+	rq->rt_avg += rt_delta;
+	sched_avg_update(rq);
+}
+
 #else /* !CONFIG_SMP */
 static void resched_task(struct task_struct *p)
 {
	assert_spin_locked(&task_rq(p)->lock);
	set_tsk_need_resched(p);
 }
+
+static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
+{
+}
 #endif /* CONFIG_SMP */
 
 #if BITS_PER_LONG == 32
@@ -3699,7 +3735,7 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
-unsigned long __weak arch_smt_gain(struct sched_domain *sd, int cpu)
+unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
 {
 	unsigned long weight = cpumask_weight(sched_domain_span(sd));
 	unsigned long smt_gain = sd->smt_gain;
@@ -3709,6 +3745,24 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
 	return smt_gain;
 }
 
+unsigned long scale_rt_power(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	u64 total, available;
+
+	sched_avg_update(rq);
+
+	total = sched_avg_period() + (rq->clock - rq->age_stamp);
+	available = total - rq->rt_avg;
+
+	if (unlikely((s64)total < SCHED_LOAD_SCALE))
+		total = SCHED_LOAD_SCALE;
+
+	total >>= SCHED_LOAD_SHIFT;
+
+	return div_u64(available, total);
+}
+
 static void update_cpu_power(struct sched_domain *sd, int cpu)
 {
 	unsigned long weight = cpumask_weight(sched_domain_span(sd));
@@ -3719,11 +3773,15 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 	/* here we could scale based on cpufreq */
 
 	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
-		power *= arch_smt_gain(sd, cpu);
+		power *= arch_scale_smt_power(sd, cpu);
 		power >>= SCHED_LOAD_SHIFT;
 	}
 
-	/* here we could scale based on RT time */
+	power *= scale_rt_power(cpu);
+	power >>= SCHED_LOAD_SHIFT;
+
+	if (!power)
+		power = 1;
 
 	if (power != old) {
 		sdg->__cpu_power = power;
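
One consequence worth spelling out (an observation about the code above, not wording from the patch): under a sustained 100% RT load, rt_avg gains exactly one half-period p of runtime between folds and is then halved, so it follows the recurrence r' = (r + p)/2 and converges to p = sched_avg_period(). From then on available = total - rt_avg in scale_rt_power() stays at (or near) zero, the multiply in update_cpu_power() drives power to zero, and the !power check clamps it to 1, leaving a fully RT-bound CPU essentially invisible to the fair-class load balancer. With no RT activity, the same halving decays rt_avg toward zero within a few half-periods and cpu_power recovers its unscaled value.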
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 3d4020a9ba1b..2eb4bd6a526c 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -615,6 +615,8 @@ static void update_curr_rt(struct rq *rq)
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
 
+	sched_rt_avg_update(rq, delta_exec);
+
 	if (!rt_bandwidth_enabled())
 		return;
 
@@ -887,8 +889,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 
 	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
-
-	inc_cpu_load(rq, p->se.load.weight);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -899,8 +899,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 	dequeue_rt_entity(rt_se);
 
 	dequeue_pushable_task(rq, p);
-
-	dec_cpu_load(rq, p->se.load.weight);
 }
 
 /*
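
Note the two removals above: RT tasks no longer add p->se.load.weight to the runqueue load on enqueue/dequeue. With this patch their footprint reaches the load balancer through the scaled cpu_power instead, so keeping the weighted-load contribution as well would, presumably, count the same RT pressure twice.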
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 58be76017fd0..6c9836ef4b47 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -332,6 +332,14 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_time_avg",
+		.data		= &sysctl_sched_time_avg,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "timer_migration",
 		.data		= &sysctl_timer_migration,
 		.maxlen		= sizeof(unsigned int),
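
Since the entry sits in kern_table, the averaging window should surface at /proc/sys/kernel/sched_time_avg (value in milliseconds; proc_dointvec performs no range checking). A hypothetical session on a patched kernel:

	# cat /proc/sys/kernel/sched_time_avg
	1000
	# echo 500 > /proc/sys/kernel/sched_time_avg

A shorter window lets cpu_power track RT bursts more quickly but with more jitter; a longer one smooths the estimate at the cost of slower reaction.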