author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2008-08-19 06:33:06 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-08-19 07:10:12 -0400
commit	9a7e0b180da21885988d47558671cf580279f9d6 (patch)
tree	ab1e69b08eb393a91f7bcfa1cc97170d81f36814 /kernel/sched.c
parent	eb755805f21bd5ded84026e167b7a90887ac42e5 (diff)
sched: rt-bandwidth fixes
The last patch allows sysctl_sched_rt_runtime to disable bandwidth
accounting for the group scheduler - however it doesn't deal with
sched_setscheduler(), which will keep tasks out of groups that have no
assigned runtime.

If we relax this, we get into the situation where RT tasks can get into
a group when we disable bandwidth control, and then starve them by
enabling it again.

Rework the schedulability code to check for this condition and fail to
turn on bandwidth control with -EBUSY when this situation is found.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
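[Editorial illustration, not part of the commit] Below is a minimal user-space sketch of the constraint the reworked __rt_schedulable()/tg_schedulable() walk enforces: a group may not promise its children more bandwidth than it has itself, and a group that still contains RT tasks may not be left with zero runtime while bandwidth control is enabled. The names here (rt_check.c, tg_node, check_group, has_rt_tasks, the plain recursion standing in for walk_tg_tree()) are invented for the example, and the rt_schedulable_data substitution of the proposed new values is omitted; this is not kernel code.

/* rt_check.c - illustrative only; build with: cc -Wall -o rt_check rt_check.c */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the kernel's struct task_group. */
struct tg_node {
	const char *name;
	uint64_t rt_period_ns;		/* replenishment period */
	uint64_t rt_runtime_ns;		/* budget per period, 0 = none */
	int has_rt_tasks;		/* stand-in for tg_has_rt_tasks() */
	struct tg_node *children;
	int nr_children;
};

static int rt_bandwidth_enabled = 1;	/* sysctl_sched_rt_runtime != -1 */

/* Same fixed-point ratio the patch widens from 1 << 16 to 1 << 20. */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << 20) / period;
}

/*
 * Mirrors the shape of tg_schedulable(): the children's combined ratio may
 * not exceed the group's own (-EINVAL), and a group that still contains RT
 * tasks may not have no runtime while bandwidth control is on (-EBUSY).
 * The recursion stands in for walk_tg_tree().
 */
static int check_group(const struct tg_node *tg)
{
	uint64_t total, sum = 0;
	int i, ret;

	if (rt_bandwidth_enabled && !tg->rt_runtime_ns && tg->has_rt_tasks)
		return -EBUSY;

	total = to_ratio(tg->rt_period_ns, tg->rt_runtime_ns);

	for (i = 0; i < tg->nr_children; i++)
		sum += to_ratio(tg->children[i].rt_period_ns,
				tg->children[i].rt_runtime_ns);

	if (sum > total)
		return -EINVAL;

	for (i = 0; i < tg->nr_children; i++) {
		ret = check_group(&tg->children[i]);
		if (ret)
			return ret;
	}

	return 0;
}

int main(void)
{
	/* Group "A" still has RT tasks but no runtime assigned. */
	struct tg_node leaf = {
		.name = "A",
		.rt_period_ns = 1000000000ULL,	/* 1s */
		.rt_runtime_ns = 0,
		.has_rt_tasks = 1,
	};
	struct tg_node root = {
		.name = "root",
		.rt_period_ns = 1000000000ULL,
		.rt_runtime_ns = 950000000ULL,	/* 0.95s */
		.children = &leaf,
		.nr_children = 1,
	};

	/* Expect -EBUSY, the error the reworked check now propagates from
	 * tg_set_bandwidth()/sched_rt_global_constraints(). */
	printf("check_group(root) = %d (EBUSY = %d)\n",
	       check_group(&root), -EBUSY);
	return 0;
}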
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	125
1 files changed, 63 insertions(+), 62 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 8c019a19d052..e41bdae2778d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -300,9 +300,9 @@ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
 static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
 static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
 #endif /* CONFIG_RT_GROUP_SCHED */
-#else /* !CONFIG_FAIR_GROUP_SCHED */
+#else /* !CONFIG_USER_SCHED */
 #define root_task_group init_task_group
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_USER_SCHED */
 
 /* task_group_lock serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
@@ -1387,7 +1387,7 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
 	update_load_sub(&rq->load, load);
 }
 
-#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED))
+#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
 typedef int (*tg_visitor)(struct task_group *, void *);
 
 /*
@@ -5082,7 +5082,8 @@ recheck:
 	 * Do not allow realtime tasks into groups that have no runtime
 	 * assigned.
 	 */
-	if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+	if (rt_bandwidth_enabled() && rt_policy(policy) &&
+			task_group(p)->rt_bandwidth.rt_runtime == 0)
 		return -EPERM;
 #endif
 
@@ -8707,73 +8708,77 @@ static DEFINE_MUTEX(rt_constraints_mutex);
 static unsigned long to_ratio(u64 period, u64 runtime)
 {
 	if (runtime == RUNTIME_INF)
-		return 1ULL << 16;
+		return 1ULL << 20;
 
-	return div64_u64(runtime << 16, period);
+	return div64_u64(runtime << 20, period);
 }
 
-#ifdef CONFIG_CGROUP_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
+/* Must be called with tasklist_lock held */
+static inline int tg_has_rt_tasks(struct task_group *tg)
 {
-	struct task_group *tgi, *parent = tg->parent;
-	unsigned long total = 0;
+	struct task_struct *g, *p;
 
-	if (!parent) {
-		if (global_rt_period() < period)
-			return 0;
+	do_each_thread(g, p) {
+		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
+			return 1;
+	} while_each_thread(g, p);
 
-		return to_ratio(period, runtime) <
-			to_ratio(global_rt_period(), global_rt_runtime());
-	}
+	return 0;
+}
 
-	if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
-		return 0;
+struct rt_schedulable_data {
+	struct task_group *tg;
+	u64 rt_period;
+	u64 rt_runtime;
+};
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &parent->children, siblings) {
-		if (tgi == tg)
-			continue;
+static int tg_schedulable(struct task_group *tg, void *data)
+{
+	struct rt_schedulable_data *d = data;
+	struct task_group *child;
+	unsigned long total, sum = 0;
+	u64 period, runtime;
+
+	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
+	runtime = tg->rt_bandwidth.rt_runtime;
 
-		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-				tgi->rt_bandwidth.rt_runtime);
+	if (tg == d->tg) {
+		period = d->rt_period;
+		runtime = d->rt_runtime;
 	}
-	rcu_read_unlock();
 
-	return total + to_ratio(period, runtime) <=
-		to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
-				parent->rt_bandwidth.rt_runtime);
-}
-#elif defined CONFIG_USER_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-{
-	struct task_group *tgi;
-	unsigned long total = 0;
-	unsigned long global_ratio =
-		to_ratio(global_rt_period(), global_rt_runtime());
+	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+		return -EBUSY;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &task_groups, list) {
-		if (tgi == tg)
-			continue;
+	total = to_ratio(period, runtime);
 
-		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-				tgi->rt_bandwidth.rt_runtime);
+	list_for_each_entry_rcu(child, &tg->children, siblings) {
+		period = ktime_to_ns(child->rt_bandwidth.rt_period);
+		runtime = child->rt_bandwidth.rt_runtime;
+
+		if (child == d->tg) {
+			period = d->rt_period;
+			runtime = d->rt_runtime;
+		}
+
+		sum += to_ratio(period, runtime);
 	}
-	rcu_read_unlock();
 
-	return total + to_ratio(period, runtime) < global_ratio;
+	if (sum > total)
+		return -EINVAL;
+
+	return 0;
 }
-#endif
 
-/* Must be called with tasklist_lock held */
-static inline int tg_has_rt_tasks(struct task_group *tg)
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 {
-	struct task_struct *g, *p;
-	do_each_thread(g, p) {
-		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
-			return 1;
-	} while_each_thread(g, p);
-	return 0;
+	struct rt_schedulable_data data = {
+		.tg = tg,
+		.rt_period = period,
+		.rt_runtime = runtime,
+	};
+
+	return walk_tg_tree(tg_schedulable, tg_nop, &data);
 }
 
 static int tg_set_bandwidth(struct task_group *tg,
@@ -8783,14 +8788,9 @@ static int tg_set_bandwidth(struct task_group *tg,
 
 	mutex_lock(&rt_constraints_mutex);
 	read_lock(&tasklist_lock);
-	if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
-		err = -EBUSY;
-		goto unlock;
-	}
-	if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
-		err = -EINVAL;
-		goto unlock;
-	}
+	err = __rt_schedulable(tg, rt_period, rt_runtime);
+	if (err)
+		goto unlock;
 
 	spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
 	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
@@ -8867,8 +8867,9 @@ static int sched_rt_global_constraints(void)
 	rt_runtime = tg->rt_bandwidth.rt_runtime;
 
 	mutex_lock(&rt_constraints_mutex);
-	if (!__rt_schedulable(tg, rt_period, rt_runtime))
-		ret = -EINVAL;
+	read_lock(&tasklist_lock);
+	ret = __rt_schedulable(tg, rt_period, rt_runtime);
+	read_unlock(&tasklist_lock);
 	mutex_unlock(&rt_constraints_mutex);
 
 	return ret;