author	Paul Turner <pjt@google.com>	2011-07-21 12:43:29 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-08-14 06:03:22 -0400
commit	a790de99599a29ad3f18667530cf4b9f4b7e3234 (patch)
tree	58a06419b7a6a90dac6028263a54b3ed3e68e40b /kernel/sched.c
parent	ab84d31e15502fb626169ba2663381e34bf965b2 (diff)
sched: Validate CFS quota hierarchies
Add constraints validation for CFS bandwidth hierarchies.

Validate that:
   max(child bandwidth) <= parent_bandwidth

In a quota-limited hierarchy, an unconstrained entity
(e.g. bandwidth==RUNTIME_INF) inherits the bandwidth of its parent.

This constraint is chosen over sum(child_bandwidth) as the notion of
over-commit is valuable within SCHED_OTHER.

Some basic code from the RT case is re-factored for reuse.

Signed-off-by: Paul Turner <pjt@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184757.083774572@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  112
1 file changed, 98 insertions(+), 14 deletions(-)
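The rule this patch enforces can be sketched outside the kernel: each group's bandwidth is normalized to a fixed-point ratio of quota to period (to_ratio() scales by 1 << 20), an unconstrained child inherits its parent's ratio, and a constrained child must not exceed it. The stand-alone C sketch below mirrors that per-level check with made-up values; validate_child() is a hypothetical helper for illustration, not a kernel function.

#include <stdio.h>
#include <stdint.h>

#define RUNTIME_INF (~0ULL)	/* "no limit" sentinel, as in the kernel */

/* Same arithmetic as the kernel's to_ratio(): runtime/period in 20-bit
 * fixed point, so 1 << 20 means 100% of one CPU. */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	if (runtime == RUNTIME_INF)
		return RUNTIME_INF;
	return (runtime << 20) / period;
}

/* Hypothetical helper: returns 0 if the child setting satisfies
 * max(child_bandwidth) <= parent_bandwidth, -1 otherwise.
 * An unconstrained child (RUNTIME_INF) inherits the parent's ratio. */
static int validate_child(uint64_t parent_period, uint64_t parent_quota,
			  uint64_t child_period, uint64_t child_quota)
{
	uint64_t parent = to_ratio(parent_period, parent_quota);
	uint64_t child = to_ratio(child_period, child_quota);

	if (child == RUNTIME_INF)
		child = parent;		/* inherit the parent's limit */
	else if (parent != RUNTIME_INF && child > parent)
		return -1;		/* child exceeds its parent */
	return 0;
}

int main(void)
{
	/* parent capped at 50ms per 100ms (50%); child asks for 75ms/100ms: rejected */
	printf("%d\n", validate_child(100000, 50000, 100000, 75000));
	/* child asks for 100ms per 500ms (20%): accepted even though 100ms > 50ms */
	printf("%d\n", validate_child(100000, 50000, 500000, 100000));
	/* unconstrained child simply inherits the 50% cap: accepted */
	printf("%d\n", validate_child(100000, 50000, 100000, RUNTIME_INF));
	return 0;
}

The second case is the reason quota and period are normalized before comparison: the child's absolute quota (100ms) is larger than the parent's (50ms), but its share of CPU time is not.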
diff --git a/kernel/sched.c b/kernel/sched.c
index f08cb23be96c..ea6850d93b2a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -252,6 +252,7 @@ struct cfs_bandwidth {
 	raw_spinlock_t lock;
 	ktime_t period;
 	u64 quota;
+	s64 hierarchal_quota;
 #endif
 };
 
@@ -1518,7 +1519,8 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
 	update_load_sub(&rq->load, load);
 }
 
-#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
+#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
+	(defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH)))
 typedef int (*tg_visitor)(struct task_group *, void *);
 
 /*
@@ -8708,12 +8710,7 @@ unsigned long sched_group_shares(struct task_group *tg)
 }
 #endif
 
-#ifdef CONFIG_RT_GROUP_SCHED
-/*
- * Ensure that the real time constraints are schedulable.
- */
-static DEFINE_MUTEX(rt_constraints_mutex);
-
+#if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH)
 static unsigned long to_ratio(u64 period, u64 runtime)
 {
 	if (runtime == RUNTIME_INF)
@@ -8721,6 +8718,13 @@ static unsigned long to_ratio(u64 period, u64 runtime)
 
 	return div64_u64(runtime << 20, period);
 }
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+/*
+ * Ensure that the real time constraints are schedulable.
+ */
+static DEFINE_MUTEX(rt_constraints_mutex);
 
 /* Must be called with tasklist_lock held */
 static inline int tg_has_rt_tasks(struct task_group *tg)
@@ -8741,7 +8745,7 @@ struct rt_schedulable_data {
 	u64 rt_runtime;
 };
 
-static int tg_schedulable(struct task_group *tg, void *data)
+static int tg_rt_schedulable(struct task_group *tg, void *data)
 {
 	struct rt_schedulable_data *d = data;
 	struct task_group *child;
@@ -8805,7 +8809,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 		.rt_runtime = runtime,
 	};
 
-	return walk_tg_tree(tg_schedulable, tg_nop, &data);
+	return walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
 }
 
 static int tg_set_rt_bandwidth(struct task_group *tg,
@@ -9064,14 +9068,17 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
 }
 
 #ifdef CONFIG_CFS_BANDWIDTH
+static DEFINE_MUTEX(cfs_constraints_mutex);
+
 const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
 const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
 
+static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
+
 static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 {
-	int i;
+	int i, ret = 0;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
-	static DEFINE_MUTEX(mutex);
 
 	if (tg == &root_task_group)
 		return -EINVAL;
@@ -9092,7 +9099,11 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 	if (period > max_cfs_quota_period)
 		return -EINVAL;
 
-	mutex_lock(&mutex);
+	mutex_lock(&cfs_constraints_mutex);
+	ret = __cfs_schedulable(tg, period, quota);
+	if (ret)
+		goto out_unlock;
+
 	raw_spin_lock_irq(&cfs_b->lock);
 	cfs_b->period = ns_to_ktime(period);
 	cfs_b->quota = quota;
@@ -9107,9 +9118,10 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 		cfs_rq->runtime_remaining = 0;
 		raw_spin_unlock_irq(&rq->lock);
 	}
-	mutex_unlock(&mutex);
+out_unlock:
+	mutex_unlock(&cfs_constraints_mutex);
 
-	return 0;
+	return ret;
 }
 
 int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
@@ -9183,6 +9195,78 @@ static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype,
 	return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us);
 }
 
+struct cfs_schedulable_data {
+	struct task_group *tg;
+	u64 period, quota;
+};
+
+/*
+ * normalize group quota/period to be quota/max_period
+ * note: units are usecs
+ */
+static u64 normalize_cfs_quota(struct task_group *tg,
+			       struct cfs_schedulable_data *d)
+{
+	u64 quota, period;
+
+	if (tg == d->tg) {
+		period = d->period;
+		quota = d->quota;
+	} else {
+		period = tg_get_cfs_period(tg);
+		quota = tg_get_cfs_quota(tg);
+	}
+
+	/* note: these should typically be equivalent */
+	if (quota == RUNTIME_INF || quota == -1)
+		return RUNTIME_INF;
+
+	return to_ratio(period, quota);
+}
+
+static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
+{
+	struct cfs_schedulable_data *d = data;
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
+	s64 quota = 0, parent_quota = -1;
+
+	if (!tg->parent) {
+		quota = RUNTIME_INF;
+	} else {
+		struct cfs_bandwidth *parent_b = tg_cfs_bandwidth(tg->parent);
+
+		quota = normalize_cfs_quota(tg, d);
+		parent_quota = parent_b->hierarchal_quota;
+
+		/*
+		 * ensure max(child_quota) <= parent_quota, inherit when no
+		 * limit is set
+		 */
+		if (quota == RUNTIME_INF)
+			quota = parent_quota;
+		else if (parent_quota != RUNTIME_INF && quota > parent_quota)
+			return -EINVAL;
+	}
+	cfs_b->hierarchal_quota = quota;
+
+	return 0;
+}
+
+static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
+{
+	struct cfs_schedulable_data data = {
+		.tg = tg,
+		.period = period,
+		.quota = quota,
+	};
+
+	if (quota != RUNTIME_INF) {
+		do_div(data.period, NSEC_PER_USEC);
+		do_div(data.quota, NSEC_PER_USEC);
+	}
+
+	return walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data);
+}
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
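One detail worth noting after the diff: __cfs_schedulable() divides period and quota down from nanoseconds to microseconds before the tree walk, since normalize_cfs_quota() works in the same microsecond units returned by tg_get_cfs_period()/tg_get_cfs_quota(), and it skips the division when quota is RUNTIME_INF so the "no limit" sentinel is not destroyed. A minimal stand-alone illustration of that conversion, using hypothetical values rather than kernel code:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_USEC 1000ULL
#define RUNTIME_INF   (~0ULL)

int main(void)
{
	uint64_t period = 100000000ULL;	/* 100ms, in nanoseconds */
	uint64_t quota  = RUNTIME_INF;	/* "no limit" sentinel */

	period /= NSEC_PER_USEC;	/* 100000us, the unit the tree walk expects */
	if (quota != RUNTIME_INF)	/* dividing the sentinel would corrupt it */
		quota /= NSEC_PER_USEC;

	printf("period=%llu quota=%llu\n",
	       (unsigned long long)period, (unsigned long long)quota);
	return 0;
}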