author		Paul Turner <pjt@google.com>	2011-07-21 12:43:29 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-08-14 06:03:22 -0400
commit		a790de99599a29ad3f18667530cf4b9f4b7e3234 (patch)
tree		58a06419b7a6a90dac6028263a54b3ed3e68e40b /kernel/sched.c
parent		ab84d31e15502fb626169ba2663381e34bf965b2 (diff)
sched: Validate CFS quota hierarchies
Add constraints validation for CFS bandwidth hierarchies.
Validate that:
max(child bandwidth) <= parent_bandwidth
In a quota-limited hierarchy, an unconstrained entity
(e.g. bandwidth == RUNTIME_INF) inherits the bandwidth of its parent.

This constraint is chosen over sum(child_bandwidth) because the notion of
over-commit is valuable within SCHED_OTHER. Some basic code from the RT case
is refactored for reuse.
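
For illustration (the numbers here are made-up example settings, not values
from the patch), to_ratio() normalizes each group to (quota << 20) / period,
with both expressed in usecs by __cfs_schedulable():

	parent: 50ms quota / 100ms period -> to_ratio(100000, 50000) = 524288
	child:  75ms quota / 100ms period -> to_ratio(100000, 75000) = 786432

Since 786432 > 524288, tg_cfs_schedulable_down() rejects the child's
configuration with -EINVAL; a child left at quota == RUNTIME_INF instead
inherits the parent's 524288.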
Signed-off-by: Paul Turner <pjt@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184757.083774572@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
 -rw-r--r--	kernel/sched.c	112
1 file changed, 98 insertions(+), 14 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index f08cb23be96c..ea6850d93b2a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -252,6 +252,7 @@ struct cfs_bandwidth {
 	raw_spinlock_t lock;
 	ktime_t period;
 	u64 quota;
+	s64 hierarchal_quota;
 #endif
 };
 
@@ -1518,7 +1519,8 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
 	update_load_sub(&rq->load, load);
 }
 
-#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
+#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
+			(defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH)))
 typedef int (*tg_visitor)(struct task_group *, void *);
 
 /*
@@ -8708,12 +8710,7 @@ unsigned long sched_group_shares(struct task_group *tg)
 }
 #endif
 
-#ifdef CONFIG_RT_GROUP_SCHED
-/*
- * Ensure that the real time constraints are schedulable.
- */
-static DEFINE_MUTEX(rt_constraints_mutex);
-
+#if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH)
 static unsigned long to_ratio(u64 period, u64 runtime)
 {
 	if (runtime == RUNTIME_INF)
@@ -8721,6 +8718,13 @@ static unsigned long to_ratio(u64 period, u64 runtime)
 
 	return div64_u64(runtime << 20, period);
 }
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+/*
+ * Ensure that the real time constraints are schedulable.
+ */
+static DEFINE_MUTEX(rt_constraints_mutex);
 
 /* Must be called with tasklist_lock held */
 static inline int tg_has_rt_tasks(struct task_group *tg)
@@ -8741,7 +8745,7 @@ struct rt_schedulable_data {
 	u64 rt_runtime;
 };
 
-static int tg_schedulable(struct task_group *tg, void *data)
+static int tg_rt_schedulable(struct task_group *tg, void *data)
 {
 	struct rt_schedulable_data *d = data;
 	struct task_group *child;
@@ -8805,7 +8809,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 		.rt_runtime = runtime,
 	};
 
-	return walk_tg_tree(tg_schedulable, tg_nop, &data);
+	return walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
 }
 
 static int tg_set_rt_bandwidth(struct task_group *tg,
@@ -9064,14 +9068,17 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
 }
 
 #ifdef CONFIG_CFS_BANDWIDTH
+static DEFINE_MUTEX(cfs_constraints_mutex);
+
 const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
 const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
 
+static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
+
 static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 {
-	int i;
+	int i, ret = 0;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
-	static DEFINE_MUTEX(mutex);
 
 	if (tg == &root_task_group)
 		return -EINVAL;
@@ -9092,7 +9099,11 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 	if (period > max_cfs_quota_period)
 		return -EINVAL;
 
-	mutex_lock(&mutex);
+	mutex_lock(&cfs_constraints_mutex);
+	ret = __cfs_schedulable(tg, period, quota);
+	if (ret)
+		goto out_unlock;
+
 	raw_spin_lock_irq(&cfs_b->lock);
 	cfs_b->period = ns_to_ktime(period);
 	cfs_b->quota = quota;
@@ -9107,9 +9118,10 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 		cfs_rq->runtime_remaining = 0;
 		raw_spin_unlock_irq(&rq->lock);
 	}
-	mutex_unlock(&mutex);
+out_unlock:
+	mutex_unlock(&cfs_constraints_mutex);
 
-	return 0;
+	return ret;
 }
 
 int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
@@ -9183,6 +9195,78 @@ static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype,
 	return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us);
 }
 
+struct cfs_schedulable_data {
+	struct task_group *tg;
+	u64 period, quota;
+};
+
+/*
+ * normalize group quota/period to be quota/max_period
+ * note: units are usecs
+ */
+static u64 normalize_cfs_quota(struct task_group *tg,
+			       struct cfs_schedulable_data *d)
+{
+	u64 quota, period;
+
+	if (tg == d->tg) {
+		period = d->period;
+		quota = d->quota;
+	} else {
+		period = tg_get_cfs_period(tg);
+		quota = tg_get_cfs_quota(tg);
+	}
+
+	/* note: these should typically be equivalent */
+	if (quota == RUNTIME_INF || quota == -1)
+		return RUNTIME_INF;
+
+	return to_ratio(period, quota);
+}
+
+static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
+{
+	struct cfs_schedulable_data *d = data;
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
+	s64 quota = 0, parent_quota = -1;
+
+	if (!tg->parent) {
+		quota = RUNTIME_INF;
+	} else {
+		struct cfs_bandwidth *parent_b = tg_cfs_bandwidth(tg->parent);
+
+		quota = normalize_cfs_quota(tg, d);
+		parent_quota = parent_b->hierarchal_quota;
+
+		/*
+		 * ensure max(child_quota) <= parent_quota, inherit when no
+		 * limit is set
+		 */
+		if (quota == RUNTIME_INF)
+			quota = parent_quota;
+		else if (parent_quota != RUNTIME_INF && quota > parent_quota)
+			return -EINVAL;
+	}
+	cfs_b->hierarchal_quota = quota;
+
+	return 0;
+}
+
+static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
+{
+	struct cfs_schedulable_data data = {
+		.tg = tg,
+		.period = period,
+		.quota = quota,
+	};
+
+	if (quota != RUNTIME_INF) {
+		do_div(data.period, NSEC_PER_USEC);
+		do_div(data.quota, NSEC_PER_USEC);
+	}
+
+	return walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data);
+}
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
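
Because tg_cfs_schedulable_down() is the "down" callback of walk_tg_tree(),
every group's effective limit is settled before its children are examined.
Below is a minimal, self-contained userspace sketch of that inherit-or-reject
rule; struct node, schedulable_down() and the example ratios are hypothetical
stand-ins for struct task_group, tg_cfs_schedulable_down() and real cgroup
settings, not kernel code:

	/*
	 * Illustrative-only sketch: mirrors the inherit-or-reject rule of
	 * tg_cfs_schedulable_down() on a tiny hand-built hierarchy.
	 */
	#include <stdio.h>
	#include <stdint.h>

	#define RUNTIME_INF	(~0ULL)

	struct node {
		const char *name;
		struct node *parent;
		uint64_t ratio;		/* normalized quota/period; RUNTIME_INF if unset */
		uint64_t hier_quota;	/* plays the role of cfs_b->hierarchal_quota */
	};

	static int schedulable_down(struct node *n)
	{
		uint64_t quota;

		if (!n->parent) {
			quota = RUNTIME_INF;
		} else {
			uint64_t parent_quota = n->parent->hier_quota;

			quota = n->ratio;
			if (quota == RUNTIME_INF)
				quota = parent_quota;	/* unconstrained: inherit */
			else if (parent_quota != RUNTIME_INF && quota > parent_quota)
				return -1;		/* child would exceed parent */
		}
		n->hier_quota = quota;
		return 0;
	}

	int main(void)
	{
		struct node root   = { "root",   NULL,    RUNTIME_INF, 0 };
		struct node parent = { "parent", &root,   524288,      0 };	/* 50ms/100ms */
		struct node child  = { "child",  &parent, 786432,      0 };	/* 75ms/100ms */
		/* parents before children, as in walk_tg_tree()'s "down" pass */
		struct node *order[] = { &root, &parent, &child };

		for (unsigned i = 0; i < sizeof(order) / sizeof(order[0]); i++) {
			if (schedulable_down(order[i])) {
				printf("%s: quota exceeds parent -> reject (-EINVAL)\n",
				       order[i]->name);
				return 1;
			}
			printf("%s: effective quota %llu\n", order[i]->name,
			       (unsigned long long)order[i]->hier_quota);
		}
		return 0;
	}

Run as-is it rejects the child (786432 > 524288); changing child.ratio to
RUNTIME_INF makes it inherit the parent's 524288 instead, mirroring the
inheritance rule described in the changelog.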