path: root/kernel/sched.c
author	Paul Turner <pjt@google.com>	2011-07-21 12:43:28 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-08-14 06:03:20 -0400
commit	ab84d31e15502fb626169ba2663381e34bf965b2 (patch)
tree	658ce7caa6199aa74c5feea92ec8d3e9a2cb4296 /kernel/sched.c
parent	953bfcd10e6f3697233e8e5128c611d275da39c1 (diff)
sched: Introduce primitives to account for CFS bandwidth tracking
In this patch we introduce the notion of CFS bandwidth, partitioned into
globally unassigned bandwidth and locally claimed bandwidth.

- The global bandwidth is per task_group; it represents a pool of unclaimed
  bandwidth that cfs_rqs can allocate from.
- The local bandwidth is tracked per cfs_rq; it represents allotments from
  the global pool assigned to a specific cpu.

Bandwidth is managed via cgroupfs, adding two new interfaces to the cpu
subsystem:

- cpu.cfs_period_us : the bandwidth period in usecs
- cpu.cfs_quota_us : the cpu bandwidth (in usecs) that this tg will be
  allowed to consume over the period above

Signed-off-by: Paul Turner <pjt@google.com>
Signed-off-by: Nikhil Rao <ncrao@google.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184756.972636699@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
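As a quick illustration of the new interface (not part of this patch): once a group exists in the cpu cgroup hierarchy, its period and quota are set by writing microsecond values to the two files above, and writing -1 to cpu.cfs_quota_us removes the limit. In the sketch below, the mount point /sys/fs/cgroup/cpu, the group name "mygroup", and the write_val() helper are assumptions made purely for the example.

#include <stdio.h>

/*
 * Illustrative only: limit a hypothetical cgroup "mygroup" to 50ms of cpu
 * time per 100ms period, i.e. roughly half of one cpu.  Paths assume the
 * cpu cgroup controller is mounted at /sys/fs/cgroup/cpu.
 */
static int write_val(const char *path, long val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%ld\n", val);
	return fclose(f);
}

int main(void)
{
	/* period first, then quota; -1 for the quota would remove the limit */
	write_val("/sys/fs/cgroup/cpu/mygroup/cpu.cfs_period_us", 100000);
	write_val("/sys/fs/cgroup/cpu/mygroup/cpu.cfs_quota_us", 50000);
	return 0;
}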
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	| 196
1 file changed, 192 insertions(+), 4 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index cd1a531ca8ff..f08cb23be96c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -247,6 +247,14 @@ struct cfs_rq;
 
 static LIST_HEAD(task_groups);
 
+struct cfs_bandwidth {
+#ifdef CONFIG_CFS_BANDWIDTH
+	raw_spinlock_t lock;
+	ktime_t period;
+	u64 quota;
+#endif
+};
+
 /* task group related information */
 struct task_group {
 	struct cgroup_subsys_state css;
@@ -278,6 +286,8 @@ struct task_group {
 #ifdef CONFIG_SCHED_AUTOGROUP
 	struct autogroup *autogroup;
 #endif
+
+	struct cfs_bandwidth cfs_bandwidth;
 };
 
 /* task_group_lock serializes the addition/removal of task groups */
@@ -377,9 +387,48 @@ struct cfs_rq {
 
 	unsigned long load_contribution;
 #endif
+#ifdef CONFIG_CFS_BANDWIDTH
+	int runtime_enabled;
+	s64 runtime_remaining;
+#endif
 #endif
 };
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_CFS_BANDWIDTH
+static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
+{
+	return &tg->cfs_bandwidth;
+}
+
+static inline u64 default_cfs_period(void);
+
+static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+	raw_spin_lock_init(&cfs_b->lock);
+	cfs_b->quota = RUNTIME_INF;
+	cfs_b->period = ns_to_ktime(default_cfs_period());
+}
+
+static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+{
+	cfs_rq->runtime_enabled = 0;
+}
+
+static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{}
+#else
+static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
+static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
+static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
+
+static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
+{
+	return NULL;
+}
+#endif /* CONFIG_CFS_BANDWIDTH */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
 /* Real-Time classes' related field in a runqueue: */
 struct rt_rq {
 	struct rt_prio_array active;
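One note on the helpers above: default_cfs_period() is only forward-declared here; its definition is added to kernel/sched_fair.c by the same series and therefore does not appear in this view, which is limited to kernel/sched.c. In the series it simply returns the default bandwidth period of 0.1s; a sketch of what that definition looks like is below, with the exact value to be confirmed against sched_fair.c rather than taken from this diff.

/*
 * Sketch of the forward-declared helper above.  The real definition lands
 * in kernel/sched_fair.c (not shown in this kernel/sched.c-only diff);
 * default period for cfs group bandwidth: 0.1s, in nanoseconds.
 */
static inline u64 default_cfs_period(void)
{
	return 100000000ULL; /* 100ms */
}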
@@ -7971,6 +8020,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 	/* allow initial update_cfs_load() to truncate */
 	cfs_rq->load_stamp = 1;
 #endif
+	init_cfs_rq_runtime(cfs_rq);
 
 	tg->cfs_rq[cpu] = cfs_rq;
 	tg->se[cpu] = se;
@@ -8110,6 +8160,7 @@ void __init sched_init(void)
 		 * We achieve this by letting root_task_group's tasks sit
 		 * directly in rq->cfs (i.e root_task_group->se[] = NULL).
 		 */
+		init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
 		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
@@ -8351,6 +8402,8 @@ static void free_fair_sched_group(struct task_group *tg)
 {
 	int i;
 
+	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
+
 	for_each_possible_cpu(i) {
 		if (tg->cfs_rq)
 			kfree(tg->cfs_rq[i]);
@@ -8378,6 +8431,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 
 	tg->shares = NICE_0_LOAD;
 
+	init_cfs_bandwidth(tg_cfs_bandwidth(tg));
+
 	for_each_possible_cpu(i) {
 		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
 				      GFP_KERNEL, cpu_to_node(i));
@@ -8753,7 +8808,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 	return walk_tg_tree(tg_schedulable, tg_nop, &data);
 }
 
-static int tg_set_bandwidth(struct task_group *tg,
+static int tg_set_rt_bandwidth(struct task_group *tg,
 		u64 rt_period, u64 rt_runtime)
 {
 	int i, err = 0;
@@ -8792,7 +8847,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 	if (rt_runtime_us < 0)
 		rt_runtime = RUNTIME_INF;
 
-	return tg_set_bandwidth(tg, rt_period, rt_runtime);
+	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
 }
 
 long sched_group_rt_runtime(struct task_group *tg)
@@ -8817,7 +8872,7 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
 	if (rt_period == 0)
 		return -EINVAL;
 
-	return tg_set_bandwidth(tg, rt_period, rt_runtime);
+	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
 }
 
 long sched_group_rt_period(struct task_group *tg)
@@ -9007,6 +9062,128 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
 
 	return (u64) scale_load_down(tg->shares);
 }
+
+#ifdef CONFIG_CFS_BANDWIDTH
+const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
+const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
+
+static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
+{
+	int i;
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
+	static DEFINE_MUTEX(mutex);
+
+	if (tg == &root_task_group)
+		return -EINVAL;
+
+	/*
+	 * Ensure we have at least some amount of bandwidth every period.
+	 * This is to prevent reaching a state of large arrears when
+	 * throttled via entity_tick() resulting in prolonged exit starvation.
+	 */
+	if (quota < min_cfs_quota_period || period < min_cfs_quota_period)
+		return -EINVAL;
+
+	/*
+	 * Likewise, bound things on the other side by preventing insane
+	 * quota periods.  This also allows us to normalize in computing
+	 * quota feasibility.
+	 */
+	if (period > max_cfs_quota_period)
+		return -EINVAL;
+
+	mutex_lock(&mutex);
+	raw_spin_lock_irq(&cfs_b->lock);
+	cfs_b->period = ns_to_ktime(period);
+	cfs_b->quota = quota;
+	raw_spin_unlock_irq(&cfs_b->lock);
+
+	for_each_possible_cpu(i) {
+		struct cfs_rq *cfs_rq = tg->cfs_rq[i];
+		struct rq *rq = rq_of(cfs_rq);
+
+		raw_spin_lock_irq(&rq->lock);
+		cfs_rq->runtime_enabled = quota != RUNTIME_INF;
+		cfs_rq->runtime_remaining = 0;
+		raw_spin_unlock_irq(&rq->lock);
+	}
+	mutex_unlock(&mutex);
+
+	return 0;
+}
+
+int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
+{
+	u64 quota, period;
+
+	period = ktime_to_ns(tg_cfs_bandwidth(tg)->period);
+	if (cfs_quota_us < 0)
+		quota = RUNTIME_INF;
+	else
+		quota = (u64)cfs_quota_us * NSEC_PER_USEC;
+
+	return tg_set_cfs_bandwidth(tg, period, quota);
+}
+
+long tg_get_cfs_quota(struct task_group *tg)
+{
+	u64 quota_us;
+
+	if (tg_cfs_bandwidth(tg)->quota == RUNTIME_INF)
+		return -1;
+
+	quota_us = tg_cfs_bandwidth(tg)->quota;
+	do_div(quota_us, NSEC_PER_USEC);
+
+	return quota_us;
+}
+
+int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
+{
+	u64 quota, period;
+
+	period = (u64)cfs_period_us * NSEC_PER_USEC;
+	quota = tg_cfs_bandwidth(tg)->quota;
+
+	if (period <= 0)
+		return -EINVAL;
+
+	return tg_set_cfs_bandwidth(tg, period, quota);
+}
+
+long tg_get_cfs_period(struct task_group *tg)
+{
+	u64 cfs_period_us;
+
+	cfs_period_us = ktime_to_ns(tg_cfs_bandwidth(tg)->period);
+	do_div(cfs_period_us, NSEC_PER_USEC);
+
+	return cfs_period_us;
+}
+
+static s64 cpu_cfs_quota_read_s64(struct cgroup *cgrp, struct cftype *cft)
+{
+	return tg_get_cfs_quota(cgroup_tg(cgrp));
+}
+
+static int cpu_cfs_quota_write_s64(struct cgroup *cgrp, struct cftype *cftype,
+				s64 cfs_quota_us)
+{
+	return tg_set_cfs_quota(cgroup_tg(cgrp), cfs_quota_us);
+}
+
+static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft)
+{
+	return tg_get_cfs_period(cgroup_tg(cgrp));
+}
+
+static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype,
+				u64 cfs_period_us)
+{
+	return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us);
+}
+
+#endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 #ifdef CONFIG_RT_GROUP_SCHED
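To make the unit handling in tg_set_cfs_quota()/tg_set_cfs_period() and the bounds in tg_set_cfs_bandwidth() concrete, here is a small userspace sketch (not part of the patch) that mirrors the same arithmetic: values cross the cgroup interface in microseconds, are stored internally in nanoseconds, a negative quota maps to RUNTIME_INF (reported back as -1), and both quota and period must be at least min_cfs_quota_period (1ms) with the period capped at max_cfs_quota_period (1s). The helper name cfs_bandwidth_valid() and the example numbers are mine, for illustration only.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC 1000ULL
#define NSEC_PER_MSEC 1000000ULL
#define NSEC_PER_SEC  1000000000ULL
#define RUNTIME_INF   ((uint64_t)~0ULL)

/*
 * Userspace mirror of the bounds enforced by tg_set_cfs_bandwidth().
 * RUNTIME_INF (~0ULL) trivially satisfies the minimum-quota check,
 * matching the kernel code above.
 */
static int cfs_bandwidth_valid(uint64_t period_ns, uint64_t quota_ns)
{
	if (quota_ns < NSEC_PER_MSEC || period_ns < NSEC_PER_MSEC)
		return 0;	/* below min_cfs_quota_period (1ms) */
	if (period_ns > NSEC_PER_SEC)
		return 0;	/* above max_cfs_quota_period (1s) */
	return 1;
}

int main(void)
{
	long cfs_period_us = 100000;	/* 100ms period */
	long cfs_quota_us  = 50000;	/* 50ms of cpu time per period */

	/* same conversions as tg_set_cfs_period()/tg_set_cfs_quota() */
	uint64_t period = (uint64_t)cfs_period_us * NSEC_PER_USEC;
	uint64_t quota  = cfs_quota_us < 0 ? RUNTIME_INF
					   : (uint64_t)cfs_quota_us * NSEC_PER_USEC;

	printf("period=%lluns quota=%lluns valid=%d\n",
	       (unsigned long long)period, (unsigned long long)quota,
	       cfs_bandwidth_valid(period, quota));
	return 0;
}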
@@ -9041,6 +9218,18 @@ static struct cftype cpu_files[] = {
 		.write_u64 = cpu_shares_write_u64,
 	},
 #endif
+#ifdef CONFIG_CFS_BANDWIDTH
+	{
+		.name = "cfs_quota_us",
+		.read_s64 = cpu_cfs_quota_read_s64,
+		.write_s64 = cpu_cfs_quota_write_s64,
+	},
+	{
+		.name = "cfs_period_us",
+		.read_u64 = cpu_cfs_period_read_u64,
+		.write_u64 = cpu_cfs_period_write_u64,
+	},
+#endif
 #ifdef CONFIG_RT_GROUP_SCHED
 	{
 		.name = "rt_runtime_us",
@@ -9350,4 +9539,3 @@ struct cgroup_subsys cpuacct_subsys = {
 	.subsys_id = cpuacct_subsys_id,
 };
 #endif /* CONFIG_CGROUP_CPUACCT */
-