author		Paul Turner <pjt@google.com>		2011-07-21 12:43:30 -0400
committer	Ingo Molnar <mingo@elte.hu>		2011-08-14 06:03:26 -0400
commit		ec12cb7f31e28854efae7dd6f9544e0a66379040
tree		30a7293a4f9d566043f524bb4c43d4ae8b0560db
parent		a790de99599a29ad3f18667530cf4b9f4b7e3234
sched: Accumulate per-cfs_rq cpu usage and charge against bandwidth
Account bandwidth usage at the cfs_rq level rather than against the task_group
to which each cfs_rq belongs. Whether we are tracking bandwidth on a given
cfs_rq is maintained under cfs_rq->runtime_enabled.

cfs_rqs which belong to a bandwidth-constrained task_group have their runtime
accounted via the update_curr() path, which withdraws bandwidth from the global
pool as needed. Updates involving the global pool are currently protected by
cfs_bandwidth->lock; local runtime is protected by rq->lock.

This patch only assigns and tracks quota; no action is taken in the case that
cfs_rq->runtime_used exceeds cfs_rq->runtime_assigned.
Signed-off-by: Paul Turner <pjt@google.com>
Signed-off-by: Nikhil Rao <ncrao@google.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184757.179386821@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	include/linux/sched.h	 4
-rw-r--r--	kernel/sched.c		 4
-rw-r--r--	kernel/sched_fair.c	79
-rw-r--r--	kernel/sysctl.c		10

4 files changed, 94 insertions(+), 3 deletions(-)
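For orientation, the two-level scheme the changelog describes (a global per-task_group pool drained in slice-sized chunks into per-cfs_rq local pools) reduces to the stand-alone, user-space sketch below. This is illustrative only and not part of the patch: locking, RUNTIME_INF, per-cpu runqueues and any throttling action are left out, and the names only loosely mirror the kernel ones.

/*
 * Toy model of the patch's accounting: a global pool (quota, runtime) per
 * task_group and one local pool (runtime_remaining) per cfs_rq, topped up
 * in slice-sized chunks.  Illustrative only.
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_USEC	1000ULL

static uint64_t sysctl_slice_us = 5000;		/* 5 ms, the patch default */

struct global_pool {				/* stands in for cfs_bandwidth */
	uint64_t quota;				/* allowance per period (ns) */
	uint64_t runtime;			/* left this period (ns) */
};

struct local_pool {				/* stands in for one cfs_rq */
	int64_t runtime_remaining;		/* locally cached runtime (ns) */
};

static uint64_t slice_ns(void)
{
	return sysctl_slice_us * NSEC_PER_USEC;
}

/* analogue of assign_cfs_rq_runtime(): pull at most one slice's worth */
static void assign_runtime(struct global_pool *g, struct local_pool *l)
{
	/* positive request, since runtime_remaining <= 0 at this point */
	uint64_t want = slice_ns() - l->runtime_remaining;
	uint64_t grant = want < g->runtime ? want : g->runtime;

	g->runtime -= grant;
	l->runtime_remaining += grant;
}

/* analogue of __account_cfs_rq_runtime(): charge execution, refill on empty */
static void account_runtime(struct global_pool *g, struct local_pool *l,
			    uint64_t delta_exec)
{
	l->runtime_remaining -= (int64_t)delta_exec;
	if (l->runtime_remaining > 0)
		return;
	assign_runtime(g, l);
}

int main(void)
{
	struct global_pool g = { .quota = 20000000, .runtime = 20000000 };
	struct local_pool l = { .runtime_remaining = 0 };
	int i;

	account_runtime(&g, &l, 0);			/* initial top-up */
	for (i = 0; i < 10; i++)
		account_runtime(&g, &l, 3000000);	/* run 3 ms at a time */

	printf("global runtime left: %llu ns, local: %lld ns\n",
	       (unsigned long long)g.runtime, (long long)l.runtime_remaining);
	return 0;
}

Running it shows the local pool going negative once the 20 ms global pool is exhausted, which is the state this patch tracks but does not yet act on.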
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4ac2c0578e0f..bc6f5f2e24fa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2040,6 +2040,10 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 #endif
 
+#ifdef CONFIG_CFS_BANDWIDTH
+extern unsigned int sysctl_sched_cfs_bandwidth_slice;
+#endif
+
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
diff --git a/kernel/sched.c b/kernel/sched.c
index ea6850d93b2a..35561c63a490 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -251,7 +251,7 @@ struct cfs_bandwidth {
 #ifdef CONFIG_CFS_BANDWIDTH
 	raw_spinlock_t lock;
 	ktime_t period;
-	u64 quota;
+	u64 quota, runtime;
 	s64 hierarchal_quota;
 #endif
 };
@@ -407,6 +407,7 @@ static inline u64 default_cfs_period(void);
 static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 {
 	raw_spin_lock_init(&cfs_b->lock);
+	cfs_b->runtime = 0;
 	cfs_b->quota = RUNTIME_INF;
 	cfs_b->period = ns_to_ktime(default_cfs_period());
 }
@@ -9107,6 +9108,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 	raw_spin_lock_irq(&cfs_b->lock);
 	cfs_b->period = ns_to_ktime(period);
 	cfs_b->quota = quota;
+	cfs_b->runtime = quota;
 	raw_spin_unlock_irq(&cfs_b->lock);
 
 	for_each_possible_cpu(i) {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f24f4171019d..9502aa899f73 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -89,6 +89,20 @@ const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
  */
 unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
 
+#ifdef CONFIG_CFS_BANDWIDTH
+/*
+ * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
+ * each time a cfs_rq requests quota.
+ *
+ * Note: in the case that the slice exceeds the runtime remaining (either due
+ * to consumption or the quota being specified to be smaller than the slice)
+ * we will always only issue the remaining available time.
+ *
+ * default: 5 msec, units: microseconds
+ */
+unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
+#endif
+
 static const struct sched_class fair_sched_class;
 
 /**************************************************************
@@ -292,6 +306,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
+static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
+				   unsigned long delta_exec);
 
 /**************************************************************
  * Scheduling class tree data structure manipulation methods:
@@ -583,6 +599,8 @@ static void update_curr(struct cfs_rq *cfs_rq)
 		cpuacct_charge(curtask, delta_exec);
 		account_group_exec_runtime(curtask, delta_exec);
 	}
+
+	account_cfs_rq_runtime(cfs_rq, delta_exec);
 }
 
 static inline void
@@ -1248,6 +1266,58 @@ static inline u64 default_cfs_period(void)
 {
 	return 100000000ULL;
 }
+
+static inline u64 sched_cfs_bandwidth_slice(void)
+{
+	return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC;
+}
+
+static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+{
+	struct task_group *tg = cfs_rq->tg;
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
+	u64 amount = 0, min_amount;
+
+	/* note: this is a positive sum as runtime_remaining <= 0 */
+	min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
+
+	raw_spin_lock(&cfs_b->lock);
+	if (cfs_b->quota == RUNTIME_INF)
+		amount = min_amount;
+	else if (cfs_b->runtime > 0) {
+		amount = min(cfs_b->runtime, min_amount);
+		cfs_b->runtime -= amount;
+	}
+	raw_spin_unlock(&cfs_b->lock);
+
+	cfs_rq->runtime_remaining += amount;
+}
+
+static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
+				     unsigned long delta_exec)
+{
+	if (!cfs_rq->runtime_enabled)
+		return;
+
+	cfs_rq->runtime_remaining -= delta_exec;
+	if (cfs_rq->runtime_remaining > 0)
+		return;
+
+	assign_cfs_rq_runtime(cfs_rq);
+}
+
+static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
+						   unsigned long delta_exec)
+{
+	if (!cfs_rq->runtime_enabled)
+		return;
+
+	__account_cfs_rq_runtime(cfs_rq, delta_exec);
+}
+
+#else
+static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
+				   unsigned long delta_exec) {}
 #endif
 
 /**************************************************
@@ -4266,8 +4336,13 @@ static void set_curr_task_fair(struct rq *rq)
 {
 	struct sched_entity *se = &rq->curr->se;
 
-	for_each_sched_entity(se)
-		set_next_entity(cfs_rq_of(se), se);
+	for_each_sched_entity(se) {
+		struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+		set_next_entity(cfs_rq, se);
+		/* ensure bandwidth has been allocated on our new cfs_rq */
+		account_cfs_rq_runtime(cfs_rq, 0);
+	}
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
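The slice comment in the sched_fair.c hunk above can be made concrete with a small stand-alone calculation (illustrative only, not part of the patch): with the default 5000 us slice, a group with, say, 12 ms of global runtime left is topped up in 5 ms, 5 ms and finally 2 ms grants, the last grant clamped to what remains in the pool.

/* Illustrative only: hand out a hypothetical 12 ms of global runtime in
 * slice-sized grants, clamping the final grant as the comment describes. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t runtime = 12000000ULL;			/* 12 ms, in ns */
	const uint64_t slice = 5000 * 1000ULL;		/* 5000 us slice, in ns */

	while (runtime > 0) {
		uint64_t grant = runtime < slice ? runtime : slice;

		runtime -= grant;
		printf("granted %llu ns, %llu ns left in the global pool\n",
		       (unsigned long long)grant, (unsigned long long)runtime);
	}
	return 0;
}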
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 11d65b531e50..2d2ecdcc8cdb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -379,6 +379,16 @@ static struct ctl_table kern_table[] = {
 		.extra2		= &one,
 	},
 #endif
+#ifdef CONFIG_CFS_BANDWIDTH
+	{
+		.procname	= "sched_cfs_bandwidth_slice_us",
+		.data		= &sysctl_sched_cfs_bandwidth_slice,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+	},
+#endif
 #ifdef CONFIG_PROVE_LOCKING
 	{
 		.procname	= "prove_locking",
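Since the new ctl_table entry sits in kern_table, the knob should surface as /proc/sys/kernel/sched_cfs_bandwidth_slice_us on kernels built with CONFIG_CFS_BANDWIDTH (the path is inferred from the table placement, not stated by the patch), and with .extra1 = &one, proc_dointvec_minmax rejects writes below 1. A minimal user-space reader, assuming that path:

/* Reads the bandwidth slice sysctl; the /proc path below is an assumption
 * based on the kern_table placement shown in the hunk above. */
#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/kernel/sched_cfs_bandwidth_slice_us";
	unsigned int slice_us;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%u", &slice_us) != 1) {
		fprintf(stderr, "unexpected contents in %s\n", path);
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("CFS bandwidth slice: %u us\n", slice_us);
	return 0;
}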