path: root/kernel/sched
author     Peter Zijlstra <peterz@infradead.org>  2013-12-17 06:44:49 -0500
committer  Ingo Molnar <mingo@kernel.org>         2014-01-13 07:47:23 -0500
commit     1724813d9f2c7ff702b46d3e4a4f6d9b10a8f8c2 (patch)
tree       6df01bd6de3641b7f62440e22cf1ac8020b82ce5 /kernel/sched
parent     e4099a5e929435cd6349343f002583f29868c900 (diff)
sched/deadline: Remove the sysctl_sched_dl knobs
Remove the deadline-specific sysctls for now. The problem with them is that the interaction with the existing rt knobs is nearly impossible to get right.

The current situation (as per before this patch) is that the rt and dl bandwidths are completely separate and we enforce rt + dl < 100%. This is undesirable because the rt default of 95% leaves us hardly any room, even though dl tasks are safer than rt tasks.

Another proposed solution (a discarded patch) was to have the dl bandwidth be a fraction of the rt bandwidth. This is highly confusing IMO.

Furthermore, neither proposal is consistent with the situation we actually want, which is rt tasks run from a dl server; in that case the rt bandwidth is a direct subset of dl.

So whichever way we go, the introduction of dl controls at this point is painful. Therefore remove them and instead share the rt budget. This means that, for now, the rt knobs are used for dl admission control and the dl runtime is accounted against the rt runtime.

I realise that this isn't entirely desirable either; but whatever we do we appear to need to change the interface later, so better to have a small interface for now.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-zpyqbqds1r0vyxtxza1e7rdc@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
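For readers less familiar with how the scheduler expresses bandwidth: both the rt and dl admission paths boil down to comparing utilization ratios of the form runtime/period, computed by to_ratio(), against the budget derived from the shared sched_rt_period_us/sched_rt_runtime_us knobs. The standalone sketch below is not the kernel code itself; it only illustrates that fixed-point comparison. The to_ratio() shape mirrors the kernel helper, while BW_UNIT, the default 95% values, and the example task parameters are illustrative assumptions.

#include <stdint.h>
#include <stdio.h>

#define BW_UNIT         (1ULL << 20)    /* fixed-point "100%", same scale the scheduler uses */
#define RUNTIME_INF     (~0ULL)

/* runtime/period as a 20-bit fixed-point fraction, roughly what to_ratio() does */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
        if (runtime == RUNTIME_INF)
                return BW_UNIT;
        return runtime * BW_UNIT / period;
}

int main(void)
{
        /* default rt knobs: 950000us runtime every 1000000us -> 95% cap */
        uint64_t cap = to_ratio(1000000, 950000);

        /* a hypothetical SCHED_DEADLINE task asking for 10ms every 100ms -> ~10% */
        uint64_t task_bw = to_ratio(100000, 10000);

        /*
         * Admission-control idea: the summed dl utilization must stay within
         * the cap (the kernel additionally scales the cap by the number of
         * CPUs in the root domain; omitted here for brevity).
         */
        printf("cap=%llu task=%llu admitted=%d\n",
               (unsigned long long)cap, (unsigned long long)task_bw,
               task_bw <= cap);
        return 0;
}

With the patch applied, writing /proc/sys/kernel/sched_rt_runtime_us adjusts this shared cap for both classes, which is what sched_rt_handler() propagates via sched_rt_do_global() and sched_dl_do_global() in the diff below.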
Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/core.c      259
-rw-r--r--  kernel/sched/deadline.c   27
-rw-r--r--  kernel/sched/sched.h      18
3 files changed, 97 insertions, 207 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 27c6375d182a..1d33eb8143cc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6771,7 +6771,7 @@ void __init sched_init(void)
         init_rt_bandwidth(&def_rt_bandwidth,
                         global_rt_period(), global_rt_runtime());
         init_dl_bandwidth(&def_dl_bandwidth,
-                        global_dl_period(), global_dl_runtime());
+                        global_rt_period(), global_rt_runtime());
 
 #ifdef CONFIG_SMP
         init_defrootdomain();
@@ -7354,64 +7354,11 @@ static long sched_group_rt_period(struct task_group *tg)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-/*
- * Coupling of -rt and -deadline bandwidth.
- *
- * Here we check if the new -rt bandwidth value is consistent
- * with the system settings for the bandwidth available
- * to -deadline tasks.
- *
- * IOW, we want to enforce that
- *
- *   rt_bandwidth + dl_bandwidth <= 100%
- *
- * is always true.
- */
-static bool __sched_rt_dl_global_constraints(u64 rt_bw)
-{
-        unsigned long flags;
-        u64 dl_bw;
-        bool ret;
-
-        raw_spin_lock_irqsave(&def_dl_bandwidth.dl_runtime_lock, flags);
-        if (global_rt_runtime() == RUNTIME_INF ||
-            global_dl_runtime() == RUNTIME_INF) {
-                ret = true;
-                goto unlock;
-        }
-
-        dl_bw = to_ratio(def_dl_bandwidth.dl_period,
-                         def_dl_bandwidth.dl_runtime);
-
-        ret = rt_bw + dl_bw <= to_ratio(RUNTIME_INF, RUNTIME_INF);
-unlock:
-        raw_spin_unlock_irqrestore(&def_dl_bandwidth.dl_runtime_lock, flags);
-
-        return ret;
-}
-
 #ifdef CONFIG_RT_GROUP_SCHED
 static int sched_rt_global_constraints(void)
 {
-        u64 runtime, period, bw;
         int ret = 0;
 
-        if (sysctl_sched_rt_period <= 0)
-                return -EINVAL;
-
-        runtime = global_rt_runtime();
-        period = global_rt_period();
-
-        /*
-         * Sanity check on the sysctl variables.
-         */
-        if (runtime > period && runtime != RUNTIME_INF)
-                return -EINVAL;
-
-        bw = to_ratio(period, runtime);
-        if (!__sched_rt_dl_global_constraints(bw))
-                return -EINVAL;
-
         mutex_lock(&rt_constraints_mutex);
         read_lock(&tasklist_lock);
         ret = __rt_schedulable(NULL, 0, 0);
@@ -7435,18 +7382,8 @@ static int sched_rt_global_constraints(void)
 {
         unsigned long flags;
         int i, ret = 0;
-        u64 bw;
-
-        if (sysctl_sched_rt_period <= 0)
-                return -EINVAL;
 
         raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
-        bw = to_ratio(global_rt_period(), global_rt_runtime());
-        if (!__sched_rt_dl_global_constraints(bw)) {
-                ret = -EINVAL;
-                goto unlock;
-        }
-
         for_each_possible_cpu(i) {
                 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
 
@@ -7454,69 +7391,18 @@ static int sched_rt_global_constraints(void)
                 rt_rq->rt_runtime = global_rt_runtime();
                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
         }
-unlock:
         raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
 
         return ret;
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-/*
- * Coupling of -dl and -rt bandwidth.
- *
- * Here we check, while setting the system wide bandwidth available
- * for -dl tasks and groups, if the new values are consistent with
- * the system settings for the bandwidth available to -rt entities.
- *
- * IOW, we want to enforce that
- *
- *   rt_bandwidth + dl_bandwidth <= 100%
- *
- * is always true.
- */
-static bool __sched_dl_rt_global_constraints(u64 dl_bw)
-{
-        u64 rt_bw;
-        bool ret;
-
-        raw_spin_lock(&def_rt_bandwidth.rt_runtime_lock);
-        if (global_dl_runtime() == RUNTIME_INF ||
-            global_rt_runtime() == RUNTIME_INF) {
-                ret = true;
-                goto unlock;
-        }
-
-        rt_bw = to_ratio(ktime_to_ns(def_rt_bandwidth.rt_period),
-                         def_rt_bandwidth.rt_runtime);
-
-        ret = rt_bw + dl_bw <= to_ratio(RUNTIME_INF, RUNTIME_INF);
-unlock:
-        raw_spin_unlock(&def_rt_bandwidth.rt_runtime_lock);
-
-        return ret;
-}
-
-static bool __sched_dl_global_constraints(u64 runtime, u64 period)
-{
-        if (!period || (runtime != RUNTIME_INF && runtime > period))
-                return -EINVAL;
-
-        return 0;
-}
-
 static int sched_dl_global_constraints(void)
 {
-        u64 runtime = global_dl_runtime();
-        u64 period = global_dl_period();
+        u64 runtime = global_rt_runtime();
+        u64 period = global_rt_period();
         u64 new_bw = to_ratio(period, runtime);
-        int ret, i;
-
-        ret = __sched_dl_global_constraints(runtime, period);
-        if (ret)
-                return ret;
-
-        if (!__sched_dl_rt_global_constraints(new_bw))
-                return -EINVAL;
+        int cpu, ret = 0;
 
         /*
          * Here we want to check the bandwidth not being set to some
@@ -7527,46 +7413,68 @@ static int sched_dl_global_constraints(void)
          * cycling on root_domains... Discussion on different/better
          * solutions is welcome!
          */
-        for_each_possible_cpu(i) {
-                struct dl_bw *dl_b = dl_bw_of(i);
+        for_each_possible_cpu(cpu) {
+                struct dl_bw *dl_b = dl_bw_of(cpu);
 
                 raw_spin_lock(&dl_b->lock);
-                if (new_bw < dl_b->total_bw) {
-                        raw_spin_unlock(&dl_b->lock);
-                        return -EBUSY;
-                }
+                if (new_bw < dl_b->total_bw)
+                        ret = -EBUSY;
                 raw_spin_unlock(&dl_b->lock);
+
+                if (ret)
+                        break;
         }
 
-        return 0;
+        return ret;
 }
 
-int sched_rr_handler(struct ctl_table *table, int write,
-                void __user *buffer, size_t *lenp,
-                loff_t *ppos)
+static void sched_dl_do_global(void)
 {
-        int ret;
-        static DEFINE_MUTEX(mutex);
+        u64 new_bw = -1;
+        int cpu;
 
-        mutex_lock(&mutex);
-        ret = proc_dointvec(table, write, buffer, lenp, ppos);
-        /* make sure that internally we keep jiffies */
-        /* also, writing zero resets timeslice to default */
-        if (!ret && write) {
-                sched_rr_timeslice = sched_rr_timeslice <= 0 ?
-                        RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
+        def_dl_bandwidth.dl_period = global_rt_period();
+        def_dl_bandwidth.dl_runtime = global_rt_runtime();
+
+        if (global_rt_runtime() != RUNTIME_INF)
+                new_bw = to_ratio(global_rt_period(), global_rt_runtime());
+
+        /*
+         * FIXME: As above...
+         */
+        for_each_possible_cpu(cpu) {
+                struct dl_bw *dl_b = dl_bw_of(cpu);
+
+                raw_spin_lock(&dl_b->lock);
+                dl_b->bw = new_bw;
+                raw_spin_unlock(&dl_b->lock);
         }
-        mutex_unlock(&mutex);
-        return ret;
+}
+
+static int sched_rt_global_validate(void)
+{
+        if (sysctl_sched_rt_period <= 0)
+                return -EINVAL;
+
+        if (sysctl_sched_rt_runtime > sysctl_sched_rt_period)
+                return -EINVAL;
+
+        return 0;
+}
+
+static void sched_rt_do_global(void)
+{
+        def_rt_bandwidth.rt_runtime = global_rt_runtime();
+        def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
 }
 
 int sched_rt_handler(struct ctl_table *table, int write,
                 void __user *buffer, size_t *lenp,
                 loff_t *ppos)
 {
-        int ret;
         int old_period, old_runtime;
         static DEFINE_MUTEX(mutex);
+        int ret;
 
         mutex_lock(&mutex);
         old_period = sysctl_sched_rt_period;
@@ -7575,72 +7483,47 @@ int sched_rt_handler(struct ctl_table *table, int write,
         ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
         if (!ret && write) {
+                ret = sched_rt_global_validate();
+                if (ret)
+                        goto undo;
+
                 ret = sched_rt_global_constraints();
-                if (ret) {
-                        sysctl_sched_rt_period = old_period;
-                        sysctl_sched_rt_runtime = old_runtime;
-                } else {
-                        def_rt_bandwidth.rt_runtime = global_rt_runtime();
-                        def_rt_bandwidth.rt_period =
-                                ns_to_ktime(global_rt_period());
-                }
+                if (ret)
+                        goto undo;
+
+                ret = sched_dl_global_constraints();
+                if (ret)
+                        goto undo;
+
+                sched_rt_do_global();
+                sched_dl_do_global();
+        }
+        if (0) {
+undo:
+                sysctl_sched_rt_period = old_period;
+                sysctl_sched_rt_runtime = old_runtime;
         }
         mutex_unlock(&mutex);
 
         return ret;
 }
 
-int sched_dl_handler(struct ctl_table *table, int write,
+int sched_rr_handler(struct ctl_table *table, int write,
                 void __user *buffer, size_t *lenp,
                 loff_t *ppos)
 {
         int ret;
-        int old_period, old_runtime;
         static DEFINE_MUTEX(mutex);
-        unsigned long flags;
 
         mutex_lock(&mutex);
-        old_period = sysctl_sched_dl_period;
-        old_runtime = sysctl_sched_dl_runtime;
-
         ret = proc_dointvec(table, write, buffer, lenp, ppos);
-
+        /* make sure that internally we keep jiffies */
+        /* also, writing zero resets timeslice to default */
         if (!ret && write) {
-                raw_spin_lock_irqsave(&def_dl_bandwidth.dl_runtime_lock,
-                                      flags);
-
-                ret = sched_dl_global_constraints();
-                if (ret) {
-                        sysctl_sched_dl_period = old_period;
-                        sysctl_sched_dl_runtime = old_runtime;
-                } else {
-                        u64 new_bw;
-                        int i;
-
-                        def_dl_bandwidth.dl_period = global_dl_period();
-                        def_dl_bandwidth.dl_runtime = global_dl_runtime();
-                        if (global_dl_runtime() == RUNTIME_INF)
-                                new_bw = -1;
-                        else
-                                new_bw = to_ratio(global_dl_period(),
-                                                  global_dl_runtime());
-                        /*
-                         * FIXME: As above...
-                         */
-                        for_each_possible_cpu(i) {
-                                struct dl_bw *dl_b = dl_bw_of(i);
-
-                                raw_spin_lock(&dl_b->lock);
-                                dl_b->bw = new_bw;
-                                raw_spin_unlock(&dl_b->lock);
-                        }
-                }
-
-                raw_spin_unlock_irqrestore(&def_dl_bandwidth.dl_runtime_lock,
-                                           flags);
+                sched_rr_timeslice = sched_rr_timeslice <= 0 ?
+                        RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
         }
         mutex_unlock(&mutex);
-
         return ret;
 }
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 0c6b1d089cd4..ee25361becdd 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -63,10 +63,10 @@ void init_dl_bw(struct dl_bw *dl_b)
 {
         raw_spin_lock_init(&dl_b->lock);
         raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
-        if (global_dl_runtime() == RUNTIME_INF)
+        if (global_rt_runtime() == RUNTIME_INF)
                 dl_b->bw = -1;
         else
-                dl_b->bw = to_ratio(global_dl_period(), global_dl_runtime());
+                dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
         raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
         dl_b->total_bw = 0;
 }
@@ -612,6 +612,29 @@ static void update_curr_dl(struct rq *rq)
                 if (!is_leftmost(curr, &rq->dl))
                         resched_task(curr);
         }
+
+        /*
+         * Because -- for now -- we share the rt bandwidth, we need to
+         * account our runtime there too, otherwise actual rt tasks
+         * would be able to exceed the shared quota.
+         *
+         * Account to the root rt group for now.
+         *
+         * The solution we're working towards is having the RT groups scheduled
+         * using deadline servers -- however there's a few nasties to figure
+         * out before that can happen.
+         */
+        if (rt_bandwidth_enabled()) {
+                struct rt_rq *rt_rq = &rq->rt;
+
+                raw_spin_lock(&rt_rq->rt_runtime_lock);
+                rt_rq->rt_time += delta_exec;
+                /*
+                 * We'll let actual RT tasks worry about the overflow here, we
+                 * have our own CBS to keep us inline -- see above.
+                 */
+                raw_spin_unlock(&rt_rq->rt_runtime_lock);
+        }
 }
 
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2b7421db6c41..890339099550 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -176,7 +176,7 @@ struct dl_bandwidth {
 
 static inline int dl_bandwidth_enabled(void)
 {
-        return sysctl_sched_dl_runtime >= 0;
+        return sysctl_sched_rt_runtime >= 0;
 }
 
 extern struct dl_bw *dl_bw_of(int i);
@@ -186,9 +186,6 @@ struct dl_bw {
         u64 bw, total_bw;
 };
 
-static inline u64 global_dl_period(void);
-static inline u64 global_dl_runtime(void);
-
 extern struct mutex sched_domains_mutex;
 
 #ifdef CONFIG_CGROUP_SCHED
@@ -953,19 +950,6 @@ static inline u64 global_rt_runtime(void)
         return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
 }
 
-static inline u64 global_dl_period(void)
-{
-        return (u64)sysctl_sched_dl_period * NSEC_PER_USEC;
-}
-
-static inline u64 global_dl_runtime(void)
-{
-        if (sysctl_sched_dl_runtime < 0)
-                return RUNTIME_INF;
-
-        return (u64)sysctl_sched_dl_runtime * NSEC_PER_USEC;
-}
-
 static inline int task_current(struct rq *rq, struct task_struct *p)
 {
         return rq->curr == p;