path: root/kernel/sched
author     Peter Zijlstra <peterz@infradead.org>  2013-12-17 06:44:49 -0500
committer  Ingo Molnar <mingo@kernel.org>         2014-01-13 07:47:23 -0500
commit     1724813d9f2c7ff702b46d3e4a4f6d9b10a8f8c2 (patch)
tree       6df01bd6de3641b7f62440e22cf1ac8020b82ce5 /kernel/sched
parent     e4099a5e929435cd6349343f002583f29868c900 (diff)
sched/deadline: Remove the sysctl_sched_dl knobs
Remove the deadline-specific sysctls for now. The problem with them is that the interaction with the existing rt knobs is nearly impossible to get right.

The current situation (as per before this patch) is that the rt and dl bandwidths are completely separate and we enforce rt + dl < 100%. This is undesirable because the rt default of 95% leaves us hardly any room, even though dl tasks are safer than rt tasks.

Another proposed solution (a discarded patch) was to have the dl bandwidth be a fraction of the rt bandwidth. This is highly confusing IMO.

Furthermore, neither proposal is consistent with the situation we actually want, which is rt tasks run from a dl server; in that case the rt bandwidth is a direct subset of dl.

So whichever way we go, the introduction of dl controls at this point is painful. Therefore remove them and instead share the rt budget. This means that, for now, the rt knobs are used for dl admission control and the dl runtime is accounted against the rt runtime.

I realise that this isn't entirely desirable either; but whatever we do we appear to need to change the interface later, so better to have a small interface for now.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-zpyqbqds1r0vyxtxza1e7rdc@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
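For readers less familiar with how the scheduler expresses bandwidth: both the rt and dl admission paths boil down to comparing utilization ratios of the form runtime/period, computed by to_ratio(), against the budget derived from the shared sched_rt_period_us/sched_rt_runtime_us knobs. The standalone sketch below is not the kernel code itself; it only illustrates that fixed-point comparison. The to_ratio() shape mirrors the kernel helper, while BW_UNIT, the default 95% values, and the example task parameters are illustrative assumptions.

#include <stdint.h>
#include <stdio.h>

#define BW_UNIT         (1ULL << 20)    /* fixed-point "100%", same scale the scheduler uses */
#define RUNTIME_INF     (~0ULL)

/* runtime/period as a 20-bit fixed-point fraction, roughly what to_ratio() does */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
        if (runtime == RUNTIME_INF)
                return BW_UNIT;
        return runtime * BW_UNIT / period;
}

int main(void)
{
        /* default rt knobs: 950000us runtime every 1000000us -> 95% cap */
        uint64_t cap = to_ratio(1000000, 950000);

        /* a hypothetical SCHED_DEADLINE task asking for 10ms every 100ms -> ~10% */
        uint64_t task_bw = to_ratio(100000, 10000);

        /*
         * Admission-control idea: the summed dl utilization must stay within
         * the cap (the kernel additionally scales the cap by the number of
         * CPUs in the root domain; omitted here for brevity).
         */
        printf("cap=%llu task=%llu admitted=%d\n",
               (unsigned long long)cap, (unsigned long long)task_bw,
               task_bw <= cap);
        return 0;
}

With the patch applied, writing /proc/sys/kernel/sched_rt_runtime_us adjusts this shared cap for both classes, which is what sched_rt_handler() propagates via sched_rt_do_global() and sched_dl_do_global() in the diff below.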
Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/core.c      259
-rw-r--r--  kernel/sched/deadline.c   27
-rw-r--r--  kernel/sched/sched.h      18
3 files changed, 97 insertions, 207 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 27c6375d182a..1d33eb8143cc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6771,7 +6771,7 @@ void __init sched_init(void)
         init_rt_bandwidth(&def_rt_bandwidth,
                         global_rt_period(), global_rt_runtime());
         init_dl_bandwidth(&def_dl_bandwidth,
-                        global_dl_period(), global_dl_runtime());
+                        global_rt_period(), global_rt_runtime());
 
 #ifdef CONFIG_SMP
         init_defrootdomain();
@@ -7354,64 +7354,11 @@ static long sched_group_rt_period(struct task_group *tg)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-/*
- * Coupling of -rt and -deadline bandwidth.
- *
- * Here we check if the new -rt bandwidth value is consistent
- * with the system settings for the bandwidth available
- * to -deadline tasks.
- *
- * IOW, we want to enforce that
- *
- *   rt_bandwidth + dl_bandwidth <= 100%
- *
- * is always true.
- */
-static bool __sched_rt_dl_global_constraints(u64 rt_bw)
-{
-        unsigned long flags;
-        u64 dl_bw;
-        bool ret;
-
-        raw_spin_lock_irqsave(&def_dl_bandwidth.dl_runtime_lock, flags);
-        if (global_rt_runtime() == RUNTIME_INF ||
-            global_dl_runtime() == RUNTIME_INF) {
-                ret = true;
-                goto unlock;
-        }
-
-        dl_bw = to_ratio(def_dl_bandwidth.dl_period,
-                         def_dl_bandwidth.dl_runtime);
-
-        ret = rt_bw + dl_bw <= to_ratio(RUNTIME_INF, RUNTIME_INF);
-unlock:
-        raw_spin_unlock_irqrestore(&def_dl_bandwidth.dl_runtime_lock, flags);
-
-        return ret;
-}
-
 #ifdef CONFIG_RT_GROUP_SCHED
 static int sched_rt_global_constraints(void)
 {
-        u64 runtime, period, bw;
         int ret = 0;
 
-        if (sysctl_sched_rt_period <= 0)
-                return -EINVAL;
-
-        runtime = global_rt_runtime();
-        period = global_rt_period();
-
-        /*
-         * Sanity check on the sysctl variables.
-         */
-        if (runtime > period && runtime != RUNTIME_INF)
-                return -EINVAL;
-
-        bw = to_ratio(period, runtime);
-        if (!__sched_rt_dl_global_constraints(bw))
-                return -EINVAL;
-
         mutex_lock(&rt_constraints_mutex);
         read_lock(&tasklist_lock);
         ret = __rt_schedulable(NULL, 0, 0);
@@ -7435,18 +7382,8 @@ static int sched_rt_global_constraints(void)
 {
         unsigned long flags;
         int i, ret = 0;
-        u64 bw;
-
-        if (sysctl_sched_rt_period <= 0)
-                return -EINVAL;
 
         raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
-        bw = to_ratio(global_rt_period(), global_rt_runtime());
-        if (!__sched_rt_dl_global_constraints(bw)) {
-                ret = -EINVAL;
-                goto unlock;
-        }
-
         for_each_possible_cpu(i) {
                 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
 
@@ -7454,69 +7391,18 @@ static int sched_rt_global_constraints(void)
                 rt_rq->rt_runtime = global_rt_runtime();
                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
         }
-unlock:
         raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
 
         return ret;
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-/*
- * Coupling of -dl and -rt bandwidth.
- *
- * Here we check, while setting the system wide bandwidth available
- * for -dl tasks and groups, if the new values are consistent with
- * the system settings for the bandwidth available to -rt entities.
- *
- * IOW, we want to enforce that
- *
- *   rt_bandwidth + dl_bandwidth <= 100%
- *
- * is always true.
- */
-static bool __sched_dl_rt_global_constraints(u64 dl_bw)
-{
-        u64 rt_bw;
-        bool ret;
-
-        raw_spin_lock(&def_rt_bandwidth.rt_runtime_lock);
-        if (global_dl_runtime() == RUNTIME_INF ||
-            global_rt_runtime() == RUNTIME_INF) {
-                ret = true;
-                goto unlock;
-        }
-
-        rt_bw = to_ratio(ktime_to_ns(def_rt_bandwidth.rt_period),
-                         def_rt_bandwidth.rt_runtime);
-
-        ret = rt_bw + dl_bw <= to_ratio(RUNTIME_INF, RUNTIME_INF);
-unlock:
-        raw_spin_unlock(&def_rt_bandwidth.rt_runtime_lock);
-
-        return ret;
-}
-
-static bool __sched_dl_global_constraints(u64 runtime, u64 period)
-{
-        if (!period || (runtime != RUNTIME_INF && runtime > period))
-                return -EINVAL;
-
-        return 0;
-}
-
 static int sched_dl_global_constraints(void)
 {
-        u64 runtime = global_dl_runtime();
-        u64 period = global_dl_period();
+        u64 runtime = global_rt_runtime();
+        u64 period = global_rt_period();
         u64 new_bw = to_ratio(period, runtime);
-        int ret, i;
-
-        ret = __sched_dl_global_constraints(runtime, period);
-        if (ret)
-                return ret;
-
-        if (!__sched_dl_rt_global_constraints(new_bw))
-                return -EINVAL;
+        int cpu, ret = 0;
 
         /*
          * Here we want to check the bandwidth not being set to some
@@ -7527,46 +7413,68 @@ static int sched_dl_global_constraints(void)
          * cycling on root_domains... Discussion on different/better
          * solutions is welcome!
          */
-        for_each_possible_cpu(i) {
-                struct dl_bw *dl_b = dl_bw_of(i);
+        for_each_possible_cpu(cpu) {
+                struct dl_bw *dl_b = dl_bw_of(cpu);
 
                 raw_spin_lock(&dl_b->lock);
-                if (new_bw < dl_b->total_bw) {
-                        raw_spin_unlock(&dl_b->lock);
-                        return -EBUSY;
-                }
+                if (new_bw < dl_b->total_bw)
+                        ret = -EBUSY;
                 raw_spin_unlock(&dl_b->lock);
+
+                if (ret)
+                        break;
         }
 
-        return 0;
+        return ret;
 }
 
-int sched_rr_handler(struct ctl_table *table, int write,
-                void __user *buffer, size_t *lenp,
-                loff_t *ppos)
+static void sched_dl_do_global(void)
 {
-        int ret;
-        static DEFINE_MUTEX(mutex);
+        u64 new_bw = -1;
+        int cpu;
 
-        mutex_lock(&mutex);
-        ret = proc_dointvec(table, write, buffer, lenp, ppos);
-        /* make sure that internally we keep jiffies */
-        /* also, writing zero resets timeslice to default */
-        if (!ret && write) {
-                sched_rr_timeslice = sched_rr_timeslice <= 0 ?
-                        RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
+        def_dl_bandwidth.dl_period = global_rt_period();
+        def_dl_bandwidth.dl_runtime = global_rt_runtime();
+
+        if (global_rt_runtime() != RUNTIME_INF)
+                new_bw = to_ratio(global_rt_period(), global_rt_runtime());
+
+        /*
+         * FIXME: As above...
+         */
+        for_each_possible_cpu(cpu) {
+                struct dl_bw *dl_b = dl_bw_of(cpu);
+
+                raw_spin_lock(&dl_b->lock);
+                dl_b->bw = new_bw;
+                raw_spin_unlock(&dl_b->lock);
         }
-        mutex_unlock(&mutex);
-        return ret;
+}
+
+static int sched_rt_global_validate(void)
+{
+        if (sysctl_sched_rt_period <= 0)
+                return -EINVAL;
+
+        if (sysctl_sched_rt_runtime > sysctl_sched_rt_period)
+                return -EINVAL;
+
+        return 0;
+}
+
+static void sched_rt_do_global(void)
+{
+        def_rt_bandwidth.rt_runtime = global_rt_runtime();
+        def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
 }
 
 int sched_rt_handler(struct ctl_table *table, int write,
                 void __user *buffer, size_t *lenp,
                 loff_t *ppos)
 {
-        int ret;
         int old_period, old_runtime;
         static DEFINE_MUTEX(mutex);
+        int ret;
 
         mutex_lock(&mutex);
         old_period = sysctl_sched_rt_period;
@@ -7575,72 +7483,47 @@ int sched_rt_handler(struct ctl_table *table, int write,
         ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
         if (!ret && write) {
+                ret = sched_rt_global_validate();
+                if (ret)
+                        goto undo;
+
                 ret = sched_rt_global_constraints();
-                if (ret) {
-                        sysctl_sched_rt_period = old_period;
-                        sysctl_sched_rt_runtime = old_runtime;
-                } else {
-                        def_rt_bandwidth.rt_runtime = global_rt_runtime();
-                        def_rt_bandwidth.rt_period =
-                                ns_to_ktime(global_rt_period());
-                }
+                if (ret)
+                        goto undo;
+
+                ret = sched_dl_global_constraints();
+                if (ret)
+                        goto undo;
+
+                sched_rt_do_global();
+                sched_dl_do_global();
+        }
+        if (0) {
+undo:
+                sysctl_sched_rt_period = old_period;
+                sysctl_sched_rt_runtime = old_runtime;
         }
         mutex_unlock(&mutex);
 
         return ret;
 }
 
-int sched_dl_handler(struct ctl_table *table, int write,
+int sched_rr_handler(struct ctl_table *table, int write,
                 void __user *buffer, size_t *lenp,
                 loff_t *ppos)
 {
         int ret;
-        int old_period, old_runtime;
         static DEFINE_MUTEX(mutex);
-        unsigned long flags;
 
         mutex_lock(&mutex);
-        old_period = sysctl_sched_dl_period;
-        old_runtime = sysctl_sched_dl_runtime;
-
         ret = proc_dointvec(table, write, buffer, lenp, ppos);
-
+        /* make sure that internally we keep jiffies */
+        /* also, writing zero resets timeslice to default */
         if (!ret && write) {
-                raw_spin_lock_irqsave(&def_dl_bandwidth.dl_runtime_lock,
-                                      flags);
-
-                ret = sched_dl_global_constraints();
-                if (ret) {
-                        sysctl_sched_dl_period = old_period;
-                        sysctl_sched_dl_runtime = old_runtime;
-                } else {
-                        u64 new_bw;
-                        int i;
-
-                        def_dl_bandwidth.dl_period = global_dl_period();
-                        def_dl_bandwidth.dl_runtime = global_dl_runtime();
-                        if (global_dl_runtime() == RUNTIME_INF)
-                                new_bw = -1;
-                        else
-                                new_bw = to_ratio(global_dl_period(),
-                                                  global_dl_runtime());
-                        /*
-                         * FIXME: As above...
-                         */
-                        for_each_possible_cpu(i) {
-                                struct dl_bw *dl_b = dl_bw_of(i);
-
-                                raw_spin_lock(&dl_b->lock);
-                                dl_b->bw = new_bw;
-                                raw_spin_unlock(&dl_b->lock);
-                        }
-                }
-
-                raw_spin_unlock_irqrestore(&def_dl_bandwidth.dl_runtime_lock,
-                                           flags);
+                sched_rr_timeslice = sched_rr_timeslice <= 0 ?
+                        RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
         }
         mutex_unlock(&mutex);
-
         return ret;
 }
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 0c6b1d089cd4..ee25361becdd 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -63,10 +63,10 @@ void init_dl_bw(struct dl_bw *dl_b)
 {
         raw_spin_lock_init(&dl_b->lock);
         raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
-        if (global_dl_runtime() == RUNTIME_INF)
+        if (global_rt_runtime() == RUNTIME_INF)
                 dl_b->bw = -1;
         else
-                dl_b->bw = to_ratio(global_dl_period(), global_dl_runtime());
+                dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
         raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
         dl_b->total_bw = 0;
 }
@@ -612,6 +612,29 @@ static void update_curr_dl(struct rq *rq)
                 if (!is_leftmost(curr, &rq->dl))
                         resched_task(curr);
         }
+
+        /*
+         * Because -- for now -- we share the rt bandwidth, we need to
+         * account our runtime there too, otherwise actual rt tasks
+         * would be able to exceed the shared quota.
+         *
+         * Account to the root rt group for now.
+         *
+         * The solution we're working towards is having the RT groups scheduled
+         * using deadline servers -- however there's a few nasties to figure
+         * out before that can happen.
+         */
+        if (rt_bandwidth_enabled()) {
+                struct rt_rq *rt_rq = &rq->rt;
+
+                raw_spin_lock(&rt_rq->rt_runtime_lock);
+                rt_rq->rt_time += delta_exec;
+                /*
+                 * We'll let actual RT tasks worry about the overflow here, we
+                 * have our own CBS to keep us inline -- see above.
+                 */
+                raw_spin_unlock(&rt_rq->rt_runtime_lock);
+        }
 }
 
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2b7421db6c41..890339099550 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -176,7 +176,7 @@ struct dl_bandwidth {
 
 static inline int dl_bandwidth_enabled(void)
 {
-        return sysctl_sched_dl_runtime >= 0;
+        return sysctl_sched_rt_runtime >= 0;
 }
 
 extern struct dl_bw *dl_bw_of(int i);
@@ -186,9 +186,6 @@ struct dl_bw {
         u64 bw, total_bw;
 };
 
-static inline u64 global_dl_period(void);
-static inline u64 global_dl_runtime(void);
-
 extern struct mutex sched_domains_mutex;
 
 #ifdef CONFIG_CGROUP_SCHED
@@ -953,19 +950,6 @@ static inline u64 global_rt_runtime(void)
         return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
 }
 
-static inline u64 global_dl_period(void)
-{
-        return (u64)sysctl_sched_dl_period * NSEC_PER_USEC;
-}
-
-static inline u64 global_dl_runtime(void)
-{
-        if (sysctl_sched_dl_runtime < 0)
-                return RUNTIME_INF;
-
-        return (u64)sysctl_sched_dl_runtime * NSEC_PER_USEC;
-}
-
 static inline int task_current(struct rq *rq, struct task_struct *p)
 {
         return rq->curr == p;